├── .github
    └── workflows
    │   └── testing.yml
├── .gitignore
├── .readthedocs.yaml
├── .travis.yml
├── LICENSE
├── README.rst
├── TODO.md
├── data
    ├── 38_thyroid.npz
    └── omi-1
    │   ├── omi-1_test.csv
    │   └── omi-1_train.csv
├── deepod
    ├── __init__.py
    ├── core
    │   ├── __init__.py
    │   ├── base_model.py
    │   └── networks
    │   │   ├── __init__.py
    │   │   ├── base_networks.py
    │   │   ├── network_utility.py
    │   │   ├── ts_network_dilated_conv.py
    │   │   ├── ts_network_tcn.py
    │   │   └── ts_network_transformer.py
    ├── metrics
    │   ├── __init__.py
    │   ├── _anomaly_detection.py
    │   ├── _tsad_adjustment.py
    │   ├── affiliation
    │   │   ├── __init__.py
    │   │   ├── _affiliation_zone.py
    │   │   ├── _integral_interval.py
    │   │   ├── _single_ground_truth_event.py
    │   │   ├── generics.py
    │   │   └── metrics.py
    │   └── vus
    │   │   ├── __init__.py
    │   │   ├── metrics.py
    │   │   └── utils
    │   │       ├── __init__.py
    │   │       └── metrics.py
    ├── models
    │   ├── __init__.py
    │   ├── tabular
    │   │   ├── __init__.py
    │   │   ├── devnet.py
    │   │   ├── dif.py
    │   │   ├── dsad.py
    │   │   ├── dsvdd.py
    │   │   ├── feawad.py
    │   │   ├── goad.py
    │   │   ├── icl.py
    │   │   ├── neutral.py
    │   │   ├── prenet.py
    │   │   ├── rca.py
    │   │   ├── rdp.py
    │   │   ├── repen.py
    │   │   ├── rosas.py
    │   │   └── slad.py
    │   └── time_series
    │   │   ├── __init__.py
    │   │   ├── anomalytransformer.py
    │   │   ├── couta.py
    │   │   ├── dcdetector.py
    │   │   ├── devnet.py
    │   │   ├── dif.py
    │   │   ├── dsad.py
    │   │   ├── dsvdd.py
    │   │   ├── ncad.py
    │   │   ├── prenet.py
    │   │   ├── tcned.py
    │   │   ├── timesnet.py
    │   │   ├── tranad.py
    │   │   └── usad.py
    ├── test
    │   ├── __init__.py
    │   ├── test_anomalyTransformer.py
    │   ├── test_couta.py
    │   ├── test_dcdetector.py
    │   ├── test_devnet.py
    │   ├── test_dif.py
    │   ├── test_dsad.py
    │   ├── test_dsvdd.py
    │   ├── test_feawad.py
    │   ├── test_goad.py
    │   ├── test_icl.py
    │   ├── test_ncad.py
    │   ├── test_neutral.py
    │   ├── test_prenet.py
    │   ├── test_rca.py
    │   ├── test_rdp.py
    │   ├── test_repen.py
    │   ├── test_rosas.py
    │   ├── test_slad.py
    │   ├── test_tcned.py
    │   ├── test_timesnet.py
    │   ├── test_tranad.py
    │   └── test_usad.py
    ├── utils
    │   ├── __init__.py
    │   ├── data.py
    │   └── utility.py
    └── version.py
├── docs
    ├── Gemfile
    ├── Makefile
    ├── _templates
    │   └── class.rst
    ├── additional.contributing.rst
    ├── additional.license.rst
    ├── additional.star_history.rst
    ├── api_cc.rst
    ├── api_reference.base_networks.rst
    ├── api_reference.metrics.rst
    ├── api_reference.rst
    ├── api_reference.tabular.rst
    ├── api_reference.time_series.rst
    ├── conf.py
    ├── index.rst
    ├── requirements.txt
    ├── start.examples.rst
    ├── start.install.rst
    ├── start.model_save.rst
    └── zreferences.bib
├── environment.yml
├── examples
    └── __init__.py
├── requirements.txt
├── requirements_ci.yml
├── setup.py
└── testbed
    ├── configs.yaml
    ├── testbed_unsupervised_ad.py
    ├── testbed_unsupervised_tsad.py
    └── utils.py


/.github/workflows/testing.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Python Package using pip
10 | 
11 | on:
12 |   push:
13 |     branches:
14 |       - main
15 |       - dev
16 |   pull_request:
17 |     branches:
18 |       - main
19 |       - dev
20 | 
21 | permissions:
22 |   contents: read
23 | 
24 | jobs:
25 |   deploy:
26 | 
27 |     runs-on: ubuntu-latest
28 | 
29 |     strategy:
30 |       fail-fast: false
31 |       matrix:
32 |         os: [ubuntu-latest, windows-latest, macos-latest]
33 |         python-version: ["3.7", "3.8", "3.9", "3.10"]
34 | 
35 |     steps:
36 |     - uses: actions/checkout@v3
37 |     - name: Python ${{ matrix.python-version }}
38 |       uses: actions/setup-python@v3
39 |       with:
40 |         python-version: ${{ matrix.python-version }}
41 |     - name: Install dependencies
42 |       run: |
43 |         python -m pip install --upgrade pip
44 |         pip install -r requirements.txt
45 |         pip install pytest
46 |         pip install coverage
47 |         pip install build
48 |         pip install coveralls
49 | 
50 |     - name: Test with pytest
51 |       run: |
52 |         coverage run --source=deepod -m pytest
53 | 
54 |     - name: coverage report
55 |       env:
56 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
57 |       run: |
58 |         coveralls --service=github
59 | 
60 | #    - name: Build package
61 | #      run: python -m build
62 | 
63 | #     - name: Publish package
64 | #       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
65 | #       with:
66 | #         user: __token__
67 | #         password: ${{ secrets.PYPI_API_TOKEN }}
68 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | deepod.egg-info/
 2 | dist/
 3 | build/
 4 | .idea
 5 | **/__pycache__
 6 | docs_output
 7 | docs/generated
 8 | .vscode
 9 | sphinx-build
10 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file for Sphinx projects
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Set the OS, Python version and other tools you might need
 8 | build:
 9 |   os: ubuntu-22.04
10 |   tools:
11 |     python: "3.8"
12 |     # You can also specify other tool versions:
13 |     # nodejs: "20"
14 |     # rust: "1.70"
15 |     # golang: "1.20"
16 | 
17 | # Build documentation in the "docs/" directory with Sphinx
18 | sphinx:
19 |   configuration: docs/conf.py
20 |   # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
21 |   # builder: "dirhtml"
22 |   # Fail on all warnings to avoid broken references
23 |   # fail_on_warning: true
24 | 
25 | # Optionally build your docs in additional formats such as PDF and ePub
26 | # formats:
27 | #   - pdf
28 | #   - epub
29 | 
30 | # Optional but recommended, declare the Python requirements required
31 | # to build your documentation
32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
33 | python:
34 |  install:
35 |    - requirements: docs/requirements.txt


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | dist: xenial
 3 | 
 4 | python:
 5 | #  - "3.6"
 6 | #  - "3.7"
 7 |   - "3.8"
 8 |   - "3.9"
 9 | 
10 | install:
11 |   - pip install --upgrade pip
12 |   - pip list
13 |   - pip install importlib-metadata>=4.0.0
14 |   - pip install setuptools>=49.6.0
15 |   - pip install -r requirements.txt
16 |   - pip install -r requirements_ci.yml
17 | 
18 | # command to run tests
19 | script:
20 |    pytest --cov=deepod/
21 | 
22 | after_success:
23 |   - coveralls
24 | 
25 | notifications:
26 |   email:
27 |     recipients:
28 |       - hongzuoxu@126.com
29 |   on_success: never # default: change
30 |   on_failure: always # default: always
31 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 2-Clause License
 2 | 
 3 | Copyright (c) 2023, Hongzuo Xu
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this
 9 |    list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 |    this list of conditions and the following disclaimer in the documentation
13 |    and/or other materials provided with the distribution.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | in the skeleton of base deep ad models:
 4 | - add early stopping mechanism
 5 | - add adversarial training?
 6 |   
 7 | models
 8 | - add AE as a baseline
 9 | - add GAN-based method as a baseline
10 | - add one-class-based methods, e.g., DROCC, HRN, ..
11 |   
12 | model selection
13 | - add the pretrained method for model selection
14 | - add more model selection methods


--------------------------------------------------------------------------------
/data/38_thyroid.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xuhongzuo/DeepOD/bb8c20c5c7e860a7c5f6dde4b4a9981bd0eb739d/data/38_thyroid.npz


--------------------------------------------------------------------------------
/deepod/__init__.py:
--------------------------------------------------------------------------------
1 | from deepod.version import __version__
2 | from . import core, models, metrics
3 | 
4 | __all__ = ['__version__', 'core', 'models', 'metrics']


--------------------------------------------------------------------------------
/deepod/core/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`deepod.core` module covers the base classes of `DeepOD`.
 3 | """
 4 | from .base_model import BaseDeepAD
 5 | 
 6 | 
 7 | __all__ = ["BaseDeepAD"]
 8 | 
 9 | # print("""
10 | # ╭━━━╮          ╭━━━╮ ╭━━━ ╮
11 | # ╰╮╭╮┃          ┃╭━╮┃ ╰╮╭ ╮┃
12 | #  ┃┃┃┣━━┳━━┳━━╮ ┃┃ ┃┃  ┃┃ ┃┃
13 | #  ┃┃┃┃┃━┫┃━┫╭╮┃ ┃┃ ┃┃  ┃┃ ┃┃
14 | # ╭╯╰╯┃┃━┫┃━┫╰╯┃ ┃╰━╯┃ ╭╯╯ ╰┃
15 | # ╰━━━┻━━┻━━┫╭━╯ ╰━━━╯ ╰━━━━╯
16 | #           ┃┃
17 | #           ╰╯
18 | # """)


--------------------------------------------------------------------------------
/deepod/core/networks/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base_networks import MLPnet
 2 | from .base_networks import MlpAE
 3 | from .base_networks import GRUNet
 4 | from .base_networks import LSTMNet
 5 | from .base_networks import ConvSeqEncoder
 6 | from .base_networks import ConvNet
 7 | from .ts_network_transformer import TSTransformerEncoder
 8 | from .ts_network_tcn import TCNnet
 9 | from .ts_network_tcn import TcnAE
10 | 
11 | __all__ = ['MLPnet', 'MlpAE', 'GRUNet', 'LSTMNet', 'ConvSeqEncoder',
12 |            'ConvNet', 'TSTransformerEncoder', 'TCNnet', 'TcnAE']


--------------------------------------------------------------------------------
/deepod/core/networks/network_utility.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import warnings
 3 | import importlib
 4 | 
 5 | 
 6 | def _instantiate_class(module_name: str, class_name: str):
 7 |     module = importlib.import_module(module_name)
 8 |     class_ = getattr(module, class_name)
 9 |     return class_()
10 | 
11 | 
12 | def _handle_n_hidden(n_hidden):
13 |     if type(n_hidden) == int:
14 |         n_layers = 1
15 |         hidden_dim = n_hidden
16 |     elif type(n_hidden) == str:
17 |         n_hidden = n_hidden.split(',')
18 |         n_hidden = [int(a) for a in n_hidden]
19 |         n_layers = len(n_hidden)
20 |         hidden_dim = int(n_hidden[0])
21 | 
22 |         if np.std(n_hidden) != 0:
23 |             warnings.warn('use the first hidden num, '
24 |                           'the rest hidden numbers are deprecated', UserWarning)
25 |     else:
26 |         raise TypeError('n_hidden should be a string or a int.')
27 | 
28 |     return hidden_dim, n_layers
29 | 


--------------------------------------------------------------------------------
/deepod/core/networks/ts_network_dilated_conv.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from deepod.core.networks.network_utility import _handle_n_hidden
 3 | from torch.nn import functional as F
 4 | 
 5 | 
 6 | class SamePadConv(torch.nn.Module):
 7 |     def __init__(self, in_channels, out_channels, kernel_size, dilation=1, groups=1):
 8 |         super().__init__()
 9 |         self.receptive_field = (kernel_size - 1) * dilation + 1
10 |         padding = self.receptive_field // 2
11 |         self.conv = torch.nn.Conv1d(
12 |             in_channels, out_channels, kernel_size,
13 |             padding=padding,
14 |             dilation=dilation,
15 |             groups=groups
16 |         )
17 |         self.remove = 1 if self.receptive_field % 2 == 0 else 0
18 | 
19 |     def forward(self, x):
20 |         out = self.conv(x)
21 |         if self.remove > 0:
22 |             out = out[:, :, : -self.remove]
23 |         return out
24 | 
25 | 
class ConvBlock(torch.nn.Module):
    """
    Residual block made of two GELU-activated ``SamePadConv`` layers.

    The skip path is the identity, unless the channel count changes or the
    block is flagged as ``final``; in those cases a kernel-size-1
    convolution projects the input before it is added to the output.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation, final=False):
        super().__init__()
        self.conv1 = SamePadConv(in_channels, out_channels, kernel_size, dilation=dilation)
        self.conv2 = SamePadConv(out_channels, out_channels, kernel_size, dilation=dilation)
        needs_projection = final or in_channels != out_channels
        if needs_projection:
            self.projector = torch.nn.Conv1d(in_channels, out_channels, 1)
        else:
            self.projector = None

    def forward(self, x):
        """Apply activation -> conv twice and add the (possibly projected) input."""
        shortcut = self.projector(x) if self.projector is not None else x
        hidden = self.conv1(F.gelu(x))
        hidden = self.conv2(F.gelu(hidden))
        return hidden + shortcut
40 | 
41 | 
class DilatedConvEncoder(torch.nn.Module):
    """
    Sequence encoder: a linear input projection followed by a stack of
    ``ConvBlock`` layers with dilation ``2 ** i``, then a max-pool over the
    whole sequence.

    Args:
        n_features: size of the last input axis (fed to the linear layer).
        n_hidden: hidden setting parsed by ``_handle_n_hidden``; gives the
            channel width and the number of hidden blocks.
        n_output: channel count of the last block, i.e. the size of the
            returned representation.
        bias: whether the input linear layer has a bias term (the
            convolutions are not affected by this flag).
        kernel_size: kernel size used by every block.
    """

    def __init__(self, n_features, n_hidden='20', n_output=20,
                 bias=False,
                 kernel_size=3):
        super().__init__()

        hidden_dim, n_layers = _handle_n_hidden(n_hidden)

        self.input_fc = torch.nn.Linear(n_features, hidden_dim, bias=bias)

        widths = [hidden_dim] * n_layers + [n_output]
        last_idx = len(widths) - 1
        blocks = []
        prev_width = hidden_dim
        for depth, width in enumerate(widths):
            blocks.append(
                ConvBlock(
                    prev_width,
                    width,
                    kernel_size=kernel_size,
                    dilation=2 ** depth,
                    final=(depth == last_idx),
                )
            )
            prev_width = width
        self.net = torch.nn.Sequential(*blocks)

    def forward(self, x):
        """
        Encode ``x`` into one vector per sample.

        The input is projected on its last axis, axes 1 and 2 are swapped so
        that the convolutions see channels on axis 1, and the result is
        max-pooled over the full length of axis 2.
        """
        feats = self.input_fc(x).transpose(1, 2)
        feats = self.net(feats)
        # pool over the entire sequence axis, leaving a single step
        pooled = F.max_pool1d(feats, kernel_size=feats.size(2))
        return pooled.transpose(1, 2).squeeze(1)
73 | 


--------------------------------------------------------------------------------
/deepod/metrics/__init__.py:
--------------------------------------------------------------------------------
 1 | from deepod.metrics._anomaly_detection import auc_roc
 2 | from deepod.metrics._anomaly_detection import auc_pr
 3 | from deepod.metrics._anomaly_detection import tabular_metrics
 4 | from deepod.metrics._anomaly_detection import ts_metrics
 5 | from deepod.metrics._tsad_adjustment import point_adjustment
 6 | from deepod.metrics._anomaly_detection import ts_metrics_enhanced
 7 | 
 8 | 
 9 | __all__ = [
10 |     'auc_pr',
11 |     'auc_roc',
12 |     'tabular_metrics',
13 |     'ts_metrics',
14 |     'point_adjustment',
15 |     'ts_metrics_enhanced'
16 | ]


--------------------------------------------------------------------------------
/deepod/metrics/_tsad_adjustment.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def point_adjustment(y_true, y_score):
 5 |     """
 6 |     adjust the score for segment detection. i.e., for each ground-truth anomaly segment,
 7 |     use the maximum score as the score of all points in that segment. This corresponds to point-adjust f1-score.
 8 |     *This function is copied/modified from the source code in [Zhihan Li et al. KDD21]* 
 9 | 
10 |     Args:
11 |     
12 |         y_true (np.array, required): 
13 |             Data label, 0 indicates normal timestamp, and 1 is anomaly.
14 |             
15 |         y_score (np.array, required): 
16 |             Predicted anomaly scores, higher score indicates higher likelihoods to be anomaly.
17 | 
18 |     Returns:
19 |     
20 |         np.array: 
21 |             Adjusted anomaly scores.
22 | 
23 |     """
24 |     score = y_score.copy()
25 |     assert len(score) == len(y_true)
26 |     splits = np.where(y_true[1:] != y_true[:-1])[0] + 1
27 |     is_anomaly = y_true[0] == 1
28 |     pos = 0
29 |     for sp in splits:
30 |         if is_anomaly:
31 |             score[pos:sp] = np.max(score[pos:sp])
32 |         is_anomaly = not is_anomaly
33 |         pos = sp
34 |     sp = len(y_true)
35 |     if is_anomaly:
36 |         score[pos:sp] = np.max(score[pos:sp])
37 |     return score
38 | 


--------------------------------------------------------------------------------
/deepod/metrics/affiliation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xuhongzuo/DeepOD/bb8c20c5c7e860a7c5f6dde4b4a9981bd0eb739d/deepod/metrics/affiliation/__init__.py


--------------------------------------------------------------------------------
/deepod/metrics/affiliation/_affiliation_zone.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | from ._integral_interval import interval_intersection
 4 | 
 5 | def t_start(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)):
 6 |     """
 7 |     Helper for `E_gt_func`
 8 |     
 9 |     :param j: index from 0 to len(Js) (included) on which to get the start
10 |     :param Js: ground truth events, as a list of couples
11 |     :param Trange: range of the series where Js is included
12 |     :return: generalized start such that the middle of t_start and t_stop 
13 |     always gives the affiliation zone
14 |     """
15 |     b = max(Trange)
16 |     n = len(Js)
17 |     if j == n:
18 |         return(2*b - t_stop(n-1, Js, Trange))
19 |     else:
20 |         return(Js[j][0])
21 | 
def t_stop(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)):
    """
    Helper for `E_gt_func`: generalized stop of ground truth event `j`.

    :param j: index from -1 to len(Js) (excluded) on which to get the stop
    :param Js: ground truth events, as a list of couples
    :param Trange: range of the series where Js is included
    :return: the stop of event `j`; for `j == -1` a virtual stop, mirrored
    around the lower bound of `Trange`, such that the middle of t_start and
    t_stop always gives the affiliation zone
    """
    if j != -1:
        return Js[j][1]
    # one before the first event: reflect the first start around the lower bound
    lower = min(Trange)
    return 2 * lower - t_start(0, Js, Trange)
37 | 
def E_gt_func(j, Js, Trange):
    """
    Get the affiliation zone of element j of the ground truth.

    Each border of the zone is the midpoint between the (generalized) stop
    of one event and the (generalized) start of the next one.

    :param j: index from 0 to len(Js) (excluded) on which to get the zone
    :param Js: ground truth events, as a list of couples
    :param Trange: range of the series where Js is included, can
    be (-math.inf, math.inf) for distance measures
    :return: affiliation zone of element j of the ground truth represented
    as a couple
    """
    previous_stop = t_stop(j - 1, Js, Trange)
    own_start = t_start(j, Js, Trange)
    left_border = (previous_stop + own_start) / 2

    own_stop = t_stop(j, Js, Trange)
    next_start = t_start(j + 1, Js, Trange)
    right_border = (own_stop + next_start) / 2

    return (left_border, right_border)
52 | 
def get_all_E_gt_func(Js, Trange):
    """
    Get the affiliation partition from the ground truth point of view.

    :param Js: ground truth events, as a list of couples
    :param Trange: range of the series where Js is included, can
    be (-math.inf, math.inf) for distance measures
    :return: list with one affiliation zone (a couple) per event of Js
    """
    zones = []
    for idx in range(len(Js)):
        # each zone is the limit of affiliation/attraction of one ground truth event
        zones.append(E_gt_func(idx, Js, Trange))
    return zones
65 | 
def affiliation_partition(Is = [(1,1.5),(2,5),(5,6),(8,9)], E_gt = [(1,2.5),(2.5,4.5),(4.5,10)]):
    """
    Cut the events into the affiliation zones.
    The presentation given here is from the ground truth point of view,
    but it is also used in the reversed direction in the main function.

    :param Is: events as a list of couples
    :param E_gt: range of the affiliation zones
    :return: a list of list of intervals (each interval represented by either
    a couple or None for empty interval). The outer list is indexed by each
    affiliation zone of `E_gt`. The inner list is indexed by the events of `Is`.
    """
    partition = []
    for zone in E_gt:
        # event ends before the zone begins
        ends_before = [event[1] < zone[0] for event in Is]
        # event starts after the zone ends
        starts_after = [event[0] > zone[1] for event in Is]
        keep_mask = [not (before or after)
                     for before, after in zip(ends_before, starts_after)]
        # The mask is paired with the events but not used as a filter, so
        # every event of `Is` yields exactly one entry per zone.
        # NOTE(review): presumably deliberate, to keep the inner list aligned
        # with `Is` -- confirm before turning this into a real filter.
        zone_events = [event for event, _ in zip(Is, keep_mask)]
        partition.append([interval_intersection(event, zone) for event in zone_events])
    return partition
87 | 


--------------------------------------------------------------------------------
/deepod/metrics/affiliation/_single_ground_truth_event.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | import math
 4 | from ._affiliation_zone import (
 5 |         get_all_E_gt_func, 
 6 |         affiliation_partition)
 7 | from ._integral_interval import (
 8 |         integral_interval_distance,
 9 |         integral_interval_probaCDF_precision, 
10 |         integral_interval_probaCDF_recall, 
11 |         interval_length,
12 |         sum_interval_lengths)
13 | 
def affiliation_precision_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)):
    """
    Compute the individual average distance from Is to a single ground truth J.

    :param Is: list of predicted events within the affiliation zone of J
    :param J: couple representing the start and stop of a ground truth interval
    :return: individual average precision directed distance number, or
    math.nan when every entry of Is is None
    """
    has_prediction = any(pred is not None for pred in Is)
    if not has_prediction:
        # no prediction in the current area: the distance is undefined
        return math.nan
    total_distance = sum(integral_interval_distance(pred, J) for pred in Is)
    return total_distance / sum_interval_lengths(Is)
25 | 
def affiliation_precision_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)):
    """
    Compute the individual precision probability from Is to a single ground truth J.

    :param Is: list of predicted events within the affiliation zone of J
    :param J: couple representing the start and stop of a ground truth interval
    :param E: couple representing the start and stop of the zone of affiliation of J
    :return: individual precision probability in [0, 1], or math.nan if undefined
    """
    has_prediction = any(pred is not None for pred in Is)
    if not has_prediction:
        # no prediction in the current area: the probability is undefined
        return math.nan
    total_proba = sum(integral_interval_probaCDF_precision(pred, J, E) for pred in Is)
    return total_proba / sum_interval_lengths(Is)
38 | 
def affiliation_recall_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)):
    """
    Compute the individual average distance from a single J to the predictions Is.

    :param Is: list of predicted events within the affiliation zone of J
    :param J: couple representing the start and stop of a ground truth interval
    :return: individual average recall directed distance number, or math.inf
    when Is holds no prediction
    """
    predictions = [pred for pred in Is if pred is not None]  # drop possible None entries
    if len(predictions) == 0:
        # there is no prediction in the current area
        return math.inf
    # zones built from the point of view of the predictions
    pred_zones = get_all_E_gt_func(predictions, (-math.inf, math.inf))
    # pieces of J, one list per prediction zone, split by proximity with the predictions
    pieces = affiliation_partition([J], pred_zones)
    total_distance = sum(
        integral_interval_distance(piece[0], pred)
        for pred, piece in zip(predictions, pieces)
    )
    return total_distance / interval_length(J)
53 | 
def affiliation_recall_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)):
    """
    Compute the individual recall probability from a single ground truth J to Is.

    :param Is: list of predicted events within the affiliation zone of J
    :param J: couple representing the start and stop of a ground truth interval
    :param E: couple representing the start and stop of the zone of affiliation of J
    :return: individual recall probability in [0, 1] (0 when Is holds no prediction)
    """
    predictions = [pred for pred in Is if pred is not None]  # drop possible None entries
    if len(predictions) == 0:
        # there is no prediction in the current area
        return 0
    # zones built from the point of view of the predictions, bounded by E
    pred_zones = get_all_E_gt_func(predictions, E)
    # pieces of J, one list per prediction zone, split by proximity with the predictions
    pieces = affiliation_partition([J], pred_zones)
    total_proba = sum(
        integral_interval_probaCDF_recall(pred, piece[0], E)
        for pred, piece in zip(predictions, pieces)
    )
    return total_proba / interval_length(J)
69 | 


--------------------------------------------------------------------------------
/deepod/metrics/affiliation/generics.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | from itertools import groupby
  4 | from operator import itemgetter
  5 | import math
  6 | import gzip
  7 | import glob
  8 | import os
  9 | 
 10 | def convert_vector_to_events(vector = [0, 1, 1, 0, 0, 1, 0]):
 11 |     """
 12 |     Convert a binary vector (indicating 1 for the anomalous instances)
 13 |     to a list of events. The events are considered as durations,
 14 |     i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1).
 15 |     
 16 |     :param vector: a list of elements belonging to {0, 1}
 17 |     :return: a list of couples, each couple representing the start and stop of
 18 |     each event
 19 |     """
 20 |     positive_indexes = [idx for idx, val in enumerate(vector) if val > 0]
 21 |     events = []
 22 |     for k, g in groupby(enumerate(positive_indexes), lambda ix : ix[0] - ix[1]):
 23 |         cur_cut = list(map(itemgetter(1), g))
 24 |         events.append((cur_cut[0], cur_cut[-1]))
 25 |     
 26 |     # Consistent conversion in case of range anomalies (for indexes):
 27 |     # A positive index i is considered as the interval [i, i+1),
 28 |     # so the last index should be moved by 1
 29 |     events = [(x, y+1) for (x,y) in events]
 30 |         
 31 |     return(events)
 32 | 
 33 | def infer_Trange(events_pred, events_gt):
 34 |     """
 35 |     Given the list of events events_pred and events_gt, get the
 36 |     smallest possible Trange corresponding to the start and stop indexes 
 37 |     of the whole series.
 38 |     Trange will not influence the measure of distances, but will impact the
 39 |     measures of probabilities.
 40 |     
 41 |     :param events_pred: a list of couples corresponding to predicted events
 42 |     :param events_gt: a list of couples corresponding to ground truth events
 43 |     :return: a couple corresponding to the smallest range containing the events
 44 |     """
 45 |     if len(events_gt) == 0:
 46 |         raise ValueError('The gt events should contain at least one event')
 47 |     if len(events_pred) == 0:
 48 |         # empty prediction, base Trange only on events_gt (which is non empty)
 49 |         return(infer_Trange(events_gt, events_gt))
 50 |         
 51 |     min_pred = min([x[0] for x in events_pred])
 52 |     min_gt = min([x[0] for x in events_gt])
 53 |     max_pred = max([x[1] for x in events_pred])
 54 |     max_gt = max([x[1] for x in events_gt])
 55 |     Trange = (min(min_pred, min_gt), max(max_pred, max_gt))
 56 |     return(Trange)
 57 | 
 58 | def has_point_anomalies(events):
 59 |     """
 60 |     Checking whether events contain point anomalies, i.e.
 61 |     events starting and stopping at the same time.
 62 |     
 63 |     :param events: a list of couples corresponding to predicted events
 64 |     :return: True is the events have any point anomalies, False otherwise
 65 |     """
 66 |     if len(events) == 0:
 67 |         return(False)
 68 |     return(min([x[1] - x[0] for x in events]) == 0)
 69 | 
 70 | def _sum_wo_nan(vec):
 71 |     """
 72 |     Sum of elements, ignoring math.isnan ones
 73 |     
 74 |     :param vec: vector of floating numbers
 75 |     :return: sum of the elements, ignoring math.isnan ones
 76 |     """
 77 |     vec_wo_nan = [e for e in vec if not math.isnan(e)]
 78 |     return(sum(vec_wo_nan))
 79 |     
 80 | def _len_wo_nan(vec):
 81 |     """
 82 |     Count of elements, ignoring math.isnan ones
 83 |     
 84 |     :param vec: vector of floating numbers
 85 |     :return: count of the elements, ignoring math.isnan ones
 86 |     """
 87 |     vec_wo_nan = [e for e in vec if not math.isnan(e)]
 88 |     return(len(vec_wo_nan))
 89 | 
 90 | def read_gz_data(filename = 'data/machinetemp_groundtruth.gz'):
 91 |     """
 92 |     Load a file compressed with gz, such that each line of the
 93 |     file is either 0 (representing a normal instance) or 1 (representing)
 94 |     an anomalous instance.
 95 |     :param filename: file path to the gz compressed file
 96 |     :return: list of integers with either 0 or 1
 97 |     """
 98 |     with gzip.open(filename, 'rb') as f:
 99 |         content = f.read().splitlines()
100 |     content = [int(x) for x in content]
101 |     return(content)
102 | 
def read_all_as_events():
    """
    Load the files matching `data/*.gz` and convert them to events.
    The length of each series is kept.
    The convention for the file name is: `dataset_algorithm.gz`

    :return: two dictionaries:
        - the first containing the list of events for each dataset and algorithm,
        - the second containing the range of the series for each dataset
          (taken from the first file read for that dataset)
    """
    datasets = {}
    Tranges = {}
    for filepath in glob.glob('data/*.gz'):
        vector = read_gz_data(filepath)
        events = convert_vector_to_events(vector)

        # ad hoc cut for those files: `<dataset>_<algorithm>.gz`
        basename = os.path.split(filepath)[1]
        name_parts = basename.split('_')
        data_name = name_parts[0]
        algo_name = name_parts[1].split('.')[0]

        if data_name not in datasets:
            datasets[data_name] = {}
            Tranges[data_name] = (0, len(vector))
        datasets[data_name][algo_name] = events
    return (datasets, Tranges)
127 | 
def f1_func(p, r):
    """
    Compute the f1 function, i.e. the harmonic mean of precision and recall

    :param p: precision numeric value
    :param r: recall numeric value
    :return: f1 numeric value; 0.0 when the division by `p + r` is
    undefined because both are zero
    """
    try:
        return 2*p*r/(p+r)
    except ZeroDivisionError:
        # p + r == 0: no precision and no recall, report the worst score
        # instead of crashing the caller
        return 0.0
136 | 


--------------------------------------------------------------------------------
/deepod/metrics/affiliation/metrics.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | from .generics import (
  4 |         infer_Trange,
  5 |         has_point_anomalies, 
  6 |         _len_wo_nan, 
  7 |         _sum_wo_nan,
  8 |         read_all_as_events)
  9 | from ._affiliation_zone import (
 10 |         get_all_E_gt_func, 
 11 |         affiliation_partition)
 12 | from ._single_ground_truth_event import (
 13 |         affiliation_precision_distance,
 14 |         affiliation_recall_distance,
 15 |         affiliation_precision_proba,
 16 |         affiliation_recall_proba)
 17 | 
 18 | def test_events(events):
 19 |     """
 20 |     Verify the validity of the input events
 21 |     :param events: list of events, each represented by a couple (start, stop)
 22 |     :return: None. Raise an error for incorrect formed or non ordered events
 23 |     """
 24 |     if type(events) is not list:
 25 |         raise TypeError('Input `events` should be a list of couples')
 26 |     if not all([type(x) is tuple for x in events]):
 27 |         raise TypeError('Input `events` should be a list of tuples')
 28 |     if not all([len(x) == 2 for x in events]):
 29 |         raise ValueError('Input `events` should be a list of couples (start, stop)')
 30 |     if not all([x[0] <= x[1] for x in events]):
 31 |         raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop')
 32 |     if not all([events[i][1] < events[i+1][0] for i in range(len(events) - 1)]):
 33 |         raise ValueError('Couples of input `events` should be disjoint and ordered')
 34 | 
 35 | def pr_from_events(events_pred, events_gt, Trange):
 36 |     """
 37 |     Compute the affiliation metrics including the precision/recall in [0,1],
 38 |     along with the individual precision/recall distances and probabilities
 39 |     
 40 |     :param events_pred: list of predicted events, each represented by a couple
 41 |     indicating the start and the stop of the event
 42 |     :param events_gt: list of ground truth events, each represented by a couple
 43 |     indicating the start and the stop of the event
 44 |     :param Trange: range of the series where events_pred and events_gt are included,
 45 |     represented as a couple (start, stop)
 46 |     :return: dictionary with precision, recall, and the individual metrics
 47 |     """
 48 |     # testing the inputs
 49 |     test_events(events_pred)
 50 |     test_events(events_gt)
 51 |     
 52 |     # other tests
 53 |     minimal_Trange = infer_Trange(events_pred, events_gt)
 54 |     if not Trange[0] <= minimal_Trange[0]:
 55 |         raise ValueError('`Trange` should include all the events')
 56 |     if not minimal_Trange[1] <= Trange[1]:
 57 |         raise ValueError('`Trange` should include all the events')
 58 |     
 59 |     if len(events_gt) == 0:
 60 |         raise ValueError('Input `events_gt` should have at least one event')
 61 | 
 62 |     if has_point_anomalies(events_pred) or has_point_anomalies(events_gt):
 63 |         raise ValueError('Cannot manage point anomalies currently')
 64 | 
 65 |     if Trange is None:
 66 |         # Set as default, but Trange should be indicated if probabilities are used
 67 |         raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function')
 68 | 
 69 |     E_gt = get_all_E_gt_func(events_gt, Trange)
 70 |     aff_partition = affiliation_partition(events_pred, E_gt)
 71 | 
 72 |     # Computing precision distance
 73 |     d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
 74 |     
 75 |     # Computing recall distance
 76 |     d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]
 77 | 
 78 |     # Computing precision
 79 |     p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
 80 | 
 81 |     # Computing recall
 82 |     p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]
 83 | 
 84 |     if _len_wo_nan(p_precision) > 0:
 85 |         p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision)
 86 |     else:
 87 |         p_precision_average = p_precision[0] # math.nan
 88 |     p_recall_average = sum(p_recall) / len(p_recall)
 89 | 
 90 |     dict_out = dict({'Affiliation_Precision': p_precision_average,
 91 |                      'Affiliation_Recall': p_recall_average,
 92 |                      'individual_precision_probabilities': p_precision,
 93 |                      'individual_recall_probabilities': p_recall,
 94 |                      'individual_precision_distances': d_precision,
 95 |                      'individual_recall_distances': d_recall})
 96 |     return(dict_out)
 97 | 
 98 | def produce_all_results():
 99 |     """
100 |     Produce the affiliation precision/recall for all files
101 |     contained in the `data` repository
102 |     :return: a dictionary indexed by data names, each containing a dictionary
103 |     indexed by algorithm names, each containing the results of the affiliation
104 |     metrics (precision, recall, individual probabilities and distances)
105 |     """
106 |     datasets, Tranges = read_all_as_events() # read all the events in folder `data`
107 |     results = dict()
108 |     for data_name in datasets.keys():
109 |         results_data = dict()
110 |         for algo_name in datasets[data_name].keys():
111 |             if algo_name != 'groundtruth':
112 |                 results_data[algo_name] = pr_from_events(datasets[data_name][algo_name],
113 |                                                          datasets[data_name]['groundtruth'],
114 |                                                          Tranges[data_name])
115 |         results[data_name] = results_data
116 |     return(results)
117 | 


--------------------------------------------------------------------------------
/deepod/metrics/vus/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xuhongzuo/DeepOD/bb8c20c5c7e860a7c5f6dde4b4a9981bd0eb739d/deepod/metrics/vus/__init__.py


--------------------------------------------------------------------------------
/deepod/metrics/vus/metrics.py:
--------------------------------------------------------------------------------
 1 | from deepod.metrics.vus.utils.metrics import metricor
 2 | 
 3 | 
 4 | def get_range_vus_roc(score, labels, slidingWindow):
 5 |     R_AUC_ROC, R_AUC_PR, _, _, _ = metricor().RangeAUC(labels=labels, score=score,
 6 |                                                        window=slidingWindow, plot_ROC=True)
 7 |     _, _, _, _, VUS_ROC, VUS_PR = metricor().RangeAUC_volume(labels_original=labels,
 8 |                                                  score=score,
 9 |                                                  windowSize=2*slidingWindow)
10 | 
11 |     metrics = {'R_AUC_ROC': R_AUC_ROC, 'R_AUC_PR': R_AUC_PR, 'VUS_ROC': VUS_ROC, 'VUS_PR': VUS_PR}
12 | 
13 |     return metrics
14 | 


--------------------------------------------------------------------------------
/deepod/metrics/vus/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xuhongzuo/DeepOD/bb8c20c5c7e860a7c5f6dde4b4a9981bd0eb739d/deepod/metrics/vus/utils/__init__.py


--------------------------------------------------------------------------------
/deepod/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from deepod.models.tabular.dsvdd import DeepSVDD
 2 | from deepod.models.tabular.rca import RCA
 3 | from deepod.models.tabular.dsad import DeepSAD
 4 | from deepod.models.tabular.repen import REPEN
 5 | from deepod.models.tabular.neutral import NeuTraL
 6 | from deepod.models.tabular.dif import DeepIsolationForest
 7 | from deepod.models.tabular.slad import SLAD
 8 | from deepod.models.tabular.rdp import RDP
 9 | from deepod.models.tabular.feawad import FeaWAD
10 | from deepod.models.tabular.devnet import DevNet
11 | from deepod.models.tabular.prenet import PReNet
12 | from deepod.models.tabular.goad import GOAD
13 | from deepod.models.tabular.icl import ICL
14 | from deepod.models.tabular.rosas import RoSAS
15 | 
16 | from deepod.models.time_series.prenet import PReNetTS
17 | from deepod.models.time_series.dsad import DeepSADTS
18 | from deepod.models.time_series.devnet import DevNetTS
19 | 
20 | from deepod.models.time_series.dif import DeepIsolationForestTS
21 | from deepod.models.time_series.dsvdd import DeepSVDDTS
22 | 
23 | from deepod.models.time_series.dcdetector import DCdetector
24 | from deepod.models.time_series.timesnet import TimesNet
25 | from deepod.models.time_series.anomalytransformer import AnomalyTransformer
26 | from deepod.models.time_series.ncad import NCAD
27 | from deepod.models.time_series.tranad import TranAD
28 | from deepod.models.time_series.couta import COUTA
29 | from deepod.models.time_series.usad import USAD
30 | from deepod.models.time_series.tcned import TcnED
31 | 
# Public names exported by `from deepod.models import *`:
# the tabular detectors come first, followed by the time-series detectors.
__all__ = [
    'RCA', 'DeepSVDD', 'GOAD', 'NeuTraL', 'RDP', 'ICL', 'SLAD', 'DeepIsolationForest',
    'DeepSAD', 'DevNet', 'PReNet', 'FeaWAD', 'REPEN', 'RoSAS',
    'DCdetector', 'TimesNet', 'AnomalyTransformer', 'NCAD',
    'TranAD', 'COUTA', 'USAD', 'TcnED',
    'DeepIsolationForestTS', 'DeepSVDDTS',
    'PReNetTS', 'DeepSADTS', 'DevNetTS'
]


--------------------------------------------------------------------------------
/deepod/models/tabular/__init__.py:
--------------------------------------------------------------------------------
 1 | from .rca import RCA
 2 | from .dsvdd import DeepSVDD
 3 | from .dsad import DeepSAD
 4 | from .devnet import DevNet
 5 | from .prenet import PReNet
 6 | from .feawad import FeaWAD
 7 | from .repen import REPEN
 8 | from .goad import GOAD
 9 | from .rca import RCA
10 | from .rdp import RDP
11 | from .icl import ICL
12 | from .slad import SLAD
13 | from .neutral import NeuTraL
14 | from .dif import DeepIsolationForest
15 | from .rosas import RoSAS
16 | 
# Public names exported by `from deepod.models.tabular import *`
__all__ = ['RCA', 'DeepSVDD', 'DeepSAD', 'DevNet', 'PReNet', 'FeaWAD',
           'REPEN', 'GOAD', 'NeuTraL', 'RDP', 'ICL', 'SLAD', 'DeepIsolationForest', 'RoSAS']


--------------------------------------------------------------------------------
/deepod/models/tabular/devnet.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Deep anomaly detection with deviation networks.
  4 | PyTorch's implementation
  5 | @Author: Hongzuo Xu <hongzuoxu@126.com, xuhongzuo13@nudt.edu.cn>
  6 | """
  7 | 
  8 | from deepod.core.base_model import BaseDeepAD
  9 | from deepod.core.networks.base_networks import MLPnet
 10 | from torch.utils.data import DataLoader, TensorDataset
 11 | from torch.utils.data.sampler import WeightedRandomSampler
 12 | import torch
 13 | import numpy as np
 14 | 
 15 | 
 16 | class DevNet(BaseDeepAD):
 17 |     """
 18 |     Deviation Networks for Weakly-supervised Anomaly Detection (KDD'19)
 19 |     :cite:`pang2019deep`
 20 | 
 21 |     Args:
 22 |         epochs (int, optional):
 23 |             number of training epochs (default: 100).
 24 |         batch_size (int, optional):
 25 |             number of samples in a mini-batch (default: 64)
 26 |         lr (float, optional):
 27 |             learning rate (default: 1e-3)
 28 |         rep_dim (int, optional):
 29 |             it is for consistency, unused in this model.
 30 |         hidden_dims (list, str or int, optional):
 31 |             number of neural units in hidden layers,
 32 |             If list, each item is a layer;
 33 |             If str, neural units of hidden layers are split by comma;
 34 |             If int, number of neural units of single hidden layer
 35 |             (default: '100,50')
 36 |         act (str, optional):
 37 |             activation layer name,
 38 |             choice = ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh']
 39 |             (default='ReLU')
 40 |         bias (bool, optional):
 41 |             Additive bias in linear layer (default=False)
 42 |         margin (float, optional):
 43 |             margin value used in the deviation loss function (default=5.)
 44 |         l (int, optional):
 45 |             the size of samples of the Gaussian distribution
 46 |             used in the deviation loss function (default=5000.)
 47 |         epoch_steps (int, optional):
 48 |             Maximum steps in an epoch.
 49 |             If -1, all the batches will be processed
 50 |             (default=-1)
 51 |         prt_steps (int, optional):
 52 |             Number of epoch intervals per printing (default=10)
 53 |         device (str, optional):
 54 |             torch device (default='cuda').
 55 |         verbose (int, optional):
 56 |             Verbosity mode (default=1)
 57 |         random_state (int, optional):
 58 |             the seed used by the random  (default=42)
 59 |     """
 60 |     def __init__(self, epochs=100, batch_size=64, lr=1e-3,
 61 |                  network='MLP',
 62 |                  rep_dim=128, hidden_dims='100,50', act='ReLU', bias=False,
 63 |                  margin=5., l=5000,
 64 |                  epoch_steps=-1, prt_steps=10, device='cuda',
 65 |                  verbose=2, random_state=42):
 66 |         super(DevNet, self).__init__(
 67 |             data_type='tabular', model_name='DevNet', epochs=epochs, batch_size=batch_size, lr=lr,
 68 |             network=network,
 69 |             epoch_steps=epoch_steps, prt_steps=prt_steps, device=device,
 70 |             verbose=verbose, random_state=random_state
 71 |         )
 72 | 
 73 |         self.margin = margin
 74 |         self.l = l
 75 | 
 76 |         self.hidden_dims = hidden_dims
 77 |         self.act = act
 78 |         self.bias = bias
 79 | 
 80 |         return
 81 | 
 82 |     def training_prepare(self, X, y):
 83 |         """
 84 | 
 85 |         Args:
 86 |             X (np.array): input data array
 87 |             y (np.array): input data label
 88 | 
 89 |         Returns:
 90 |             train_loader (torch.DataLoader): data loader of training data
 91 |             net (torch.nn.Module): neural network
 92 |             criterion (torch.nn.Module): loss function
 93 | 
 94 |         """
 95 |         # loader: balanced loader, a mini-batch contains a half of normal data and a half of anomalies
 96 |         n_anom = np.where(y == 1)[0].shape[0]
 97 |         n_norm = self.n_samples - n_anom
 98 |         weight_map = {0: 1. / n_norm, 1: 1. / n_anom}
 99 | 
100 |         dataset = TensorDataset(torch.from_numpy(X).float(), torch.from_numpy(y).long())
101 |         sampler = WeightedRandomSampler(weights=[weight_map[label.item()] for data, label in dataset],
102 |                                         num_samples=len(dataset), replacement=True)
103 |         train_loader = DataLoader(dataset, batch_size=self.batch_size, sampler=sampler)
104 | 
105 |         network_params = {
106 |             'n_features': self.n_features,
107 |             'n_hidden': self.hidden_dims,
108 |             'n_output': 1,
109 |             'activation': self.act,
110 |             'bias': self.bias
111 |         }
112 |         net = MLPnet(**network_params).to(self.device)
113 | 
114 |         criterion = DevLoss(margin=self.margin, l=self.l)
115 | 
116 |         if self.verbose >= 2:
117 |             print(net)
118 | 
119 |         return train_loader, net, criterion
120 | 
121 |     def inference_prepare(self, X):
122 |         test_loader = DataLoader(X, batch_size=self.batch_size,
123 |                                  drop_last=False, shuffle=False)
124 |         self.criterion.reduction = 'none'
125 |         return test_loader
126 | 
127 |     def training_forward(self, batch_x, net, criterion):
128 |         batch_x, batch_y = batch_x
129 |         batch_x = batch_x.float().to(self.device)
130 |         batch_y = batch_y.to(self.device)
131 |         pred = net(batch_x)
132 |         loss = criterion(batch_y, pred)
133 |         return loss
134 | 
135 |     def inference_forward(self, batch_x, net, criterion):
136 |         batch_x = batch_x.float().to(self.device)
137 |         s = net(batch_x)
138 |         s = s.view(-1)
139 |         batch_z = batch_x
140 |         return batch_z, s
141 | 
142 | 
class DevLoss(torch.nn.Module):
    """
    Deviation Loss

    Parameters
    ----------
    margin: float, optional (default=5.)
        Margin of the hinge term applied to samples labelled 1:
        their loss is ``max(margin - dev, 0)``

    l: int, optional (default=5000)
        the size of samples of the Gaussian distribution used in the deviation loss function

    reduction: str, optional (default='mean')
        choice = [``'none'`` | ``'mean'`` | ``'sum'``]
            - If ``'none'``: no reduction will be applied;
            - If ``'mean'``: the sum of the output will be divided by the number of
            elements in the output;
            - If ``'sum'``: the output will be summed

    """
    def __init__(self, margin=5., l=5000, reduction='mean'):
        super(DevLoss, self).__init__()
        self.margin = margin
        self.loss_l = l
        self.reduction = reduction
        return

    def forward(self, y_true, y_pred):
        """
        Compute the deviation loss.

        Parameters
        ----------
        y_true: torch.Tensor
            labels, 1 for anomalies and 0 otherwise

        y_pred: torch.Tensor
            predicted scores, holding as many elements as ``y_true``

        Returns
        -------
        torch.Tensor
            the reduced loss for ``'mean'``/``'sum'``, otherwise the
            element-wise loss shaped like ``y_pred``
        """
        # Align the labels with the predictions: (B,) labels against (B, 1)
        # predictions would otherwise broadcast to a (B, B) matrix that mixes
        # every label with every prediction.
        if y_true.shape != y_pred.shape and y_true.numel() == y_pred.numel():
            y_true = y_true.reshape(y_pred.shape)

        ref = torch.randn(self.loss_l)  # from the normal dataset
        # deviation of the score from the reference sample, in units of its std
        dev = (y_pred - torch.mean(ref)) / torch.std(ref)
        inlier_loss = torch.abs(dev)
        outlier_loss = torch.abs(torch.max(self.margin - dev, torch.zeros_like(dev)))
        loss = (1 - y_true) * inlier_loss + y_true * outlier_loss

        if self.reduction == 'mean':
            return torch.mean(loss)
        elif self.reduction == 'sum':
            return torch.sum(loss)

        # 'none' (and any unrecognised value): no reduction
        return loss
185 | 


--------------------------------------------------------------------------------
/deepod/models/tabular/neutral.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Neural Transformation Learning-based Anomaly Detection
  4 | this script is partially adapted from https://github.com/boschresearch/NeuTraL-AD (AGPL-3.0 license)
  5 | @Author: Hongzuo Xu <hongzuoxu@126.com, xuhongzuo13@nudt.edu.cn>
  6 | """
  7 | 
  8 | from deepod.core.base_model import BaseDeepAD
  9 | from deepod.core.networks.base_networks import MLPnet
 10 | from torch.utils.data import DataLoader
 11 | import torch.nn.functional as F
 12 | import torch
 13 | import numpy as np
 14 | 
 15 | 
 16 | class NeuTraL(BaseDeepAD):
 17 |     """
 18 |     Neural Transformation Learning-based Anomaly Detection (ICML'21)
 19 | 
 20 |     """
 21 |     def __init__(self, epochs=100, batch_size=64, lr=1e-3,
 22 |                  n_trans=11, trans_type='residual', temp=0.1,
 23 |                  rep_dim=128, hidden_dims='100,50', trans_hidden_dims=50,
 24 |                  act='LeakyReLU', bias=False,
 25 |                  epoch_steps=-1, prt_steps=10, device='cuda',
 26 |                  verbose=1, random_state=42):
 27 |         super(NeuTraL, self).__init__(
 28 |             model_name='NeuTraL', epochs=epochs, batch_size=batch_size, lr=lr,
 29 |             epoch_steps=epoch_steps, prt_steps=prt_steps, device=device,
 30 |             verbose=verbose, random_state=random_state
 31 |         )
 32 | 
 33 |         self.n_trans = n_trans
 34 |         self.trans_type = trans_type
 35 |         self.temp = temp
 36 | 
 37 |         self.trans_hidden_dims = trans_hidden_dims
 38 |         self.enc_hidden_dims = hidden_dims
 39 |         self.rep_dim = rep_dim
 40 |         self.act = act
 41 |         self.bias = bias
 42 |         return
 43 | 
 44 |     def training_prepare(self, X, y):
 45 |         train_loader = DataLoader(X, batch_size=self.batch_size, shuffle=True)
 46 | 
 47 |         net = TabNeutralADNet(
 48 |             n_features=self.n_features,
 49 |             n_trans=self.n_trans,
 50 |             trans_type=self.trans_type,
 51 |             enc_hidden_dims=self.enc_hidden_dims,
 52 |             trans_hidden_dims=self.trans_hidden_dims,
 53 |             activation=self.act,
 54 |             bias=self.bias,
 55 |             rep_dim=self.rep_dim,
 56 |             device=self.device
 57 |         )
 58 | 
 59 |         criterion = DCL(temperature=self.temp)
 60 | 
 61 |         if self.verbose >=2:
 62 |             print(net)
 63 | 
 64 |         return train_loader, net, criterion
 65 | 
 66 |     def inference_prepare(self, X):
 67 |         test_loader = DataLoader(X, batch_size=self.batch_size, drop_last=False, shuffle=False)
 68 |         self.criterion.reduction = 'none'
 69 |         return test_loader
 70 | 
 71 |     def training_forward(self, batch_x, net, criterion):
 72 |         batch_x = batch_x.float().to(self.device)
 73 |         z = net(batch_x)
 74 |         loss = criterion(z)
 75 |         return loss
 76 | 
 77 |     def inference_forward(self, batch_x, net, criterion):
 78 |         batch_x = batch_x.float().to(self.device)
 79 |         batch_z = net(batch_x)
 80 |         s = criterion(batch_z)
 81 |         return batch_z, s
 82 | 
 83 | 
 84 | class TabNeutralADNet(torch.nn.Module):
 85 |     """
 86 |     network class of NeuTraL for tabular data
 87 | 
 88 |     Parameters
 89 |     ----------
 90 |     n_features: int
 91 |         dimensionality of input data
 92 | 
 93 |     n_trans: int
 94 |         the number of transformation times
 95 | 
 96 |     trans_type: str, default='residual'
 97 |         transformation type
 98 | 
 99 |     enc_hidden_dims: list or str or int
100 |         the number of neural units of hidden layers in encoder net
101 | 
102 |     trans_hidden_dims: list or str or int
103 |         the number of neural units of hidden layers in transformation net
104 | 
105 |     rep_dim: int
106 |         representation dimensionality
107 | 
108 |     activation: str
109 |         activation layer name
110 | 
111 |     device: str
112 |         device
113 |     """
114 |     def __init__(self, n_features, n_trans=11, trans_type='residual',
115 |                  enc_hidden_dims='24,24,24,24', trans_hidden_dims=24,
116 |                  rep_dim=24,
117 |                  activation='ReLU',
118 |                  bias=False,
119 |                  device='cuda'):
120 |         super(TabNeutralADNet, self).__init__()
121 | 
122 |         self.enc = MLPnet(
123 |             n_features=n_features,
124 |             n_hidden=enc_hidden_dims,
125 |             n_output=rep_dim,
126 |             activation=activation,
127 |             bias=bias,
128 |             batch_norm=False
129 |         )
130 |         self.trans = torch.nn.ModuleList(
131 |             [MLPnet(n_features=n_features,
132 |                     n_hidden=trans_hidden_dims,
133 |                     n_output=n_features,
134 |                     activation=activation,
135 |                     bias=bias,
136 |                     batch_norm=False) for _ in range(n_trans)]
137 |         )
138 | 
139 |         self.trans.to(device)
140 |         self.enc.to(device)
141 | 
142 |         self.n_trans = n_trans
143 |         self.trans_type = trans_type
144 |         self.z_dim = rep_dim
145 | 
146 |     def forward(self, x):
147 |         x_transform = torch.empty(x.shape[0], self.n_trans, x.shape[-1]).to(x)
148 | 
149 |         for i in range(self.n_trans):
150 |             mask = self.trans[i](x)
151 |             if self.trans_type == 'forward':
152 |                 x_transform[:, i] = mask
153 |             elif self.trans_type == 'mul':
154 |                 mask = torch.sigmoid(mask)
155 |                 x_transform[:, i] = mask * x
156 |             elif self.trans_type == 'residual':
157 |                 x_transform[:, i] = mask + x
158 | 
159 |         x_cat = torch.cat([x.unsqueeze(1), x_transform], 1)
160 |         zs = self.enc(x_cat.reshape(-1, x.shape[-1]))
161 |         zs = zs.reshape(x.shape[0], self.n_trans+1, self.z_dim)
162 |         return zs
163 | 
164 | 
class DCL(torch.nn.Module):
    """
    Contrastive loss over the views of each sample.

    The input holds, for every sample, the representation of the original
    data at index 0 of the second axis followed by its transformed views.

    Parameters
    ----------
    temperature: float, optional (default=0.1)
        divisor applied to the similarities before the exponential

    reduction: str, optional (default='mean')
        ``'mean'`` or ``'sum'`` reduce over samples, any other value
        (e.g. ``'none'``) returns one loss value per sample
    """
    def __init__(self, temperature=0.1, reduction='mean'):
        super().__init__()
        self.temp = temperature
        self.reduction = reduction

    def forward(self, z):
        z = F.normalize(z, p=2, dim=-1)
        n_samples, n_views, _ = z.shape
        anchor = z[:, 0]   # n, z
        views = z[:, 1:]   # n, k-1, z

        # exponentiated similarities between all pairs of views: n, k, k
        pairwise = torch.exp(torch.matmul(z, z.permute(0, 2, 1) / self.temp))
        # drop the diagonal (similarity of a view with itself)
        off_diagonal = (torch.ones_like(pairwise).to(z)
                        - torch.eye(n_views).unsqueeze(0).to(z)).bool()
        pairwise = pairwise.masked_select(off_diagonal).view(n_samples, n_views, -1)
        denominator = pairwise[:, 1:].sum(-1)  # n, k-1

        # similarity of each transformed view with the original: n, k-1
        numerator = torch.exp(torch.sum(views * anchor.unsqueeze(1), -1) / self.temp)

        n_transformed = n_views - 1
        scale = 1 / np.abs(n_transformed * np.log(1.0 / n_transformed))

        per_sample = ((torch.log(denominator) - torch.log(numerator)) * scale).sum(1)

        if self.reduction == 'mean':
            return torch.mean(per_sample)
        if self.reduction == 'sum':
            return torch.sum(per_sample)
        return per_sample
198 | 


--------------------------------------------------------------------------------
/deepod/models/tabular/rdp.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Random distance prediction-based anomaly detection
  4 | this script is partially adapted from https://github.com/billhhh/RDP
  5 | @Author: Hongzuo Xu <hongzuoxu@126.com, xuhongzuo13@nudt.edu.cn>
  6 | """
  7 | 
  8 | from deepod.core.base_model import BaseDeepAD
  9 | from deepod.core.networks.base_networks import MLPnet
 10 | from torch.utils.data import DataLoader
 11 | import torch.nn.functional as F
 12 | import torch
 13 | import copy
 14 | 
 15 | 
 16 | class RDP(BaseDeepAD):
 17 |     """
 18 |     Unsupervised Representation Learning by Predicting Random Distances
 19 |     (IJCAI'20)
 20 | 
 21 |     Parameters
 22 |     ----------
 23 |     epochs: int, optional (default=100)
 24 |         Number of training epochs
 25 | 
 26 |     batch_size: int, optional (default=64)
 27 |         Number of samples in a mini-batch
 28 | 
 29 |     lr: float, optional (default=1e-3)
 30 |         Learning rate
 31 | 
 32 |     rep_dim: int, optional (default=128)
 33 |         Dimensionality of the representation space
 34 | 
 35 |     hidden_dims: list, str or int, optional (default='100,50')
 36 |         Number of neural units in hidden layers
 37 |             - If list, each item is a layer
 38 |             - If str, neural units of hidden layers are split by comma
 39 |             - If int, number of neural units of single hidden layer
 40 | 
 41 |     act: str, optional (default='ReLU')
 42 |         activation layer name
 43 |         choice = ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh']
 44 | 
 45 |     bias: bool, optional (default=False)
 46 |         Additive bias in linear layer
 47 | 
 48 |     epoch_steps: int, optional (default=-1)
 49 |         Maximum steps in an epoch
 50 |             - If -1, all the batches will be processed
 51 | 
 52 |     prt_steps: int, optional (default=10)
 53 |         Number of epoch intervals per printing
 54 | 
 55 |     device: str, optional (default='cuda')
 56 |         torch device,
 57 | 
 58 |     verbose: int, optional (default=1)
 59 |         Verbosity mode
 60 | 
 61 |     random_state： int, optional (default=42)
 62 |         the seed used by the random
 63 |     """
 64 |     def __init__(self, epochs=100, batch_size=64, lr=1e-3,
 65 |                  rep_dim=128, hidden_dims='100,50', act='LeakyReLU', bias=False,
 66 |                  epoch_steps=-1, prt_steps=10, device='cuda',
 67 |                  verbose=2, random_state=42):
 68 |         super(RDP, self).__init__(
 69 |             model_name='RDP', epochs=epochs, batch_size=batch_size, lr=lr,
 70 |             epoch_steps=epoch_steps, prt_steps=prt_steps, device=device,
 71 |             verbose=verbose, random_state=random_state
 72 |         )
 73 | 
 74 |         self.hidden_dims = hidden_dims
 75 |         self.rep_dim = rep_dim
 76 |         self.act = act
 77 |         self.bias = bias
 78 |         return
 79 | 
 80 |     def training_prepare(self, X, y):
 81 |         train_loader = DataLoader(X, batch_size=self.batch_size, shuffle=True)
 82 | 
 83 |         net = MLPnet(
 84 |             n_features=self.n_features,
 85 |             n_hidden=self.hidden_dims, n_output=self.rep_dim,
 86 |             activation=self.act, bias=self.bias,
 87 |             skip_connection=None,
 88 |         ).to(self.device)
 89 | 
 90 |         rp_net = copy.deepcopy(net)
 91 |         criterion = RDPLoss(rp_net)
 92 | 
 93 |         if self.verbose >= 2:
 94 |             print(net)
 95 | 
 96 |         return train_loader, net, criterion
 97 | 
 98 |     def inference_prepare(self, X):
 99 |         test_loader = DataLoader(X, batch_size=self.batch_size, drop_last=False, shuffle=False)
100 |         self.criterion.reduction = 'none'
101 |         return test_loader
102 | 
103 |     def training_forward(self, batch_x, net, criterion):
104 |         batch_x1 = batch_x[torch.randperm(batch_x.shape[0])]
105 |         batch_x = batch_x.float().to(self.device)
106 |         batch_x1 = batch_x1.float().to(self.device)
107 |         z, z1 = net(batch_x), net(batch_x1)
108 |         loss = criterion(z, z1, batch_x, batch_x1)
109 |         return loss
110 | 
111 |     def inference_forward(self, batch_x, net, criterion):
112 |         batch_x = batch_x.float().to(self.device)
113 |         batch_x1 = batch_x[torch.randperm(batch_x.shape[0])]
114 |         batch_z, batch_z1 = net(batch_x), net(batch_x1)
115 |         s = criterion(batch_z, batch_z1, batch_x, batch_x1)
116 |         return batch_z, s
117 | 
118 | 
class RDPLoss(torch.nn.Module):
    """Loss combining a representation-gap term and a pairwise-similarity term.

    For a pair of inputs ``(x, x1)`` with learned representations
    ``(rep, rep1)``, the target network ``rp_net`` produces
    ``(rep_target, rep1_target)``. The loss is the sum of

    * ``gap_loss``: MSE between ``rep`` and ``rep_target``;
    * ``rdp_loss``: MSE between the inner product of the L1-normalised target
      representations and the inner product of the L1-normalised learned
      representations.

    Parameters
    ----------
    random_projection_net : torch.nn.Module
        Network that produces the target representations.
    reduction : str, default='mean'
        ``'mean'`` or ``'sum'`` aggregate over the batch and return a scalar;
        any other value (e.g. ``'none'``) returns one loss value per sample.
        The attribute may be reassigned after construction and is read again
        on every ``forward`` call.
    """

    def __init__(self, random_projection_net, reduction='mean'):
        super(RDPLoss, self).__init__()
        self.rp_net = random_projection_net
        # Kept so existing code that accesses ``.mse`` keeps working.
        # ``forward`` does not use it because it would pin the reduction
        # chosen here even after ``self.reduction`` is changed.
        self.mse = torch.nn.MSELoss(reduction=reduction)
        self.reduction = reduction

    @staticmethod
    def _pairwise_similarity(a, b):
        """Row-wise inner product of the L1-normalised rows of ``a`` and ``b``."""
        return torch.sum(F.normalize(a, p=1, dim=1) *
                         F.normalize(b, p=1, dim=1), dim=1)

    def forward(self, rep, rep1, x, x1):
        """Return ``gap_loss + rdp_loss`` for the pair ``(x, x1)``.

        Parameters
        ----------
        rep, rep1 : torch.Tensor
            Learned representations of ``x`` and ``x1``; normalised along dim 1.
        x, x1 : torch.Tensor
            Inputs fed to the target network.

        Returns
        -------
        torch.Tensor
            A scalar when ``self.reduction`` is ``'mean'`` or ``'sum'``,
            otherwise one value per row.
        """
        rep_target = self.rp_net(x)
        rep1_target = self.rp_net(x1)

        d_target = self._pairwise_similarity(rep_target, rep1_target)
        d_pred = self._pairwise_similarity(rep, rep1)

        if self.reduction in ('mean', 'sum'):
            # Use the *current* reduction so that changing the attribute after
            # construction takes effect.
            gap_loss = F.mse_loss(rep, rep_target, reduction=self.reduction)
            rdp_loss = F.mse_loss(d_target, d_pred, reduction=self.reduction)
        else:
            # Per-sample output: average the element-wise gap over features so
            # both terms have one value per row.
            gap_loss = torch.mean(F.mse_loss(rep, rep_target, reduction='none'), dim=1)
            rdp_loss = F.mse_loss(d_target, d_pred, reduction='none')

        return gap_loss + rdp_loss
144 | 


--------------------------------------------------------------------------------
/deepod/models/time_series/__init__.py:
--------------------------------------------------------------------------------
 1 | # unsupervised
 2 | from .dif import DeepIsolationForestTS
 3 | from .dsvdd import DeepSVDDTS
 4 | from .tranad import TranAD
 5 | from .usad import USAD
 6 | from .couta import COUTA
 7 | from .tcned import TcnED
 8 | from .anomalytransformer import AnomalyTransformer
 9 | from .timesnet import TimesNet
10 | from .dcdetector import DCdetector
11 | 
12 | # weakly-supervised
13 | from .dsad import DeepSADTS
14 | from .devnet import DevNetTS
15 | from .prenet import PReNetTS
16 | 
17 | 
# Public names exported by ``from deepod.models.time_series import *``.
# TcnED is imported by this module and therefore listed here as well.
__all__ = [
    # unsupervised
    'DeepIsolationForestTS', 'DeepSVDDTS', 'TranAD', 'USAD', 'COUTA', 'TcnED',
    'AnomalyTransformer', 'TimesNet', 'DCdetector',
    # weakly-supervised
    'DeepSADTS', 'DevNetTS', 'PReNetTS',
]
20 | 


--------------------------------------------------------------------------------
/deepod/test/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-


--------------------------------------------------------------------------------
/deepod/test/test_anomalyTransformer.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | from sklearn.metrics import roc_auc_score
 12 | import torch
 13 | import pandas as pd
 14 | 
 15 | # temporary solution for relative imports in case pyod is not installed
 16 | # if deepod is installed, no need to use the following line
 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 18 | 
 19 | from deepod.models.time_series.anomalytransformer import AnomalyTransformer
 20 | 
 21 | 
 22 | class TestAnomalyTransformer(unittest.TestCase):
 23 |     def setUp(self):
 24 |         train_file = 'data/omi-1/omi-1_train.csv'
 25 |         test_file = 'data/omi-1/omi-1_test.csv'
 26 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 27 |         test_df = pd.read_csv(test_file, index_col=0)
 28 |         y = test_df['label'].values
 29 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 30 |         self.Xts_train = train_df.values
 31 |         self.Xts_test = test_df.values
 32 |         self.yts_test = y
 33 | 
 34 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 35 |         self.clf = AnomalyTransformer(seq_len=100, stride=1, epochs=2,
 36 |                                       batch_size=32, k=3, lr=1e-4,
 37 |                                       device=device, random_state=42)
 38 |         self.clf.fit(self.Xts_train)
 39 | 
 40 |     def test_parameters(self):
 41 |         assert (hasattr(self.clf, 'decision_scores_') and
 42 |                 self.clf.decision_scores_ is not None)
 43 |         assert (hasattr(self.clf, 'labels_') and
 44 |                 self.clf.labels_ is not None)
 45 |         assert (hasattr(self.clf, 'threshold_') and
 46 |                 self.clf.threshold_ is not None)
 47 | 
 48 |     def test_train_scores(self):
 49 |         assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0])
 50 | 
 51 |     def test_prediction_scores(self):
 52 |         pred_scores = self.clf.decision_function(self.Xts_test)
 53 |         assert_equal(pred_scores.shape[0], self.Xts_test.shape[0])
 54 | 
 55 |     def test_prediction_labels(self):
 56 |         pred_labels = self.clf.predict(self.Xts_test)
 57 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 58 | 
 59 |     # def test_prediction_proba(self):
 60 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 61 |     #     assert (pred_proba.min() >= 0)
 62 |     #     assert (pred_proba.max() <= 1)
 63 |     #
 64 |     # def test_prediction_proba_linear(self):
 65 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 66 |     #     assert (pred_proba.min() >= 0)
 67 |     #     assert (pred_proba.max() <= 1)
 68 |     #
 69 |     # def test_prediction_proba_unify(self):
 70 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 71 |     #     assert (pred_proba.min() >= 0)
 72 |     #     assert (pred_proba.max() <= 1)
 73 |     #
 74 |     # def test_prediction_proba_parameter(self):
 75 |     #     with assert_raises(ValueError):
 76 |     #         self.clf.predict_proba(self.X_test, method='something')
 77 | 
 78 |     def test_prediction_labels_confidence(self):
 79 |         pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True)
 80 | 
 81 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 82 |         assert_equal(confidence.shape, self.yts_test.shape)
 83 |         assert (confidence.min() >= 0)
 84 |         assert (confidence.max() <= 1)
 85 | 
 86 |     # def test_prediction_proba_linear_confidence(self):
 87 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 88 |     #                                                     method='linear',
 89 |     #                                                     return_confidence=True)
 90 |     #     assert (pred_proba.min() >= 0)
 91 |     #     assert (pred_proba.max() <= 1)
 92 |     #
 93 |     #     assert_equal(confidence.shape, self.y_test.shape)
 94 |     #     assert (confidence.min() >= 0)
 95 |     #     assert (confidence.max() <= 1)
 96 |     #
 97 |     # def test_fit_predict(self):
 98 |     #     pred_labels = self.clf.fit_predict(self.X_train)
 99 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
100 |     #
101 |     # def test_fit_predict_score(self):
102 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
103 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
104 |     #                                scoring='roc_auc_score')
105 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
106 |     #                                scoring='prc_n_score')
107 |     #     with assert_raises(NotImplementedError):
108 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
109 |     #                                    scoring='something')
110 |     #
111 |     # def test_predict_rank(self):
112 |     #     pred_socres = self.clf.decision_function(self.X_test)
113 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
114 |     #
115 |     #     # assert the order is reserved
116 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
117 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
118 |     #     assert_array_less(-0.1, pred_ranks)
119 |     #
120 |     # def test_predict_rank_normalized(self):
121 |     #     pred_socres = self.clf.decision_function(self.X_test)
122 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
123 |     #
124 |     #     # assert the order is reserved
125 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
126 |     #     assert_array_less(pred_ranks, 1.01)
127 |     #     assert_array_less(-0.1, pred_ranks)
128 | 
129 |     # def test_plot(self):
130 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
131 |     #     assert_array_less(0, os)
132 | 
133 |     # def test_model_clone(self):
134 |     #     clone_clf = clone(self.clf)
135 | 
136 |     def tearDown(self):
137 |         pass
138 | 
139 | 
140 | if __name__ == '__main__':
141 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_couta.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | import numpy as np
  9 | 
 10 | # noinspection PyProtectedMember
 11 | from numpy.testing import assert_equal
 12 | import torch
 13 | import pandas as pd
 14 | 
 15 | # temporary solution for relative imports in case pyod is not installed
 16 | # if deepod is installed, no need to use the following line
 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 18 | 
 19 | from deepod.models.time_series.couta import COUTA
 20 | from deepod.metrics import ts_metrics_enhanced
 21 | 
 22 | 
 23 | class TestCOUTA(unittest.TestCase):
 24 |     def setUp(self):
 25 |         train_file = 'data/omi-1/omi-1_train.csv'
 26 |         test_file = 'data/omi-1/omi-1_test.csv'
 27 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 28 |         test_df = pd.read_csv(test_file, index_col=0)
 29 |         y = test_df['label'].values
 30 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 31 |         self.Xts_train = train_df.values
 32 |         self.Xts_test = test_df.values
 33 |         self.yts_test = y
 34 | 
 35 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 36 |         self.clf = COUTA(seq_len=100, stride=5,
 37 |                          epochs=5, hidden_dims=50,
 38 |                          device=device, random_state=42)
 39 |         self.clf.fit(self.Xts_train)
 40 | 
 41 |     def test_parameters(self):
 42 |         assert (hasattr(self.clf, 'decision_scores_') and
 43 |                 self.clf.decision_scores_ is not None)
 44 |         assert (hasattr(self.clf, 'labels_') and
 45 |                 self.clf.labels_ is not None)
 46 |         assert (hasattr(self.clf, 'threshold_') and
 47 |                 self.clf.threshold_ is not None)
 48 | 
 49 |     def test_train_scores(self):
 50 |         assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0])
 51 | 
 52 |     def test_prediction_scores(self):
 53 |         pred_scores = self.clf.decision_function(self.Xts_test)
 54 |         assert_equal(pred_scores.shape[0], self.Xts_test.shape[0])
 55 | 
 56 |     def test_metric(self):
 57 |         pred_scores = self.clf.decision_function(self.Xts_test)
 58 | 
 59 |         anomaly_ratio = 1
 60 |         thresh = np.percentile(pred_scores, 100 - anomaly_ratio)
 61 |         pred = (pred_scores > thresh).astype(int)
 62 |         metrics = ts_metrics_enhanced(self.yts_test, pred_scores, pred)
 63 |         print("metrics", metrics)
 64 | 
 65 |     def test_prediction_labels(self):
 66 |         pred_labels = self.clf.predict(self.Xts_test)
 67 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 68 | 
 69 |     # def test_prediction_proba(self):
 70 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 71 |     #     assert (pred_proba.min() >= 0)
 72 |     #     assert (pred_proba.max() <= 1)
 73 |     #
 74 |     # def test_prediction_proba_linear(self):
 75 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 76 |     #     assert (pred_proba.min() >= 0)
 77 |     #     assert (pred_proba.max() <= 1)
 78 |     #
 79 |     # def test_prediction_proba_unify(self):
 80 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 81 |     #     assert (pred_proba.min() >= 0)
 82 |     #     assert (pred_proba.max() <= 1)
 83 |     #
 84 |     # def test_prediction_proba_parameter(self):
 85 |     #     with assert_raises(ValueError):
 86 |     #         self.clf.predict_proba(self.X_test, method='something')
 87 | 
 88 |     def test_prediction_labels_confidence(self):
 89 |         pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True)
 90 | 
 91 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 92 |         assert_equal(confidence.shape, self.yts_test.shape)
 93 |         assert (confidence.min() >= 0)
 94 |         assert (confidence.max() <= 1)
 95 | 
 96 | 
 97 |     # def test_prediction_proba_linear_confidence(self):
 98 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 99 |     #                                                     method='linear',
100 |     #                                                     return_confidence=True)
101 |     #     assert (pred_proba.min() >= 0)
102 |     #     assert (pred_proba.max() <= 1)
103 |     #
104 |     #     assert_equal(confidence.shape, self.y_test.shape)
105 |     #     assert (confidence.min() >= 0)
106 |     #     assert (confidence.max() <= 1)
107 |     #
108 |     # def test_fit_predict(self):
109 |     #     pred_labels = self.clf.fit_predict(self.X_train)
110 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
111 |     #
112 |     # def test_fit_predict_score(self):
113 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
114 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
115 |     #                                scoring='roc_auc_score')
116 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
117 |     #                                scoring='prc_n_score')
118 |     #     with assert_raises(NotImplementedError):
119 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
120 |     #                                    scoring='something')
121 |     #
122 |     # def test_predict_rank(self):
123 |     #     pred_socres = self.clf.decision_function(self.X_test)
124 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
125 |     #
126 |     #     # assert the order is reserved
127 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
128 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
129 |     #     assert_array_less(-0.1, pred_ranks)
130 |     #
131 |     # def test_predict_rank_normalized(self):
132 |     #     pred_socres = self.clf.decision_function(self.X_test)
133 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
134 |     #
135 |     #     # assert the order is reserved
136 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
137 |     #     assert_array_less(pred_ranks, 1.01)
138 |     #     assert_array_less(-0.1, pred_ranks)
139 | 
140 |     # def test_plot(self):
141 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
142 |     #     assert_array_less(0, os)
143 | 
144 |     # def test_model_clone(self):
145 |     #     clone_clf = clone(self.clf)
146 | 
147 |     def tearDown(self):
148 |         pass
149 | 
150 | 
151 | if __name__ == '__main__':
152 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_dcdetector.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | from sklearn.metrics import roc_auc_score
 12 | import torch
 13 | import pandas as pd
 14 | 
 15 | # temporary solution for relative imports in case pyod is not installed
 16 | # if deepod is installed, no need to use the following line
 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 18 | 
 19 | from deepod.models.time_series.dcdetector import DCdetector
 20 | 
 21 | 
 22 | class TestDCdetector(unittest.TestCase):
 23 |     def setUp(self):
 24 |         train_file = 'data/omi-1/omi-1_train.csv'
 25 |         test_file = 'data/omi-1/omi-1_test.csv'
 26 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 27 |         test_df = pd.read_csv(test_file, index_col=0)
 28 |         y = test_df['label'].values
 29 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 30 |         self.Xts_train = train_df.values
 31 |         self.Xts_test = test_df.values
 32 |         self.yts_test = y
 33 | 
 34 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 35 |         self.clf = DCdetector(seq_len=90, stride=1, epochs=2,
 36 |                               batch_size=32, lr=1e-4, patch_size=[5],
 37 |                               device=device, random_state=42)
 38 |         self.clf.fit(self.Xts_train)
 39 | 
 40 |     def test_parameters(self):
 41 |         assert (hasattr(self.clf, 'decision_scores_') and
 42 |                 self.clf.decision_scores_ is not None)
 43 |         assert (hasattr(self.clf, 'labels_') and
 44 |                 self.clf.labels_ is not None)
 45 |         assert (hasattr(self.clf, 'threshold_') and
 46 |                 self.clf.threshold_ is not None)
 47 | 
 48 |     def test_train_scores(self):
 49 |         assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0])
 50 | 
 51 |     def test_prediction_scores(self):
 52 |         pred_scores = self.clf.decision_function(self.Xts_test)
 53 |         assert_equal(pred_scores.shape[0], self.Xts_test.shape[0])
 54 | 
 55 |     def test_prediction_labels(self):
 56 |         pred_labels = self.clf.predict(self.Xts_test, return_confidence=False)
 57 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 58 | 
 59 |     # def test_prediction_proba(self):
 60 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 61 |     #     assert (pred_proba.min() >= 0)
 62 |     #     assert (pred_proba.max() <= 1)
 63 |     #
 64 |     # def test_prediction_proba_linear(self):
 65 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 66 |     #     assert (pred_proba.min() >= 0)
 67 |     #     assert (pred_proba.max() <= 1)
 68 |     #
 69 |     # def test_prediction_proba_unify(self):
 70 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 71 |     #     assert (pred_proba.min() >= 0)
 72 |     #     assert (pred_proba.max() <= 1)
 73 |     #
 74 |     # def test_prediction_proba_parameter(self):
 75 |     #     with assert_raises(ValueError):
 76 |     #         self.clf.predict_proba(self.X_test, method='something')
 77 | 
 78 |     def test_prediction_labels_confidence(self):
 79 |         pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True)
 80 | 
 81 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 82 |         assert_equal(confidence.shape, self.yts_test.shape)
 83 |         assert (confidence.min() >= 0)
 84 |         assert (confidence.max() <= 1)
 85 | 
 86 |     # def test_prediction_proba_linear_confidence(self):
 87 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 88 |     #                                                     method='linear',
 89 |     #                                                     return_confidence=True)
 90 |     #     assert (pred_proba.min() >= 0)
 91 |     #     assert (pred_proba.max() <= 1)
 92 |     #
 93 |     #     assert_equal(confidence.shape, self.y_test.shape)
 94 |     #     assert (confidence.min() >= 0)
 95 |     #     assert (confidence.max() <= 1)
 96 |     #
 97 |     # def test_fit_predict(self):
 98 |     #     pred_labels = self.clf.fit_predict(self.X_train)
 99 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
100 |     #
101 |     # def test_fit_predict_score(self):
102 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
103 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
104 |     #                                scoring='roc_auc_score')
105 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
106 |     #                                scoring='prc_n_score')
107 |     #     with assert_raises(NotImplementedError):
108 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
109 |     #                                    scoring='something')
110 |     #
111 |     # def test_predict_rank(self):
112 |     #     pred_socres = self.clf.decision_function(self.X_test)
113 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
114 |     #
115 |     #     # assert the order is reserved
116 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
117 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
118 |     #     assert_array_less(-0.1, pred_ranks)
119 |     #
120 |     # def test_predict_rank_normalized(self):
121 |     #     pred_socres = self.clf.decision_function(self.X_test)
122 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
123 |     #
124 |     #     # assert the order is reserved
125 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
126 |     #     assert_array_less(pred_ranks, 1.01)
127 |     #     assert_array_less(-0.1, pred_ranks)
128 | 
129 |     # def test_plot(self):
130 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
131 |     #     assert_array_less(0, os)
132 | 
133 |     # def test_model_clone(self):
134 |     #     clone_clf = clone(self.clf)
135 | 
136 |     def tearDown(self):
137 |         pass
138 | 
139 | 
140 | if __name__ == '__main__':
141 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_dif.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | from sklearn.metrics import roc_auc_score
 12 | import torch
 13 | import pandas as pd
 14 | 
 15 | # temporary solution for relative imports in case pyod is not installed
 16 | # if deepod is installed, no need to use the following line
 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 18 | 
 19 | from deepod.models.tabular.dif import DeepIsolationForest
 20 | from deepod.models.time_series.dif import DeepIsolationForestTS
 21 | from deepod.utils.data import generate_data
 22 | 
 23 | 
 24 | class TestDIF(unittest.TestCase):
 25 |     def setUp(self):
 26 |         self.n_train = 1000
 27 |         self.n_test = 600
 28 |         self.contamination = 0.1
 29 |         self.roc_floor = 0.8
 30 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 31 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 32 |             contamination=self.contamination, random_state=42
 33 |         )
 34 | 
 35 |         train_file = 'data/omi-1/omi-1_train.csv'
 36 |         test_file = 'data/omi-1/omi-1_test.csv'
 37 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 38 |         test_df = pd.read_csv(test_file, index_col=0)
 39 |         y = test_df['label'].values
 40 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 41 |         self.Xts_train = train_df.values
 42 |         self.Xts_test = test_df.values
 43 |         self.yts_test = y
 44 | 
 45 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 46 |         self.clf = DeepIsolationForest(device=device, n_ensemble=50, n_estimators=6)
 47 |         self.clf.fit(self.X_train)
 48 | 
 49 |         self.clf2 = DeepIsolationForestTS(seq_len=100, stride=5,
 50 |                                           epochs=20, hidden_dims='50',
 51 |                                           device=device,
 52 |                                           random_state=42)
 53 |         self.clf2.fit(self.Xts_train)
 54 | 
 55 |     def test_parameters(self):
 56 |         assert (hasattr(self.clf, 'decision_scores_') and
 57 |                 self.clf.decision_scores_ is not None)
 58 |         assert (hasattr(self.clf, 'labels_') and
 59 |                 self.clf.labels_ is not None)
 60 |         assert (hasattr(self.clf, 'threshold_') and
 61 |                 self.clf.threshold_ is not None)
 62 | 
 63 |     # def test_train_scores(self):
 64 |     #     assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 65 | 
 66 |     def test_prediction_scores(self):
 67 |         pred_scores = self.clf.decision_function(self.X_test)
 68 |         pred_scores2 = self.clf2.decision_function(self.Xts_test)
 69 | 
 70 |         # check score shapes
 71 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 72 |         assert_equal(pred_scores2.shape[0], self.Xts_test.shape[0])
 73 | 
 74 |         # check performance
 75 |         assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)
 76 |         # adj_eval_info = cal_metrics(self.yts_test, pred_scores2, pa=True)
 77 |         # assert (adj_eval_info[2] >= self.ts_f1_floor)
 78 | 
 79 |     def test_prediction_labels(self):
 80 |         pred_labels = self.clf.predict(self.X_test)
 81 |         pred_labels2 = self.clf2.predict(self.Xts_test)
 82 |         assert_equal(pred_labels.shape, self.y_test.shape)
 83 |         assert_equal(pred_labels2.shape, self.yts_test.shape)
 84 | 
 85 |     # def test_prediction_proba(self):
 86 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 87 |     #     assert (pred_proba.min() >= 0)
 88 |     #     assert (pred_proba.max() <= 1)
 89 |     #
 90 |     # def test_prediction_proba_linear(self):
 91 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 92 |     #     assert (pred_proba.min() >= 0)
 93 |     #     assert (pred_proba.max() <= 1)
 94 |     #
 95 |     # def test_prediction_proba_unify(self):
 96 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 97 |     #     assert (pred_proba.min() >= 0)
 98 |     #     assert (pred_proba.max() <= 1)
 99 |     #
100 |     # def test_prediction_proba_parameter(self):
101 |     #     with assert_raises(ValueError):
102 |     #         self.clf.predict_proba(self.X_test, method='something')
103 | 
104 |     def test_prediction_labels_confidence(self):
105 |         pred_labels, confidence = self.clf.predict(self.X_test, return_confidence=True)
106 | 
107 |         assert_equal(pred_labels.shape, self.y_test.shape)
108 |         assert_equal(confidence.shape, self.y_test.shape)
109 |         assert (confidence.min() >= 0)
110 |         assert (confidence.max() <= 1)
111 | 
112 |     # def test_prediction_proba_linear_confidence(self):
113 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
114 |     #                                                     method='linear',
115 |     #                                                     return_confidence=True)
116 |     #     assert (pred_proba.min() >= 0)
117 |     #     assert (pred_proba.max() <= 1)
118 |     #
119 |     #     assert_equal(confidence.shape, self.y_test.shape)
120 |     #     assert (confidence.min() >= 0)
121 |     #     assert (confidence.max() <= 1)
122 |     #
123 |     # def test_fit_predict(self):
124 |     #     pred_labels = self.clf.fit_predict(self.X_train)
125 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
126 |     #
127 |     # def test_fit_predict_score(self):
128 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
129 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
130 |     #                                scoring='roc_auc_score')
131 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
132 |     #                                scoring='prc_n_score')
133 |     #     with assert_raises(NotImplementedError):
134 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
135 |     #                                    scoring='something')
136 |     #
137 |     # def test_predict_rank(self):
138 |     #     pred_socres = self.clf.decision_function(self.X_test)
139 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
140 |     #
141 |     #     # assert the order is reserved
142 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
143 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
144 |     #     assert_array_less(-0.1, pred_ranks)
145 |     #
146 |     # def test_predict_rank_normalized(self):
147 |     #     pred_socres = self.clf.decision_function(self.X_test)
148 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
149 |     #
150 |     #     # assert the order is reserved
151 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
152 |     #     assert_array_less(pred_ranks, 1.01)
153 |     #     assert_array_less(-0.1, pred_ranks)
154 | 
155 |     # def test_plot(self):
156 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
157 |     #     assert_array_less(0, os)
158 | 
159 |     # def test_model_clone(self):
160 |     #     clone_clf = clone(self.clf)
161 | 
162 |     def tearDown(self):
163 |         pass
164 | 
165 | 
166 | if __name__ == '__main__':
167 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_dsvdd.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | from sklearn.metrics import roc_auc_score
 12 | import torch
 13 | import pandas as pd
 14 | 
 15 | # temporary solution for relative imports in case pyod is not installed
 16 | # if deepod is installed, no need to use the following line
 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 18 | 
 19 | from deepod.models.tabular.dsvdd import DeepSVDD
 20 | from deepod.models.time_series.dsvdd import DeepSVDDTS
 21 | from deepod.utils.data import generate_data
 22 | 
 23 | 
 24 | class TestDeepSVDD(unittest.TestCase):
 25 |     def setUp(self):
 26 |         self.n_train = 200
 27 |         self.n_test = 100
 28 |         self.contamination = 0.1
 29 |         self.roc_floor = 0.8
 30 |         self.ts_f1_floor = 0.8
 31 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 32 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 33 |             contamination=self.contamination, random_state=42
 34 |         )
 35 | 
 36 |         train_file = 'data/omi-1/omi-1_train.csv'
 37 |         test_file = 'data/omi-1/omi-1_test.csv'
 38 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 39 |         test_df = pd.read_csv(test_file, index_col=0)
 40 |         y = test_df['label'].values
 41 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 42 |         self.Xts_train = train_df.values
 43 |         self.Xts_test = test_df.values
 44 |         self.yts_test = y
 45 | 
 46 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 47 |         self.clf = DeepSVDD(device=device, random_state=42)
 48 |         self.clf.fit(self.X_train)
 49 | 
 50 |         self.clf2 = DeepSVDDTS(seq_len=100, stride=5, epochs=20, hidden_dims='100,50',
 51 |                                device=device, network='TCN', random_state=42)
 52 |         self.clf2.fit(self.Xts_train)
 53 | 
 54 |         self.clf3 = DeepSVDDTS(seq_len=100, stride=5, epochs=20, hidden_dims='100,50',
 55 |                                device=device, network='ConvSeq', random_state=42)
 56 |         self.clf3.fit(self.Xts_train)
 57 | 
 58 |     def test_parameters(self):
 59 |         assert (hasattr(self.clf, 'decision_scores_') and
 60 |                 self.clf.decision_scores_ is not None)
 61 |         assert (hasattr(self.clf, 'labels_') and
 62 |                 self.clf.labels_ is not None)
 63 |         assert (hasattr(self.clf, 'threshold_') and
 64 |                 self.clf.threshold_ is not None)
 65 | 
 66 |     def test_train_scores(self):
 67 |         assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 68 |         assert_equal(len(self.clf2.decision_scores_), self.Xts_train.shape[0])
 69 |         assert_equal(len(self.clf3.decision_scores_), self.Xts_train.shape[0])
 70 | 
 71 |     def test_prediction_scores(self):
 72 |         pred_scores = self.clf.decision_function(self.X_test)
 73 |         pred_scores2 = self.clf2.decision_function(self.Xts_test)
 74 |         pred_scores3 = self.clf3.decision_function(self.Xts_test)
 75 | 
 76 |         # check score shapes
 77 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 78 |         assert_equal(pred_scores2.shape[0], self.Xts_test.shape[0])
 79 |         assert_equal(pred_scores3.shape[0], self.Xts_test.shape[0])
 80 | 
 81 |     def test_prediction_labels(self):
 82 |         pred_labels = self.clf.predict(self.X_test)
 83 |         pred_labels2 = self.clf2.predict(self.Xts_test)
 84 |         assert_equal(pred_labels.shape, self.y_test.shape)
 85 |         assert_equal(pred_labels2.shape, self.yts_test.shape)
 86 | 
 87 |     # def test_prediction_proba(self):
 88 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 89 |     #     assert (pred_proba.min() >= 0)
 90 |     #     assert (pred_proba.max() <= 1)
 91 |     #
 92 |     # def test_prediction_proba_linear(self):
 93 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 94 |     #     assert (pred_proba.min() >= 0)
 95 |     #     assert (pred_proba.max() <= 1)
 96 |     #
 97 |     # def test_prediction_proba_unify(self):
 98 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 99 |     #     assert (pred_proba.min() >= 0)
100 |     #     assert (pred_proba.max() <= 1)
101 |     #
102 |     # def test_prediction_proba_parameter(self):
103 |     #     with assert_raises(ValueError):
104 |     #         self.clf.predict_proba(self.X_test, method='something')
105 | 
106 |     def test_prediction_labels_confidence(self):
107 |         pred_labels, confidence = self.clf.predict(self.X_test, return_confidence=True)
108 | 
109 |         assert_equal(pred_labels.shape, self.y_test.shape)
110 |         assert_equal(confidence.shape, self.y_test.shape)
111 |         assert (confidence.min() >= 0)
112 |         assert (confidence.max() <= 1)
113 | 
114 |     # def test_prediction_proba_linear_confidence(self):
115 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
116 |     #                                                     method='linear',
117 |     #                                                     return_confidence=True)
118 |     #     assert (pred_proba.min() >= 0)
119 |     #     assert (pred_proba.max() <= 1)
120 |     #
121 |     #     assert_equal(confidence.shape, self.y_test.shape)
122 |     #     assert (confidence.min() >= 0)
123 |     #     assert (confidence.max() <= 1)
124 |     #
125 |     # def test_fit_predict(self):
126 |     #     pred_labels = self.clf.fit_predict(self.X_train)
127 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
128 |     #
129 |     # def test_fit_predict_score(self):
130 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
131 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
132 |     #                                scoring='roc_auc_score')
133 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
134 |     #                                scoring='prc_n_score')
135 |     #     with assert_raises(NotImplementedError):
136 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
137 |     #                                    scoring='something')
138 |     #
139 |     # def test_predict_rank(self):
140 |     #     pred_socres = self.clf.decision_function(self.X_test)
141 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
142 |     #
143 |     #     # assert the order is reserved
144 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
145 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
146 |     #     assert_array_less(-0.1, pred_ranks)
147 |     #
148 |     # def test_predict_rank_normalized(self):
149 |     #     pred_socres = self.clf.decision_function(self.X_test)
150 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
151 |     #
152 |     #     # assert the order is reserved
153 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
154 |     #     assert_array_less(pred_ranks, 1.01)
155 |     #     assert_array_less(-0.1, pred_ranks)
156 | 
157 |     # def test_plot(self):
158 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
159 |     #     assert_array_less(0, os)
160 | 
161 |     # def test_model_clone(self):
162 |     #     clone_clf = clone(self.clf)
163 | 
164 |     def tearDown(self):
165 |         pass
166 | 
167 | 
168 | if __name__ == '__main__':
169 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_feawad.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | import torch
 12 | 
 13 | # temporary solution for relative imports in case deepod is not installed
 14 | # if deepod is installed, no need to use the following line
 15 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 16 | 
 17 | from deepod.models.tabular.feawad import FeaWAD
 18 | from deepod.utils.data import generate_data
 19 | import numpy as np
 20 | 
 21 | 
 22 | class TestFeaWAD(unittest.TestCase):
 23 |     def setUp(self):
 24 |         self.n_train = 200
 25 |         self.n_test = 100
 26 |         self.contamination = 0.1
 27 |         self.roc_floor = 0.8
 28 | 
 29 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 30 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 31 |             contamination=self.contamination, random_state=42)
 32 | 
 33 |         self.Xts_train = np.random.randn(1000, 19)
 34 |         self.yts_train = np.zeros(1000, dtype=int)
 35 |         self.yts_train[200:250] = 1
 36 |         self.Xts_test = self.Xts_train.copy()
 37 |         self.yts_test = self.yts_train.copy()
 38 | 
 39 |         anom_id = np.where(self.y_train == 1)[0]
 40 |         known_anom_id = np.random.choice(anom_id, 10, replace=False)
 41 |         y_semi = np.zeros_like(self.y_train, dtype=int)
 42 |         y_semi[known_anom_id] = 1
 43 | 
 44 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 45 |         self.clf = FeaWAD(epochs=20, device=device)
 46 |         self.clf.fit(self.X_train, y_semi)
 47 | 
 48 |     def test_parameters(self):
 49 |         assert (hasattr(self.clf, 'decision_scores_') and
 50 |                 self.clf.decision_scores_ is not None)
 51 |         assert (hasattr(self.clf, 'labels_') and
 52 |                 self.clf.labels_ is not None)
 53 |         assert (hasattr(self.clf, 'threshold_') and
 54 |                 self.clf.threshold_ is not None)
 55 | 
 56 |     # def test_train_scores(self):
 57 |     #     assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 58 | 
 59 |     def test_train_scores(self):
 60 |         assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 61 | 
 62 |     def test_prediction_scores(self):
 63 |         pred_scores = self.clf.decision_function(self.X_test)
 64 | 
 65 |         # check score shapes
 66 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 67 | 
 68 |         # # check performance
 69 |         # auc = roc_auc_score(self.y_test, pred_scores)
 70 |         # assert (auc >= self.roc_floor), f'auc is {auc}'
 71 | 
 72 |     def test_prediction_labels(self):
 73 |         pred_labels = self.clf.predict(self.X_test)
 74 |         assert_equal(pred_labels.shape, self.y_test.shape)
 75 | 
 76 |     # def test_prediction_proba(self):
 77 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 78 |     #     assert (pred_proba.min() >= 0)
 79 |     #     assert (pred_proba.max() <= 1)
 80 |     #
 81 |     # def test_prediction_proba_linear(self):
 82 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 83 |     #     assert (pred_proba.min() >= 0)
 84 |     #     assert (pred_proba.max() <= 1)
 85 |     #
 86 |     # def test_prediction_proba_unify(self):
 87 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 88 |     #     assert (pred_proba.min() >= 0)
 89 |     #     assert (pred_proba.max() <= 1)
 90 |     #
 91 |     # def test_prediction_proba_parameter(self):
 92 |     #     with assert_raises(ValueError):
 93 |     #         self.clf.predict_proba(self.X_test, method='something')
 94 | 
 95 |     def test_prediction_labels_confidence(self):
 96 |         pred_labels, confidence = self.clf.predict(self.X_test,
 97 |                                                    return_confidence=True)
 98 | 
 99 |         assert_equal(pred_labels.shape, self.y_test.shape)
100 |         assert_equal(confidence.shape, self.y_test.shape)
101 |         assert (confidence.min() >= 0)
102 |         assert (confidence.max() <= 1)
103 | 
104 |     # def test_prediction_proba_linear_confidence(self):
105 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
106 |     #                                                     method='linear',
107 |     #                                                     return_confidence=True)
108 |     #     assert (pred_proba.min() >= 0)
109 |     #     assert (pred_proba.max() <= 1)
110 |     #
111 |     #     assert_equal(confidence.shape, self.y_test.shape)
112 |     #     assert (confidence.min() >= 0)
113 |     #     assert (confidence.max() <= 1)
114 |     #
115 |     # def test_fit_predict(self):
116 |     #     pred_labels = self.clf.fit_predict(self.X_train)
117 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
118 |     #
119 |     # def test_fit_predict_score(self):
120 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
121 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
122 |     #                                scoring='roc_auc_score')
123 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
124 |     #                                scoring='prc_n_score')
125 |     #     with assert_raises(NotImplementedError):
126 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
127 |     #                                    scoring='something')
128 |     #
129 |     # def test_predict_rank(self):
130 |     #     pred_socres = self.clf.decision_function(self.X_test)
131 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
132 |     #
133 |     #     # assert the order is reserved
134 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
135 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
136 |     #     assert_array_less(-0.1, pred_ranks)
137 |     #
138 |     # def test_predict_rank_normalized(self):
139 |     #     pred_socres = self.clf.decision_function(self.X_test)
140 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
141 |     #
142 |     #     # assert the order is reserved
143 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
144 |     #     assert_array_less(pred_ranks, 1.01)
145 |     #     assert_array_less(-0.1, pred_ranks)
146 | 
147 |     # def test_plot(self):
148 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
149 |     #     assert_array_less(0, os)
150 | 
151 |     # def test_model_clone(self):
152 |     #     clone_clf = clone(self.clf)
153 | 
154 |     def tearDown(self):
155 |         pass
156 | 
157 | 
158 | if __name__ == '__main__':
159 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_goad.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_allclose
 11 | from numpy.testing import assert_array_less
 12 | from numpy.testing import assert_equal
 13 | from numpy.testing import assert_raises
 14 | from scipy.stats import rankdata
 15 | from sklearn.base import clone
 16 | from sklearn.metrics import roc_auc_score
 17 | import torch
 18 | 
 19 | # temporary solution for relative imports in case deepod is not installed
 20 | # if deepod is installed, no need to use the following line
 21 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 22 | 
 23 | from deepod.models import GOAD
 24 | from deepod.utils.data import generate_data
 25 | 
 26 | 
 27 | class TestGOAD(unittest.TestCase):
 28 |     def setUp(self):
 29 |         self.n_train = 200
 30 |         self.n_test = 100
 31 |         self.contamination = 0.1
 32 |         self.roc_floor = 0.8
 33 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 34 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 35 |             contamination=self.contamination, random_state=42)
 36 | 
 37 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 38 |         self.clf = GOAD(epochs=1, device=device, n_trans=64)
 39 |         self.clf.fit(self.X_train)
 40 | 
 41 |     def test_parameters(self):
 42 |         assert (hasattr(self.clf, 'decision_scores_') and
 43 |                 self.clf.decision_scores_ is not None)
 44 |         assert (hasattr(self.clf, 'labels_') and
 45 |                 self.clf.labels_ is not None)
 46 |         assert (hasattr(self.clf, 'threshold_') and
 47 |                 self.clf.threshold_ is not None)
 48 | 
 49 |     # def test_train_scores(self):
 50 |     #     assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 51 | 
 52 |     def test_prediction_scores(self):
 53 |         pred_scores = self.clf.decision_function(self.X_test)
 54 | 
 55 |         # check score shapes
 56 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 57 | 
 58 |         # check performance
 59 |         # assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)
 60 | 
 61 |     def test_prediction_labels(self):
 62 |         pred_labels = self.clf.predict(self.X_test)
 63 |         assert_equal(pred_labels.shape, self.y_test.shape)
 64 | 
 65 |     # def test_prediction_proba(self):
 66 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 67 |     #     assert (pred_proba.min() >= 0)
 68 |     #     assert (pred_proba.max() <= 1)
 69 |     #
 70 |     # def test_prediction_proba_linear(self):
 71 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 72 |     #     assert (pred_proba.min() >= 0)
 73 |     #     assert (pred_proba.max() <= 1)
 74 |     #
 75 |     # def test_prediction_proba_unify(self):
 76 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 77 |     #     assert (pred_proba.min() >= 0)
 78 |     #     assert (pred_proba.max() <= 1)
 79 |     #
 80 |     # def test_prediction_proba_parameter(self):
 81 |     #     with assert_raises(ValueError):
 82 |     #         self.clf.predict_proba(self.X_test, method='something')
 83 | 
 84 |     def test_prediction_labels_confidence(self):
 85 |         pred_labels, confidence = self.clf.predict(self.X_test,
 86 |                                                    return_confidence=True)
 87 | 
 88 |         assert_equal(pred_labels.shape, self.y_test.shape)
 89 |         assert_equal(confidence.shape, self.y_test.shape)
 90 |         assert (confidence.min() >= 0)
 91 |         assert (confidence.max() <= 1)
 92 | 
 93 |     # def test_prediction_proba_linear_confidence(self):
 94 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 95 |     #                                                     method='linear',
 96 |     #                                                     return_confidence=True)
 97 |     #     assert (pred_proba.min() >= 0)
 98 |     #     assert (pred_proba.max() <= 1)
 99 |     #
100 |     #     assert_equal(confidence.shape, self.y_test.shape)
101 |     #     assert (confidence.min() >= 0)
102 |     #     assert (confidence.max() <= 1)
103 |     #
104 |     # def test_fit_predict(self):
105 |     #     pred_labels = self.clf.fit_predict(self.X_train)
106 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
107 |     #
108 |     # def test_fit_predict_score(self):
109 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
110 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
111 |     #                                scoring='roc_auc_score')
112 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
113 |     #                                scoring='prc_n_score')
114 |     #     with assert_raises(NotImplementedError):
115 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
116 |     #                                    scoring='something')
117 |     #
118 |     # def test_predict_rank(self):
119 |     #     pred_socres = self.clf.decision_function(self.X_test)
120 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
121 |     #
122 |     #     # assert the order is reserved
123 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
124 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
125 |     #     assert_array_less(-0.1, pred_ranks)
126 |     #
127 |     # def test_predict_rank_normalized(self):
128 |     #     pred_socres = self.clf.decision_function(self.X_test)
129 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
130 |     #
131 |     #     # assert the order is reserved
132 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
133 |     #     assert_array_less(pred_ranks, 1.01)
134 |     #     assert_array_less(-0.1, pred_ranks)
135 | 
136 |     # def test_plot(self):
137 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
138 |     #     assert_array_less(0, os)
139 | 
140 |     # def test_model_clone(self):
141 |     #     clone_clf = clone(self.clf)
142 | 
143 |     def tearDown(self):
144 |         pass
145 | 
146 | 
147 | if __name__ == '__main__':
148 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_icl.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | import torch
 12 | 
 13 | # temporary solution for relative imports in case deepod is not installed
 14 | # if deepod is installed, no need to use the following line
 15 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 16 | 
 17 | from deepod.models.tabular.icl import ICL
 18 | from deepod.utils.data import generate_data
 19 | 
 20 | 
 21 | class TestICL(unittest.TestCase):
 22 |     def setUp(self):
 23 |         self.n_train = 200
 24 |         self.n_test = 100
 25 |         self.contamination = 0.1
 26 |         self.roc_floor = 0.8
 27 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 28 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 29 |             contamination=self.contamination, random_state=42)
 30 | 
 31 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 32 |         self.clf = ICL(epochs=1, device=device, n_ensemble='auto')
 33 |         self.clf.fit(self.X_train)
 34 | 
 35 |     def test_parameters(self):
 36 |         assert (hasattr(self.clf, 'decision_scores_') and
 37 |                 self.clf.decision_scores_ is not None)
 38 |         assert (hasattr(self.clf, 'labels_') and
 39 |                 self.clf.labels_ is not None)
 40 |         assert (hasattr(self.clf, 'threshold_') and
 41 |                 self.clf.threshold_ is not None)
 42 | 
 43 |     # def test_train_scores(self):
 44 |     #     assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 45 | 
 46 |     def test_prediction_scores(self):
 47 |         pred_scores = self.clf.decision_function(self.X_test)
 48 | 
 49 |         # check score shapes
 50 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 51 | 
 52 |         # check performance
 53 |         # assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)
 54 | 
 55 |     def test_prediction_labels(self):
 56 |         pred_labels = self.clf.predict(self.X_test)
 57 |         assert_equal(pred_labels.shape, self.y_test.shape)
 58 | 
 59 |     # def test_prediction_proba(self):
 60 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 61 |     #     assert (pred_proba.min() >= 0)
 62 |     #     assert (pred_proba.max() <= 1)
 63 |     #
 64 |     # def test_prediction_proba_linear(self):
 65 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 66 |     #     assert (pred_proba.min() >= 0)
 67 |     #     assert (pred_proba.max() <= 1)
 68 |     #
 69 |     # def test_prediction_proba_unify(self):
 70 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 71 |     #     assert (pred_proba.min() >= 0)
 72 |     #     assert (pred_proba.max() <= 1)
 73 |     #
 74 |     # def test_prediction_proba_parameter(self):
 75 |     #     with assert_raises(ValueError):
 76 |     #         self.clf.predict_proba(self.X_test, method='something')
 77 | 
 78 |     def test_prediction_labels_confidence(self):
 79 |         pred_labels, confidence = self.clf.predict(self.X_test,
 80 |                                                    return_confidence=True)
 81 | 
 82 |         assert_equal(pred_labels.shape, self.y_test.shape)
 83 |         assert_equal(confidence.shape, self.y_test.shape)
 84 |         assert (confidence.min() >= 0)
 85 |         assert (confidence.max() <= 1)
 86 | 
 87 |     # def test_prediction_proba_linear_confidence(self):
 88 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 89 |     #                                                     method='linear',
 90 |     #                                                     return_confidence=True)
 91 |     #     assert (pred_proba.min() >= 0)
 92 |     #     assert (pred_proba.max() <= 1)
 93 |     #
 94 |     #     assert_equal(confidence.shape, self.y_test.shape)
 95 |     #     assert (confidence.min() >= 0)
 96 |     #     assert (confidence.max() <= 1)
 97 |     #
 98 |     # def test_fit_predict(self):
 99 |     #     pred_labels = self.clf.fit_predict(self.X_train)
100 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
101 |     #
102 |     # def test_fit_predict_score(self):
103 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
104 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
105 |     #                                scoring='roc_auc_score')
106 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
107 |     #                                scoring='prc_n_score')
108 |     #     with assert_raises(NotImplementedError):
109 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
110 |     #                                    scoring='something')
111 |     #
112 |     # def test_predict_rank(self):
113 |     #     pred_socres = self.clf.decision_function(self.X_test)
114 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
115 |     #
116 |     #     # assert the order is reserved
117 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
118 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
119 |     #     assert_array_less(-0.1, pred_ranks)
120 |     #
121 |     # def test_predict_rank_normalized(self):
122 |     #     pred_socres = self.clf.decision_function(self.X_test)
123 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
124 |     #
125 |     #     # assert the order is reserved
126 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
127 |     #     assert_array_less(pred_ranks, 1.01)
128 |     #     assert_array_less(-0.1, pred_ranks)
129 | 
130 |     # def test_plot(self):
131 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
132 |     #     assert_array_less(0, os)
133 | 
134 |     # def test_model_clone(self):
135 |     #     clone_clf = clone(self.clf)
136 | 
137 |     def tearDown(self):
138 |         pass
139 | 
140 | 
141 | if __name__ == '__main__':
142 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_ncad.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | from sklearn.metrics import roc_auc_score
 12 | import torch
 13 | import pandas as pd
 14 | 
 15 | # temporary solution for relative imports in case deepod is not installed
 16 | # if deepod is installed, no need to use the following line
 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 18 | 
 19 | from deepod.models.time_series.ncad import NCAD
 20 | 
 21 | 
 22 | class TestDCdetector(unittest.TestCase):
 23 |     def setUp(self):
 24 |         train_file = 'data/omi-1/omi-1_train.csv'
 25 |         test_file = 'data/omi-1/omi-1_test.csv'
 26 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 27 |         test_df = pd.read_csv(test_file, index_col=0)
 28 |         y = test_df['label'].values
 29 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 30 |         self.Xts_train = train_df.values
 31 |         self.Xts_test = test_df.values
 32 |         self.yts_test = y
 33 | 
 34 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 35 |         self.clf = NCAD(seq_len=100, stride=1, epochs=2,
 36 |                         batch_size=32, lr=1e-4,
 37 |                         device=device, random_state=42)
 38 |         self.clf.fit(self.Xts_train)
 39 | 
 40 |     def test_parameters(self):
 41 |         assert (hasattr(self.clf, 'decision_scores_') and
 42 |                 self.clf.decision_scores_ is not None)
 43 |         assert (hasattr(self.clf, 'labels_') and
 44 |                 self.clf.labels_ is not None)
 45 |         assert (hasattr(self.clf, 'threshold_') and
 46 |                 self.clf.threshold_ is not None)
 47 | 
 48 |     def test_train_scores(self):
 49 |         assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0])
 50 | 
 51 |     def test_prediction_scores(self):
 52 |         pred_scores = self.clf.decision_function(self.Xts_test)
 53 |         assert_equal(pred_scores.shape[0], self.Xts_test.shape[0])
 54 | 
 55 |     def test_prediction_labels(self):
 56 |         pred_labels = self.clf.predict(self.Xts_test)
 57 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 58 | 
 59 |     # def test_prediction_proba(self):
 60 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 61 |     #     assert (pred_proba.min() >= 0)
 62 |     #     assert (pred_proba.max() <= 1)
 63 |     #
 64 |     # def test_prediction_proba_linear(self):
 65 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 66 |     #     assert (pred_proba.min() >= 0)
 67 |     #     assert (pred_proba.max() <= 1)
 68 |     #
 69 |     # def test_prediction_proba_unify(self):
 70 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 71 |     #     assert (pred_proba.min() >= 0)
 72 |     #     assert (pred_proba.max() <= 1)
 73 |     #
 74 |     # def test_prediction_proba_parameter(self):
 75 |     #     with assert_raises(ValueError):
 76 |     #         self.clf.predict_proba(self.X_test, method='something')
 77 | 
 78 |     def test_prediction_labels_confidence(self):
 79 |         pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True)
 80 | 
 81 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 82 |         assert_equal(confidence.shape, self.yts_test.shape)
 83 |         assert (confidence.min() >= 0)
 84 |         assert (confidence.max() <= 1)
 85 | 
 86 |     # def test_prediction_proba_linear_confidence(self):
 87 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 88 |     #                                                     method='linear',
 89 |     #                                                     return_confidence=True)
 90 |     #     assert (pred_proba.min() >= 0)
 91 |     #     assert (pred_proba.max() <= 1)
 92 |     #
 93 |     #     assert_equal(confidence.shape, self.y_test.shape)
 94 |     #     assert (confidence.min() >= 0)
 95 |     #     assert (confidence.max() <= 1)
 96 |     #
 97 |     # def test_fit_predict(self):
 98 |     #     pred_labels = self.clf.fit_predict(self.X_train)
 99 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
100 |     #
101 |     # def test_fit_predict_score(self):
102 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
103 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
104 |     #                                scoring='roc_auc_score')
105 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
106 |     #                                scoring='prc_n_score')
107 |     #     with assert_raises(NotImplementedError):
108 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
109 |     #                                    scoring='something')
110 |     #
111 |     # def test_predict_rank(self):
112 |     #     pred_scores = self.clf.decision_function(self.X_test)
113 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
114 |     #
115 |     #     # assert the order is preserved
116 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
117 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
118 |     #     assert_array_less(-0.1, pred_ranks)
119 |     #
120 |     # def test_predict_rank_normalized(self):
121 |     #     pred_scores = self.clf.decision_function(self.X_test)
122 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
123 |     #
124 |     #     # assert the order is preserved
125 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
126 |     #     assert_array_less(pred_ranks, 1.01)
127 |     #     assert_array_less(-0.1, pred_ranks)
128 | 
129 |     # def test_plot(self):
130 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
131 |     #     assert_array_less(0, os)
132 | 
133 |     # def test_model_clone(self):
134 |     #     clone_clf = clone(self.clf)
135 | 
136 |     def tearDown(self):
137 |         pass
138 | 
139 | 
140 | if __name__ == '__main__':
141 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_neutral.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_allclose
 11 | from numpy.testing import assert_array_less
 12 | from numpy.testing import assert_equal
 13 | from numpy.testing import assert_raises
 14 | from scipy.stats import rankdata
 15 | from sklearn.base import clone
 16 | from sklearn.metrics import roc_auc_score
 17 | import torch
 18 | 
 19 | # temporary solution for relative imports in case deepod is not installed
 20 | # if deepod is installed, the following line is not needed
 21 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 22 | 
 23 | from deepod.models import NeuTraL
 24 | from deepod.utils.data import generate_data
 25 | 
 26 | 
 27 | class TestNeuTral(unittest.TestCase):
 28 |     def setUp(self):
 29 |         self.n_train = 200
 30 |         self.n_test = 100
 31 |         self.contamination = 0.1
 32 |         self.roc_floor = 0.8
 33 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 34 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 35 |             contamination=self.contamination, random_state=42)
 36 |         # fit a fresh 1-epoch NeuTraL model for each test, on GPU when available
 37 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 38 |         self.clf = NeuTraL(epochs=1, device=device)
 39 |         self.clf.fit(self.X_train)
 40 | 
 41 |     def test_parameters(self):
 42 |         assert (hasattr(self.clf, 'decision_scores_') and
 43 |                 self.clf.decision_scores_ is not None)
 44 |         assert (hasattr(self.clf, 'labels_') and
 45 |                 self.clf.labels_ is not None)
 46 |         assert (hasattr(self.clf, 'threshold_') and
 47 |                 self.clf.threshold_ is not None)
 48 | 
 49 |     # def test_train_scores(self):
 50 |     #     assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 51 | 
 52 |     def test_prediction_scores(self):
 53 |         pred_scores = self.clf.decision_function(self.X_test)
 54 | 
 55 |         # check score shapes: one score per test sample
 56 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 57 | 
 58 |         # check performance (disabled: self.roc_floor is currently not enforced)
 59 |         # assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)
 60 | 
 61 |     def test_prediction_labels(self):
 62 |         pred_labels = self.clf.predict(self.X_test)
 63 |         assert_equal(pred_labels.shape, self.y_test.shape)
 64 | 
 65 |     # def test_prediction_proba(self):
 66 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 67 |     #     assert (pred_proba.min() >= 0)
 68 |     #     assert (pred_proba.max() <= 1)
 69 |     #
 70 |     # def test_prediction_proba_linear(self):
 71 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 72 |     #     assert (pred_proba.min() >= 0)
 73 |     #     assert (pred_proba.max() <= 1)
 74 |     #
 75 |     # def test_prediction_proba_unify(self):
 76 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 77 |     #     assert (pred_proba.min() >= 0)
 78 |     #     assert (pred_proba.max() <= 1)
 79 |     #
 80 |     # def test_prediction_proba_parameter(self):
 81 |     #     with assert_raises(ValueError):
 82 |     #         self.clf.predict_proba(self.X_test, method='something')
 83 | 
 84 |     def test_prediction_labels_confidence(self):
 85 |         pred_labels, confidence = self.clf.predict(self.X_test,
 86 |                                                    return_confidence=True)
 87 | 
 88 |         assert_equal(pred_labels.shape, self.y_test.shape)
 89 |         assert_equal(confidence.shape, self.y_test.shape)
 90 |         assert (confidence.min() >= 0)
 91 |         assert (confidence.max() <= 1)
 92 | 
 93 |     # def test_prediction_proba_linear_confidence(self):
 94 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 95 |     #                                                     method='linear',
 96 |     #                                                     return_confidence=True)
 97 |     #     assert (pred_proba.min() >= 0)
 98 |     #     assert (pred_proba.max() <= 1)
 99 |     #
100 |     #     assert_equal(confidence.shape, self.y_test.shape)
101 |     #     assert (confidence.min() >= 0)
102 |     #     assert (confidence.max() <= 1)
103 |     #
104 |     # def test_fit_predict(self):
105 |     #     pred_labels = self.clf.fit_predict(self.X_train)
106 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
107 |     #
108 |     # def test_fit_predict_score(self):
109 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
110 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
111 |     #                                scoring='roc_auc_score')
112 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
113 |     #                                scoring='prc_n_score')
114 |     #     with assert_raises(NotImplementedError):
115 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
116 |     #                                    scoring='something')
117 |     #
118 |     # def test_predict_rank(self):
119 |     #     pred_scores = self.clf.decision_function(self.X_test)
120 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
121 |     #
122 |     #     # assert the order is preserved
123 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
124 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
125 |     #     assert_array_less(-0.1, pred_ranks)
126 |     #
127 |     # def test_predict_rank_normalized(self):
128 |     #     pred_scores = self.clf.decision_function(self.X_test)
129 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
130 |     #
131 |     #     # assert the order is preserved
132 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
133 |     #     assert_array_less(pred_ranks, 1.01)
134 |     #     assert_array_less(-0.1, pred_ranks)
135 | 
136 |     # def test_plot(self):
137 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
138 |     #     assert_array_less(0, os)
139 | 
140 |     # def test_model_clone(self):
141 |     #     clone_clf = clone(self.clf)
142 | 
143 |     def tearDown(self):
144 |         pass
145 | 
146 | 
147 | if __name__ == '__main__':
148 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_rca.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | import torch
 12 | 
 13 | # temporary solution for relative imports in case deepod is not installed
 14 | # if deepod is installed, the following line is not needed
 15 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 16 | 
 17 | from deepod.models.tabular.rca import RCA
 18 | from deepod.utils.data import generate_data
 19 | 
 20 | 
 21 | class TestRCA(unittest.TestCase):
 22 |     def setUp(self):
 23 |         self.n_train = 200
 24 |         self.n_test = 100
 25 |         self.contamination = 0.1
 26 |         self.roc_floor = 0.8
 27 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 28 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 29 |             contamination=self.contamination, random_state=42)
 30 |         # fit a fresh 1-epoch RCA model (LeakyReLU) for each test, on GPU when available
 31 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 32 |         self.clf = RCA(epochs=1, device=device, act='LeakyReLU')
 33 |         self.clf.fit(self.X_train)
 34 | 
 35 |     def test_parameters(self):
 36 |         assert (hasattr(self.clf, 'decision_scores_') and
 37 |                 self.clf.decision_scores_ is not None)
 38 |         assert (hasattr(self.clf, 'labels_') and
 39 |                 self.clf.labels_ is not None)
 40 |         assert (hasattr(self.clf, 'threshold_') and
 41 |                 self.clf.threshold_ is not None)
 42 | 
 43 |     # def test_train_scores(self):
 44 |     #     assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 45 | 
 46 |     def test_prediction_scores(self):
 47 |         pred_scores = self.clf.decision_function(self.X_test)
 48 | 
 49 |         # check score shapes: one score per test sample
 50 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 51 | 
 52 |         # check performance (disabled: self.roc_floor is currently not enforced)
 53 |         # assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)
 54 | 
 55 |     def test_prediction_labels(self):
 56 |         pred_labels = self.clf.predict(self.X_test)
 57 |         assert_equal(pred_labels.shape, self.y_test.shape)
 58 | 
 59 |     # def test_prediction_proba(self):
 60 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 61 |     #     assert (pred_proba.min() >= 0)
 62 |     #     assert (pred_proba.max() <= 1)
 63 |     #
 64 |     # def test_prediction_proba_linear(self):
 65 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 66 |     #     assert (pred_proba.min() >= 0)
 67 |     #     assert (pred_proba.max() <= 1)
 68 |     #
 69 |     # def test_prediction_proba_unify(self):
 70 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 71 |     #     assert (pred_proba.min() >= 0)
 72 |     #     assert (pred_proba.max() <= 1)
 73 |     #
 74 |     # def test_prediction_proba_parameter(self):
 75 |     #     with assert_raises(ValueError):
 76 |     #         self.clf.predict_proba(self.X_test, method='something')
 77 | 
 78 |     def test_prediction_labels_confidence(self):
 79 |         pred_labels, confidence = self.clf.predict(self.X_test,
 80 |                                                    return_confidence=True)
 81 | 
 82 |         assert_equal(pred_labels.shape, self.y_test.shape)
 83 |         assert_equal(confidence.shape, self.y_test.shape)
 84 |         assert (confidence.min() >= 0)
 85 |         assert (confidence.max() <= 1)
 86 | 
 87 |     # def test_prediction_proba_linear_confidence(self):
 88 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 89 |     #                                                     method='linear',
 90 |     #                                                     return_confidence=True)
 91 |     #     assert (pred_proba.min() >= 0)
 92 |     #     assert (pred_proba.max() <= 1)
 93 |     #
 94 |     #     assert_equal(confidence.shape, self.y_test.shape)
 95 |     #     assert (confidence.min() >= 0)
 96 |     #     assert (confidence.max() <= 1)
 97 |     #
 98 |     # def test_fit_predict(self):
 99 |     #     pred_labels = self.clf.fit_predict(self.X_train)
100 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
101 |     #
102 |     # def test_fit_predict_score(self):
103 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
104 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
105 |     #                                scoring='roc_auc_score')
106 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
107 |     #                                scoring='prc_n_score')
108 |     #     with assert_raises(NotImplementedError):
109 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
110 |     #                                    scoring='something')
111 |     #
112 |     # def test_predict_rank(self):
113 |     #     pred_scores = self.clf.decision_function(self.X_test)
114 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
115 |     #
116 |     #     # assert the order is preserved
117 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
118 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
119 |     #     assert_array_less(-0.1, pred_ranks)
120 |     #
121 |     # def test_predict_rank_normalized(self):
122 |     #     pred_scores = self.clf.decision_function(self.X_test)
123 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
124 |     #
125 |     #     # assert the order is preserved
126 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
127 |     #     assert_array_less(pred_ranks, 1.01)
128 |     #     assert_array_less(-0.1, pred_ranks)
129 | 
130 |     # def test_plot(self):
131 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
132 |     #     assert_array_less(0, os)
133 | 
134 |     # def test_model_clone(self):
135 |     #     clone_clf = clone(self.clf)
136 | 
137 |     def tearDown(self):
138 |         pass
139 | 
140 | 
141 | if __name__ == '__main__':
142 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_rdp.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_allclose
 11 | from numpy.testing import assert_array_less
 12 | from numpy.testing import assert_equal
 13 | from numpy.testing import assert_raises
 14 | from scipy.stats import rankdata
 15 | from sklearn.base import clone
 16 | from sklearn.metrics import roc_auc_score
 17 | import torch
 18 | 
 19 | # temporary solution for relative imports in case deepod is not installed
 20 | # if deepod is installed, the following line is not needed
 21 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 22 | 
 23 | from deepod.models import RDP
 24 | from deepod.utils.data import generate_data
 25 | 
 26 | 
 27 | class TestRDP(unittest.TestCase):
 28 |     def setUp(self):
 29 |         self.n_train = 200
 30 |         self.n_test = 100
 31 |         self.contamination = 0.1
 32 |         self.roc_floor = 0.8
 33 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 34 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 35 |             contamination=self.contamination, random_state=42)
 36 |         # fit a fresh RDP model (hidden_dims=100, no epochs override) for each test, on GPU when available
 37 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 38 |         self.clf = RDP(hidden_dims=100, device=device)
 39 |         self.clf.fit(self.X_train)
 40 | 
 41 |     def test_parameters(self):
 42 |         assert (hasattr(self.clf, 'decision_scores_') and
 43 |                 self.clf.decision_scores_ is not None)
 44 |         assert (hasattr(self.clf, 'labels_') and
 45 |                 self.clf.labels_ is not None)
 46 |         assert (hasattr(self.clf, 'threshold_') and
 47 |                 self.clf.threshold_ is not None)
 48 | 
 49 |     # def test_train_scores(self):
 50 |     #     assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 51 | 
 52 |     def test_prediction_scores(self):
 53 |         pred_scores = self.clf.decision_function(self.X_test)
 54 | 
 55 |         # check score shapes: one score per test sample
 56 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 57 | 
 58 |     def test_prediction_labels(self):
 59 |         pred_labels = self.clf.predict(self.X_test)
 60 |         assert_equal(pred_labels.shape, self.y_test.shape)
 61 | 
 62 |     # def test_prediction_proba(self):
 63 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 64 |     #     assert (pred_proba.min() >= 0)
 65 |     #     assert (pred_proba.max() <= 1)
 66 |     #
 67 |     # def test_prediction_proba_linear(self):
 68 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 69 |     #     assert (pred_proba.min() >= 0)
 70 |     #     assert (pred_proba.max() <= 1)
 71 |     #
 72 |     # def test_prediction_proba_unify(self):
 73 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 74 |     #     assert (pred_proba.min() >= 0)
 75 |     #     assert (pred_proba.max() <= 1)
 76 |     #
 77 |     # def test_prediction_proba_parameter(self):
 78 |     #     with assert_raises(ValueError):
 79 |     #         self.clf.predict_proba(self.X_test, method='something')
 80 | 
 81 |     def test_prediction_labels_confidence(self):
 82 |         pred_labels, confidence = self.clf.predict(self.X_test,
 83 |                                                    return_confidence=True)
 84 | 
 85 |         assert_equal(pred_labels.shape, self.y_test.shape)
 86 |         assert_equal(confidence.shape, self.y_test.shape)
 87 |         assert (confidence.min() >= 0)
 88 |         assert (confidence.max() <= 1)
 89 | 
 90 |     # def test_prediction_proba_linear_confidence(self):
 91 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 92 |     #                                                     method='linear',
 93 |     #                                                     return_confidence=True)
 94 |     #     assert (pred_proba.min() >= 0)
 95 |     #     assert (pred_proba.max() <= 1)
 96 |     #
 97 |     #     assert_equal(confidence.shape, self.y_test.shape)
 98 |     #     assert (confidence.min() >= 0)
 99 |     #     assert (confidence.max() <= 1)
100 |     #
101 |     # def test_fit_predict(self):
102 |     #     pred_labels = self.clf.fit_predict(self.X_train)
103 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
104 |     #
105 |     # def test_fit_predict_score(self):
106 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
107 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
108 |     #                                scoring='roc_auc_score')
109 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
110 |     #                                scoring='prc_n_score')
111 |     #     with assert_raises(NotImplementedError):
112 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
113 |     #                                    scoring='something')
114 |     #
115 |     # def test_predict_rank(self):
116 |     #     pred_scores = self.clf.decision_function(self.X_test)
117 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
118 |     #
119 |     #     # assert the order is preserved
120 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
121 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
122 |     #     assert_array_less(-0.1, pred_ranks)
123 |     #
124 |     # def test_predict_rank_normalized(self):
125 |     #     pred_scores = self.clf.decision_function(self.X_test)
126 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
127 |     #
128 |     #     # assert the order is preserved
129 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_scores), atol=3)
130 |     #     assert_array_less(pred_ranks, 1.01)
131 |     #     assert_array_less(-0.1, pred_ranks)
132 | 
133 |     # def test_plot(self):
134 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
135 |     #     assert_array_less(0, os)
136 | 
137 |     # def test_model_clone(self):
138 |     #     clone_clf = clone(self.clf)
139 | 
140 |     def tearDown(self):
141 |         pass
142 | 
143 | 
144 | if __name__ == '__main__':
145 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_repen.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | import pandas as pd
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | from sklearn.metrics import roc_auc_score
 12 | import torch
 13 | 
 14 | # temporary solution for relative imports in case deepod is not installed
 15 | # if deepod is installed, the following line is not needed
 16 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 17 | 
 18 | from deepod.models.tabular.repen import REPEN
 19 | from deepod.utils.data import generate_data
 20 | 
 21 | 
 22 | class TestREPEN(unittest.TestCase):
 23 |     def setUp(self):
 24 |         self.n_train = 200
 25 |         self.n_test = 100
 26 |         self.contamination = 0.1
 27 |         self.roc_floor = 0.8
 28 |         self.ts_f1_floor = 0.0
 29 | 
 30 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 31 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 32 |             contamination=self.contamination, random_state=42)
 33 | 
 34 |         train_file = 'data/omi-1/omi-1_train.csv'
 35 |         test_file = 'data/omi-1/omi-1_test.csv'
 36 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 37 |         test_df = pd.read_csv(test_file, index_col=0)
 38 |         y = test_df['label'].values
 39 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 40 |         self.Xts_train = train_df.values
 41 |         self.Xts_test = test_df.values
 42 |         self.yts_test = y
 43 | 
 44 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 45 |         self.clf = REPEN(epochs=5, device=device)
 46 |         self.clf.fit(self.X_train)
 47 | 
 48 |     def test_parameters(self):
 49 |         assert (hasattr(self.clf, 'decision_scores_') and
 50 |                 self.clf.decision_scores_ is not None)
 51 |         assert (hasattr(self.clf, 'labels_') and
 52 |                 self.clf.labels_ is not None)
 53 |         assert (hasattr(self.clf, 'threshold_') and
 54 |                 self.clf.threshold_ is not None)
 55 | 
 56 |     def test_train_scores(self):
 57 |         assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 58 | 
 59 |     def test_prediction_scores(self):
 60 |         pred_scores = self.clf.decision_function(self.X_test)
 61 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 62 | 
 63 |     def test_prediction_labels(self):
 64 |         pred_labels = self.clf.predict(self.X_test)
 65 |         assert_equal(pred_labels.shape, self.y_test.shape)
 66 | 
 67 |     # def test_prediction_proba(self):
 68 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 69 |     #     assert (pred_proba.min() >= 0)
 70 |     #     assert (pred_proba.max() <= 1)
 71 |     #
 72 |     # def test_prediction_proba_linear(self):
 73 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 74 |     #     assert (pred_proba.min() >= 0)
 75 |     #     assert (pred_proba.max() <= 1)
 76 |     #
 77 |     # def test_prediction_proba_unify(self):
 78 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 79 |     #     assert (pred_proba.min() >= 0)
 80 |     #     assert (pred_proba.max() <= 1)
 81 |     #
 82 |     # def test_prediction_proba_parameter(self):
 83 |     #     with assert_raises(ValueError):
 84 |     #         self.clf.predict_proba(self.X_test, method='something')
 85 | 
 86 |     def test_prediction_labels_confidence(self):
 87 |         pred_labels, confidence = self.clf.predict(self.X_test,
 88 |                                                    return_confidence=True)
 89 | 
 90 |         assert_equal(pred_labels.shape, self.y_test.shape)
 91 |         assert_equal(confidence.shape, self.y_test.shape)
 92 |         assert (confidence.min() >= 0)
 93 |         assert (confidence.max() <= 1)
 94 | 
 95 |     # def test_prediction_proba_linear_confidence(self):
 96 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 97 |     #                                                     method='linear',
 98 |     #                                                     return_confidence=True)
 99 |     #     assert (pred_proba.min() >= 0)
100 |     #     assert (pred_proba.max() <= 1)
101 |     #
102 |     #     assert_equal(confidence.shape, self.y_test.shape)
103 |     #     assert (confidence.min() >= 0)
104 |     #     assert (confidence.max() <= 1)
105 |     #
106 |     # def test_fit_predict(self):
107 |     #     pred_labels = self.clf.fit_predict(self.X_train)
108 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
109 |     #
110 |     # def test_fit_predict_score(self):
111 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
112 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
113 |     #                                scoring='roc_auc_score')
114 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
115 |     #                                scoring='prc_n_score')
116 |     #     with assert_raises(NotImplementedError):
117 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
118 |     #                                    scoring='something')
119 |     #
120 |     # def test_predict_rank(self):
121 |     #     pred_socres = self.clf.decision_function(self.X_test)
122 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
123 |     #
124 |     #     # assert the order is reserved
125 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
126 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
127 |     #     assert_array_less(-0.1, pred_ranks)
128 |     #
129 |     # def test_predict_rank_normalized(self):
130 |     #     pred_socres = self.clf.decision_function(self.X_test)
131 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
132 |     #
133 |     #     # assert the order is reserved
134 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
135 |     #     assert_array_less(pred_ranks, 1.01)
136 |     #     assert_array_less(-0.1, pred_ranks)
137 | 
138 |     # def test_plot(self):
139 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
140 |     #     assert_array_less(0, os)
141 | 
142 |     # def test_model_clone(self):
143 |     #     clone_clf = clone(self.clf)
144 | 
145 |     def tearDown(self):
146 |         pass
147 | 
148 | 
149 | if __name__ == '__main__':
150 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_rosas.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | import torch
 12 | 
 13 | # temporary solution for relative imports in case deepod is not installed
 14 | # if deepod is installed, the following line is not needed
 15 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 16 | 
 17 | from deepod.models.tabular.rosas import RoSAS
 18 | from deepod.utils.data import generate_data
 19 | import numpy as np
 20 | 
 21 | 
 22 | class TestDevNet(unittest.TestCase):
 23 |     def setUp(self):
 24 |         self.n_train = 200
 25 |         self.n_test = 100
 26 |         self.contamination = 0.1
 27 |         self.roc_floor = 0.8
 28 | 
 29 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 30 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 31 |             contamination=self.contamination, random_state=42)
 32 | 
 33 |         anom_id = np.where(self.y_train == 1)[0]
 34 |         known_anom_id = np.random.choice(anom_id, 10, replace=False)
 35 |         y_semi = np.zeros_like(self.y_train, dtype=int)
 36 |         y_semi[known_anom_id] = 1
 37 | 
 38 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 39 |         self.clf = RoSAS(epochs=1, hidden_dims=20, device=device, random_state=42)
 40 |         self.clf.fit(self.X_train, y_semi)
 41 | 
 42 |     def test_parameters(self):
 43 |         assert (hasattr(self.clf, 'decision_scores_') and
 44 |                 self.clf.decision_scores_ is not None)
 45 |         assert (hasattr(self.clf, 'labels_') and
 46 |                 self.clf.labels_ is not None)
 47 |         assert (hasattr(self.clf, 'threshold_') and
 48 |                 self.clf.threshold_ is not None)
 49 | 
 50 |     def test_train_scores(self):
 51 |         assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 52 | 
 53 |     def test_prediction_scores(self):
 54 |         pred_scores = self.clf.decision_function(self.X_test)
 55 | 
 56 |         # check score shapes: one score per test sample
 57 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 58 | 
 59 |     def test_prediction_labels(self):
 60 |         pred_labels = self.clf.predict(self.X_test)
 61 |         assert_equal(pred_labels.shape, self.y_test.shape)
 62 | 
 63 | 
 64 |     # def test_prediction_proba(self):
 65 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 66 |     #     assert (pred_proba.min() >= 0)
 67 |     #     assert (pred_proba.max() <= 1)
 68 |     #
 69 |     # def test_prediction_proba_linear(self):
 70 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 71 |     #     assert (pred_proba.min() >= 0)
 72 |     #     assert (pred_proba.max() <= 1)
 73 |     #
 74 |     # def test_prediction_proba_unify(self):
 75 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 76 |     #     assert (pred_proba.min() >= 0)
 77 |     #     assert (pred_proba.max() <= 1)
 78 |     #
 79 |     # def test_prediction_proba_parameter(self):
 80 |     #     with assert_raises(ValueError):
 81 |     #         self.clf.predict_proba(self.X_test, method='something')
 82 | 
 83 |     def test_prediction_labels_confidence(self):
 84 |         pred_labels, confidence = self.clf.predict(self.X_test,
 85 |                                                    return_confidence=True)
 86 | 
 87 |         assert_equal(pred_labels.shape, self.y_test.shape)
 88 |         assert_equal(confidence.shape, self.y_test.shape)
 89 |         assert (confidence.min() >= 0)
 90 |         assert (confidence.max() <= 1)
 91 | 
 92 | 
 93 |     # def test_prediction_proba_linear_confidence(self):
 94 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 95 |     #                                                     method='linear',
 96 |     #                                                     return_confidence=True)
 97 |     #     assert (pred_proba.min() >= 0)
 98 |     #     assert (pred_proba.max() <= 1)
 99 |     #
100 |     #     assert_equal(confidence.shape, self.y_test.shape)
101 |     #     assert (confidence.min() >= 0)
102 |     #     assert (confidence.max() <= 1)
103 |     #
104 |     # def test_fit_predict(self):
105 |     #     pred_labels = self.clf.fit_predict(self.X_train)
106 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
107 |     #
108 |     # def test_fit_predict_score(self):
109 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
110 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
111 |     #                                scoring='roc_auc_score')
112 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
113 |     #                                scoring='prc_n_score')
114 |     #     with assert_raises(NotImplementedError):
115 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
116 |     #                                    scoring='something')
117 |     #
118 |     # def test_predict_rank(self):
119 |     #     pred_socres = self.clf.decision_function(self.X_test)
120 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
121 |     #
122 |     #     # assert the order is reserved
123 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
124 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
125 |     #     assert_array_less(-0.1, pred_ranks)
126 |     #
127 |     # def test_predict_rank_normalized(self):
128 |     #     pred_socres = self.clf.decision_function(self.X_test)
129 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
130 |     #
131 |     #     # assert the order is reserved
132 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
133 |     #     assert_array_less(pred_ranks, 1.01)
134 |     #     assert_array_less(-0.1, pred_ranks)
135 | 
136 |     # def test_plot(self):
137 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
138 |     #     assert_array_less(0, os)
139 | 
140 |     # def test_model_clone(self):
141 |     #     clone_clf = clone(self.clf)
142 | 
143 |     def tearDown(self):
144 |         pass
145 | 
146 | 
147 | if __name__ == '__main__':  # run the suite when executed as a script
148 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_slad.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | import torch
 12 | 
 13 | # temporary solution for relative imports in case pyod is not installed
 14 | # if deepod is installed, no need to use the following line
 15 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 16 | 
 17 | from deepod.models.tabular.slad import SLAD
 18 | from deepod.utils.data import generate_data
 19 | 
 20 | 
 21 | class TestSLAD(unittest.TestCase):
 22 |     def setUp(self):
 23 |         self.n_train = 200
 24 |         self.n_test = 100
 25 |         self.contamination = 0.1
 26 |         self.roc_floor = 0.8
 27 |         self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
 28 |             n_train=self.n_train, n_test=self.n_test, n_features=10,
 29 |             contamination=self.contamination, random_state=42)
 30 | 
 31 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 32 |         self.clf = SLAD(epochs=2, device=device)
 33 |         self.clf.fit(self.X_train)
 34 | 
 35 |     def test_parameters(self):
 36 |         assert (hasattr(self.clf, 'decision_scores_') and
 37 |                 self.clf.decision_scores_ is not None)
 38 |         assert (hasattr(self.clf, 'labels_') and
 39 |                 self.clf.labels_ is not None)
 40 |         assert (hasattr(self.clf, 'threshold_') and
 41 |                 self.clf.threshold_ is not None)
 42 | 
 43 |     # def test_train_scores(self):
 44 |     #     assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])
 45 | 
 46 |     def test_prediction_scores(self):
 47 |         pred_scores = self.clf.decision_function(self.X_test)
 48 | 
 49 |         # check score shapes
 50 |         assert_equal(pred_scores.shape[0], self.X_test.shape[0])
 51 | 
 52 |         # check performance
 53 |         # assert (roc_auc_score(self.y_test, pred_scores) >= self.roc_floor)
 54 | 
 55 |     def test_prediction_labels(self):
 56 |         pred_labels = self.clf.predict(self.X_test)
 57 |         assert_equal(pred_labels.shape, self.y_test.shape)
 58 | 
 59 |     # def test_prediction_proba(self):
 60 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 61 |     #     assert (pred_proba.min() >= 0)
 62 |     #     assert (pred_proba.max() <= 1)
 63 |     #
 64 |     # def test_prediction_proba_linear(self):
 65 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 66 |     #     assert (pred_proba.min() >= 0)
 67 |     #     assert (pred_proba.max() <= 1)
 68 |     #
 69 |     # def test_prediction_proba_unify(self):
 70 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 71 |     #     assert (pred_proba.min() >= 0)
 72 |     #     assert (pred_proba.max() <= 1)
 73 |     #
 74 |     # def test_prediction_proba_parameter(self):
 75 |     #     with assert_raises(ValueError):
 76 |     #         self.clf.predict_proba(self.X_test, method='something')
 77 | 
 78 |     def test_prediction_labels_confidence(self):
 79 |         pred_labels, confidence = self.clf.predict(self.X_test,
 80 |                                                    return_confidence=True)
 81 | 
 82 |         assert_equal(pred_labels.shape, self.y_test.shape)
 83 |         assert_equal(confidence.shape, self.y_test.shape)
 84 |         assert (confidence.min() >= 0)
 85 |         assert (confidence.max() <= 1)
 86 | 
 87 |     # def test_prediction_proba_linear_confidence(self):
 88 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 89 |     #                                                     method='linear',
 90 |     #                                                     return_confidence=True)
 91 |     #     assert (pred_proba.min() >= 0)
 92 |     #     assert (pred_proba.max() <= 1)
 93 |     #
 94 |     #     assert_equal(confidence.shape, self.y_test.shape)
 95 |     #     assert (confidence.min() >= 0)
 96 |     #     assert (confidence.max() <= 1)
 97 |     #
 98 |     # def test_fit_predict(self):
 99 |     #     pred_labels = self.clf.fit_predict(self.X_train)
100 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
101 |     #
102 |     # def test_fit_predict_score(self):
103 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
104 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
105 |     #                                scoring='roc_auc_score')
106 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
107 |     #                                scoring='prc_n_score')
108 |     #     with assert_raises(NotImplementedError):
109 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
110 |     #                                    scoring='something')
111 |     #
112 |     # def test_predict_rank(self):
113 |     #     pred_socres = self.clf.decision_function(self.X_test)
114 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
115 |     #
116 |     #     # assert the order is reserved
117 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
118 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
119 |     #     assert_array_less(-0.1, pred_ranks)
120 |     #
121 |     # def test_predict_rank_normalized(self):
122 |     #     pred_socres = self.clf.decision_function(self.X_test)
123 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
124 |     #
125 |     #     # assert the order is reserved
126 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
127 |     #     assert_array_less(pred_ranks, 1.01)
128 |     #     assert_array_less(-0.1, pred_ranks)
129 | 
130 |     # def test_plot(self):
131 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
132 |     #     assert_array_less(0, os)
133 | 
134 |     # def test_model_clone(self):
135 |     #     clone_clf = clone(self.clf)
136 | 
137 |     def tearDown(self):
138 |         pass
139 | 
140 | 
141 | if __name__ == '__main__':  # run the suite when executed as a script
142 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_tcned.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | import torch
 12 | import pandas as pd
 13 | 
 14 | # temporary solution for relative imports in case pyod is not installed
 15 | # if deepod is installed, no need to use the following line
 16 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 17 | 
 18 | from deepod.models.time_series.tcned import TcnED
 19 | 
 20 | 
 21 | class TestTcnED(unittest.TestCase):
 22 |     def setUp(self):
 23 |         train_file = 'data/omi-1/omi-1_train.csv'
 24 |         test_file = 'data/omi-1/omi-1_test.csv'
 25 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 26 |         test_df = pd.read_csv(test_file, index_col=0)
 27 |         y = test_df['label'].values
 28 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 29 |         self.Xts_train = train_df.values
 30 |         self.Xts_test = test_df.values
 31 |         self.yts_test = y
 32 | 
 33 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 34 |         self.clf = TcnED(seq_len=100, stride=5,
 35 |                          epochs=5, hidden_dims=50,
 36 |                          device=device, random_state=42)
 37 |         self.clf.fit(self.Xts_train)
 38 | 
 39 |     def test_parameters(self):
 40 |         assert (hasattr(self.clf, 'decision_scores_') and
 41 |                 self.clf.decision_scores_ is not None)
 42 |         assert (hasattr(self.clf, 'labels_') and
 43 |                 self.clf.labels_ is not None)
 44 |         assert (hasattr(self.clf, 'threshold_') and
 45 |                 self.clf.threshold_ is not None)
 46 | 
 47 |     def test_train_scores(self):
 48 |         assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0])
 49 | 
 50 |     def test_prediction_scores(self):
 51 |         pred_scores = self.clf.decision_function(self.Xts_test)
 52 |         assert_equal(pred_scores.shape[0], self.Xts_test.shape[0])
 53 | 
 54 |     def test_prediction_labels(self):
 55 |         pred_labels = self.clf.predict(self.Xts_test)
 56 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 57 | 
 58 |     # def test_prediction_proba(self):
 59 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 60 |     #     assert (pred_proba.min() >= 0)
 61 |     #     assert (pred_proba.max() <= 1)
 62 |     #
 63 |     # def test_prediction_proba_linear(self):
 64 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 65 |     #     assert (pred_proba.min() >= 0)
 66 |     #     assert (pred_proba.max() <= 1)
 67 |     #
 68 |     # def test_prediction_proba_unify(self):
 69 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 70 |     #     assert (pred_proba.min() >= 0)
 71 |     #     assert (pred_proba.max() <= 1)
 72 |     #
 73 |     # def test_prediction_proba_parameter(self):
 74 |     #     with assert_raises(ValueError):
 75 |     #         self.clf.predict_proba(self.X_test, method='something')
 76 | 
 77 |     def test_prediction_labels_confidence(self):
 78 |         pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True)
 79 | 
 80 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 81 |         assert_equal(confidence.shape, self.yts_test.shape)
 82 |         assert (confidence.min() >= 0)
 83 |         assert (confidence.max() <= 1)
 84 | 
 85 |     # def test_prediction_proba_linear_confidence(self):
 86 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 87 |     #                                                     method='linear',
 88 |     #                                                     return_confidence=True)
 89 |     #     assert (pred_proba.min() >= 0)
 90 |     #     assert (pred_proba.max() <= 1)
 91 |     #
 92 |     #     assert_equal(confidence.shape, self.y_test.shape)
 93 |     #     assert (confidence.min() >= 0)
 94 |     #     assert (confidence.max() <= 1)
 95 |     #
 96 |     # def test_fit_predict(self):
 97 |     #     pred_labels = self.clf.fit_predict(self.X_train)
 98 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
 99 |     #
100 |     # def test_fit_predict_score(self):
101 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
102 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
103 |     #                                scoring='roc_auc_score')
104 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
105 |     #                                scoring='prc_n_score')
106 |     #     with assert_raises(NotImplementedError):
107 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
108 |     #                                    scoring='something')
109 |     #
110 |     # def test_predict_rank(self):
111 |     #     pred_socres = self.clf.decision_function(self.X_test)
112 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
113 |     #
114 |     #     # assert the order is reserved
115 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
116 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
117 |     #     assert_array_less(-0.1, pred_ranks)
118 |     #
119 |     # def test_predict_rank_normalized(self):
120 |     #     pred_socres = self.clf.decision_function(self.X_test)
121 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
122 |     #
123 |     #     # assert the order is reserved
124 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
125 |     #     assert_array_less(pred_ranks, 1.01)
126 |     #     assert_array_less(-0.1, pred_ranks)
127 | 
128 |     # def test_plot(self):
129 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
130 |     #     assert_array_less(0, os)
131 | 
132 |     # def test_model_clone(self):
133 |     #     clone_clf = clone(self.clf)
134 | 
135 |     def tearDown(self):
136 |         pass
137 | 
138 | 
139 | if __name__ == '__main__':  # run the suite when executed as a script
140 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_timesnet.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | from sklearn.metrics import roc_auc_score
 12 | import torch
 13 | import pandas as pd
 14 | 
 15 | # temporary solution for relative imports in case pyod is not installed
 16 | # if deepod is installed, no need to use the following line
 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 18 | 
 19 | from deepod.models.time_series.timesnet import TimesNet
 20 | 
 21 | 
 22 | class TestTimesNet(unittest.TestCase):
 23 |     def setUp(self):
 24 |         train_file = 'data/omi-1/omi-1_train.csv'
 25 |         test_file = 'data/omi-1/omi-1_test.csv'
 26 |         # test_file = 'data/omi-1/omi-1_test.csv'
 27 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 28 |         test_df = pd.read_csv(test_file, index_col=0)
 29 |         y = test_df['label'].values
 30 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 31 |         self.Xts_train = train_df.values
 32 |         self.Xts_test = test_df.values
 33 |         self.yts_test = y
 34 | 
 35 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 36 |         self.clf = TimesNet(
 37 |             seq_len=100, stride=1, epochs=2,
 38 |             batch_size=32, lr=1e-4,
 39 |             device=device, random_state=42
 40 |         )
 41 | 
 42 |         self.clf.fit(self.Xts_train)
 43 | 
 44 |     def test_parameters(self):
 45 |         assert (hasattr(self.clf, 'decision_scores_') and
 46 |                 self.clf.decision_scores_ is not None)
 47 |         assert (hasattr(self.clf, 'labels_') and
 48 |                 self.clf.labels_ is not None)
 49 |         assert (hasattr(self.clf, 'threshold_') and
 50 |                 self.clf.threshold_ is not None)
 51 | 
 52 |     def test_train_scores(self):
 53 |         assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0])
 54 | 
 55 |     def test_prediction_scores(self):
 56 |         pred_scores = self.clf.decision_function(self.Xts_test)
 57 |         assert_equal(pred_scores.shape[0], self.Xts_test.shape[0])
 58 | 
 59 |     def test_prediction_labels(self):
 60 |         pred_labels = self.clf.predict(self.Xts_test)
 61 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 62 | 
 63 |     # def test_prediction_proba(self):
 64 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 65 |     #     assert (pred_proba.min() >= 0)
 66 |     #     assert (pred_proba.max() <= 1)
 67 |     #
 68 |     # def test_prediction_proba_linear(self):
 69 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 70 |     #     assert (pred_proba.min() >= 0)
 71 |     #     assert (pred_proba.max() <= 1)
 72 |     #
 73 |     # def test_prediction_proba_unify(self):
 74 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 75 |     #     assert (pred_proba.min() >= 0)
 76 |     #     assert (pred_proba.max() <= 1)
 77 |     #
 78 |     # def test_prediction_proba_parameter(self):
 79 |     #     with assert_raises(ValueError):
 80 |     #         self.clf.predict_proba(self.X_test, method='something')
 81 | 
 82 |     def test_prediction_labels_confidence(self):
 83 |         pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True)
 84 | 
 85 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 86 |         assert_equal(confidence.shape, self.yts_test.shape)
 87 |         assert (confidence.min() >= 0)
 88 |         assert (confidence.max() <= 1)
 89 | 
 90 |     # def test_prediction_proba_linear_confidence(self):
 91 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 92 |     #                                                     method='linear',
 93 |     #                                                     return_confidence=True)
 94 |     #     assert (pred_proba.min() >= 0)
 95 |     #     assert (pred_proba.max() <= 1)
 96 |     #
 97 |     #     assert_equal(confidence.shape, self.y_test.shape)
 98 |     #     assert (confidence.min() >= 0)
 99 |     #     assert (confidence.max() <= 1)
100 |     #
101 |     # def test_fit_predict(self):
102 |     #     pred_labels = self.clf.fit_predict(self.X_train)
103 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
104 |     #
105 |     # def test_fit_predict_score(self):
106 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
107 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
108 |     #                                scoring='roc_auc_score')
109 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
110 |     #                                scoring='prc_n_score')
111 |     #     with assert_raises(NotImplementedError):
112 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
113 |     #                                    scoring='something')
114 |     #
115 |     # def test_predict_rank(self):
116 |     #     pred_socres = self.clf.decision_function(self.X_test)
117 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
118 |     #
119 |     #     # assert the order is reserved
120 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
121 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
122 |     #     assert_array_less(-0.1, pred_ranks)
123 |     #
124 |     # def test_predict_rank_normalized(self):
125 |     #     pred_socres = self.clf.decision_function(self.X_test)
126 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
127 |     #
128 |     #     # assert the order is reserved
129 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
130 |     #     assert_array_less(pred_ranks, 1.01)
131 |     #     assert_array_less(-0.1, pred_ranks)
132 | 
133 |     # def test_plot(self):
134 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
135 |     #     assert_array_less(0, os)
136 | 
137 |     # def test_model_clone(self):
138 |     #     clone_clf = clone(self.clf)
139 | 
140 |     def tearDown(self):
141 |         pass
142 | 
143 | 
144 | if __name__ == '__main__':  # run the suite when executed as a script
145 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_tranad.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | from sklearn.metrics import roc_auc_score
 12 | import torch
 13 | import pandas as pd
 14 | 
 15 | # temporary solution for relative imports in case pyod is not installed
 16 | # if deepod is installed, no need to use the following line
 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 18 | 
 19 | from deepod.models.time_series.tranad import TranAD
 20 | 
 21 | 
 22 | class TestTranAD(unittest.TestCase):
 23 |     def setUp(self):
 24 |         train_file = 'data/omi-1/omi-1_train.csv'
 25 |         test_file = 'data/omi-1/omi-1_test.csv'
 26 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 27 |         test_df = pd.read_csv(test_file, index_col=0)
 28 |         y = test_df['label'].values
 29 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 30 |         self.Xts_train = train_df.values
 31 |         self.Xts_test = test_df.values
 32 |         self.yts_test = y
 33 | 
 34 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 35 |         self.clf = TranAD(seq_len=100, stride=5, epochs=2,
 36 |                           device=device, random_state=42)
 37 |         self.clf.fit(self.Xts_train)
 38 | 
 39 |     def test_parameters(self):
 40 |         assert (hasattr(self.clf, 'decision_scores_') and
 41 |                 self.clf.decision_scores_ is not None)
 42 |         assert (hasattr(self.clf, 'labels_') and
 43 |                 self.clf.labels_ is not None)
 44 |         assert (hasattr(self.clf, 'threshold_') and
 45 |                 self.clf.threshold_ is not None)
 46 | 
 47 |     def test_train_scores(self):
 48 |         assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0])
 49 | 
 50 |     def test_prediction_scores(self):
 51 |         pred_scores = self.clf.decision_function(self.Xts_test)
 52 |         assert_equal(pred_scores.shape[0], self.Xts_test.shape[0])
 53 | 
 54 |     def test_prediction_labels(self):
 55 |         pred_labels = self.clf.predict(self.Xts_test)
 56 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 57 | 
 58 |     # def test_prediction_proba(self):
 59 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 60 |     #     assert (pred_proba.min() >= 0)
 61 |     #     assert (pred_proba.max() <= 1)
 62 |     #
 63 |     # def test_prediction_proba_linear(self):
 64 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 65 |     #     assert (pred_proba.min() >= 0)
 66 |     #     assert (pred_proba.max() <= 1)
 67 |     #
 68 |     # def test_prediction_proba_unify(self):
 69 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 70 |     #     assert (pred_proba.min() >= 0)
 71 |     #     assert (pred_proba.max() <= 1)
 72 |     #
 73 |     # def test_prediction_proba_parameter(self):
 74 |     #     with assert_raises(ValueError):
 75 |     #         self.clf.predict_proba(self.X_test, method='something')
 76 | 
 77 |     def test_prediction_labels_confidence(self):
 78 |         pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True)
 79 | 
 80 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 81 |         assert_equal(confidence.shape, self.yts_test.shape)
 82 |         assert (confidence.min() >= 0)
 83 |         assert (confidence.max() <= 1)
 84 | 
 85 |     # def test_prediction_proba_linear_confidence(self):
 86 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 87 |     #                                                     method='linear',
 88 |     #                                                     return_confidence=True)
 89 |     #     assert (pred_proba.min() >= 0)
 90 |     #     assert (pred_proba.max() <= 1)
 91 |     #
 92 |     #     assert_equal(confidence.shape, self.y_test.shape)
 93 |     #     assert (confidence.min() >= 0)
 94 |     #     assert (confidence.max() <= 1)
 95 |     #
 96 |     # def test_fit_predict(self):
 97 |     #     pred_labels = self.clf.fit_predict(self.X_train)
 98 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
 99 |     #
100 |     # def test_fit_predict_score(self):
101 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
102 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
103 |     #                                scoring='roc_auc_score')
104 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
105 |     #                                scoring='prc_n_score')
106 |     #     with assert_raises(NotImplementedError):
107 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
108 |     #                                    scoring='something')
109 |     #
110 |     # def test_predict_rank(self):
111 |     #     pred_socres = self.clf.decision_function(self.X_test)
112 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
113 |     #
114 |     #     # assert the order is reserved
115 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
116 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
117 |     #     assert_array_less(-0.1, pred_ranks)
118 |     #
119 |     # def test_predict_rank_normalized(self):
120 |     #     pred_socres = self.clf.decision_function(self.X_test)
121 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
122 |     #
123 |     #     # assert the order is reserved
124 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
125 |     #     assert_array_less(pred_ranks, 1.01)
126 |     #     assert_array_less(-0.1, pred_ranks)
127 | 
128 |     # def test_plot(self):
129 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
130 |     #     assert_array_less(0, os)
131 | 
132 |     # def test_model_clone(self):
133 |     #     clone_clf = clone(self.clf)
134 | 
135 |     def tearDown(self):
136 |         pass
137 | 
138 | 
139 | if __name__ == '__main__':  # run the suite when executed as a script
140 |     unittest.main()


--------------------------------------------------------------------------------
/deepod/test/test_usad.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import os
  6 | import sys
  7 | import unittest
  8 | 
  9 | # noinspection PyProtectedMember
 10 | from numpy.testing import assert_equal
 11 | from sklearn.metrics import roc_auc_score
 12 | import torch
 13 | import pandas as pd
 14 | 
 15 | # temporary solution for relative imports in case pyod is not installed
 16 | # if deepod is installed, no need to use the following line
 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 18 | 
 19 | from deepod.models.time_series.usad import USAD
 20 | 
 21 | class TestUSAD(unittest.TestCase):
 22 |     def setUp(self):
 23 |         train_file = 'data/omi-1/omi-1_train.csv'
 24 |         test_file = 'data/omi-1/omi-1_test.csv'
 25 |         train_df = pd.read_csv(train_file, sep=',', index_col=0)
 26 |         test_df = pd.read_csv(test_file, index_col=0)
 27 |         y = test_df['label'].values
 28 |         train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
 29 |         self.Xts_train = train_df.values
 30 |         self.Xts_test = test_df.values
 31 |         self.yts_test = y
 32 | 
 33 |         device = 'cuda' if torch.cuda.is_available() else 'cpu'
 34 |         self.clf = USAD(seq_len=100, stride=5,
 35 |                         epochs=5, hidden_dims=50,
 36 |                         device=device, random_state=42)
 37 |         self.clf.fit(self.Xts_train)
 38 | 
 39 |     def test_parameters(self):
 40 |         assert (hasattr(self.clf, 'decision_scores_') and
 41 |                 self.clf.decision_scores_ is not None)
 42 |         assert (hasattr(self.clf, 'labels_') and
 43 |                 self.clf.labels_ is not None)
 44 |         assert (hasattr(self.clf, 'threshold_') and
 45 |                 self.clf.threshold_ is not None)
 46 | 
 47 |     def test_train_scores(self):
 48 |         assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0])
 49 | 
 50 |     def test_prediction_scores(self):
 51 |         pred_scores = self.clf.decision_function(self.Xts_test)
 52 |         assert_equal(pred_scores.shape[0], self.Xts_test.shape[0])
 53 | 
 54 |     def test_prediction_labels(self):
 55 |         pred_labels = self.clf.predict(self.Xts_test)
 56 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 57 | 
 58 |     # def test_prediction_proba(self):
 59 |     #     pred_proba = self.clf.predict_proba(self.X_test)
 60 |     #     assert (pred_proba.min() >= 0)
 61 |     #     assert (pred_proba.max() <= 1)
 62 |     #
 63 |     # def test_prediction_proba_linear(self):
 64 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
 65 |     #     assert (pred_proba.min() >= 0)
 66 |     #     assert (pred_proba.max() <= 1)
 67 |     #
 68 |     # def test_prediction_proba_unify(self):
 69 |     #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
 70 |     #     assert (pred_proba.min() >= 0)
 71 |     #     assert (pred_proba.max() <= 1)
 72 |     #
 73 |     # def test_prediction_proba_parameter(self):
 74 |     #     with assert_raises(ValueError):
 75 |     #         self.clf.predict_proba(self.X_test, method='something')
 76 | 
 77 |     def test_prediction_labels_confidence(self):
 78 |         pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True)
 79 | 
 80 |         assert_equal(pred_labels.shape, self.yts_test.shape)
 81 |         assert_equal(confidence.shape, self.yts_test.shape)
 82 |         assert (confidence.min() >= 0)
 83 |         assert (confidence.max() <= 1)
 84 | 
 85 |     # def test_prediction_proba_linear_confidence(self):
 86 |     #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
 87 |     #                                                     method='linear',
 88 |     #                                                     return_confidence=True)
 89 |     #     assert (pred_proba.min() >= 0)
 90 |     #     assert (pred_proba.max() <= 1)
 91 |     #
 92 |     #     assert_equal(confidence.shape, self.y_test.shape)
 93 |     #     assert (confidence.min() >= 0)
 94 |     #     assert (confidence.max() <= 1)
 95 |     #
 96 |     # def test_fit_predict(self):
 97 |     #     pred_labels = self.clf.fit_predict(self.X_train)
 98 |     #     assert_equal(pred_labels.shape, self.y_train.shape)
 99 |     #
100 |     # def test_fit_predict_score(self):
101 |     #     self.clf.fit_predict_score(self.X_test, self.y_test)
102 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
103 |     #                                scoring='roc_auc_score')
104 |     #     self.clf.fit_predict_score(self.X_test, self.y_test,
105 |     #                                scoring='prc_n_score')
106 |     #     with assert_raises(NotImplementedError):
107 |     #         self.clf.fit_predict_score(self.X_test, self.y_test,
108 |     #                                    scoring='something')
109 |     #
110 |     # def test_predict_rank(self):
111 |     #     pred_socres = self.clf.decision_function(self.X_test)
112 |     #     pred_ranks = self.clf._predict_rank(self.X_test)
113 |     #
114 |     #     # assert the order is reserved
115 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
116 |     #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
117 |     #     assert_array_less(-0.1, pred_ranks)
118 |     #
119 |     # def test_predict_rank_normalized(self):
120 |     #     pred_socres = self.clf.decision_function(self.X_test)
121 |     #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
122 |     #
123 |     #     # assert the order is reserved
124 |     #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
125 |     #     assert_array_less(pred_ranks, 1.01)
126 |     #     assert_array_less(-0.1, pred_ranks)
127 | 
128 |     # def test_plot(self):
129 |     #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
130 |     #     assert_array_less(0, os)
131 | 
132 |     # def test_model_clone(self):
133 |     #     clone_clf = clone(self.clf)
134 | 
135 |     def tearDown(self):
136 |         pass
137 | 
138 | 
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()


--------------------------------------------------------------------------------
/deepod/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xuhongzuo/DeepOD/bb8c20c5c7e860a7c5f6dde4b4a9981bd0eb739d/deepod/utils/__init__.py


--------------------------------------------------------------------------------
/deepod/utils/data.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """Utility functions for manipulating data
  3 | """
  4 | # Author: Yue Zhao <zhaoy@cmu.edu>
  5 | # Author: Yahya Almardeny <almardeny@gmail.com>
  6 | # License: BSD 2 clause
  7 | 
  8 | from warnings import warn
  9 | from sklearn.utils import check_X_y
 10 | from sklearn.utils import check_random_state
 11 | from sklearn.utils import check_consistent_length
 12 | import numpy as np
 13 | 
 14 | 
 15 | def _generate_data(n_inliers, n_outliers, n_features, coef, offset,
 16 |                    random_state, n_nan=0, n_inf=0):
 17 |     """Internal function to generate data samples.
 18 | 
 19 |     Parameters
 20 |     ----------
 21 |     n_inliers : int
 22 |         The number of inliers.
 23 | 
 24 |     n_outliers : int
 25 |         The number of outliers.
 26 | 
 27 |     n_features : int
 28 |         The number of features (dimensions).
 29 | 
 30 |     coef : float in range [0,1)+0.001
 31 |         The coefficient of data generation.
 32 | 
 33 |     offset : int
 34 |         Adjust the value range of Gaussian and Uniform.
 35 | 
 36 |     random_state : int, RandomState instance or None, optional (default=None)
 37 |         If int, random_state is the seed used by the random number generator;
 38 |         If RandomState instance, random_state is the random number generator;
 39 |         If None, the random number generator is the RandomState instance used
 40 |         by `np.random`.
 41 | 
 42 |     n_nan : int
 43 |         The number of values that are missing (np.NaN). Defaults to zero.
 44 | 
 45 |     n_inf : int
 46 |         The number of values that are infinite. (np.infty). Defaults to zero.
 47 | 
 48 |     Returns
 49 |     -------
 50 |     X : numpy array of shape (n_train, n_features)
 51 |         Data.
 52 | 
 53 |     y : numpy array of shape (n_train,)
 54 |         Ground truth.
 55 |     """
 56 | 
 57 |     inliers = coef * random_state.randn(n_inliers, n_features) + offset
 58 |     outliers = random_state.uniform(low=-1 * offset, high=offset,
 59 |                                     size=(n_outliers, n_features))
 60 |     X = np.r_[inliers, outliers]
 61 | 
 62 |     y = np.r_[np.zeros((n_inliers,)), np.ones((n_outliers,))]
 63 | 
 64 |     if n_nan > 0:
 65 |         X = np.r_[X, np.full((n_nan, n_features), np.NaN)]
 66 |         y = np.r_[y, np.full((n_nan), np.NaN)]
 67 | 
 68 |     if n_inf > 0:
 69 |         X = np.r_[X, np.full((n_inf, n_features), np.infty)]
 70 |         y = np.r_[y, np.full((n_inf), np.infty)]
 71 | 
 72 |     return X, y
 73 | 
 74 | 
 75 | def generate_data(n_train=1000, n_test=500, n_features=2, contamination=0.1,
 76 |                   train_only=False, offset=10,
 77 |                   random_state=None, n_nan=0, n_inf=0):
 78 |     """Utility function to generate synthesized data.
 79 |     Normal data is generated by a multivariate Gaussian distribution and
 80 |     outliers are generated by a uniform distribution.
 81 |     "X_train, X_test, y_train, y_test" are returned.
 82 | 
 83 |     Parameters
 84 |     ----------
 85 |     n_train : int, (default=1000)
 86 |         The number of training points to generate.
 87 | 
 88 |     n_test : int, (default=500)
 89 |         The number of test points to generate.
 90 | 
 91 |     n_features : int, optional (default=2)
 92 |         The number of features (dimensions).
 93 | 
 94 |     contamination : float in (0., 0.5), optional (default=0.1)
 95 |         The amount of contamination of the data set, i.e.
 96 |         the proportion of outliers in the data set. Used when fitting to
 97 |         define the threshold on the decision function.
 98 | 
 99 |     train_only : bool, optional (default=False)
100 |         If true, generate train data only.
101 | 
102 |     offset : int, optional (default=10)
103 |         Adjust the value range of Gaussian and Uniform.
104 | 
105 |     random_state : int, RandomState instance or None, optional (default=None)
106 |         If int, random_state is the seed used by the random number generator;
107 |         If RandomState instance, random_state is the random number generator;
108 |         If None, the random number generator is the RandomState instance used
109 |         by `np.random`.
110 | 
111 |     n_nan : int
112 |         The number of values that are missing (np.NaN). Defaults to zero.
113 | 
114 |     n_inf : int
115 |         The number of values that are infinite. (np.infty). Defaults to zero.
116 | 
117 |     Returns
118 |     -------
119 |     X_train : numpy array of shape (n_train, n_features)
120 |         Training data.
121 | 
122 |     X_test : numpy array of shape (n_test, n_features)
123 |         Test data.
124 | 
125 |     y_train : numpy array of shape (n_train,)
126 |         Training ground truth.
127 | 
128 |     y_test : numpy array of shape (n_test,)
129 |         Test ground truth.
130 | 
131 |     """
132 | 
133 |     # initialize a random state and seeds for the instance
134 |     random_state = check_random_state(random_state)
135 |     offset_ = random_state.randint(low=offset)
136 |     coef_ = random_state.random_sample() + 0.001  # in case of underflow
137 | 
138 |     n_outliers_train = int(n_train * contamination)
139 |     n_inliers_train = int(n_train - n_outliers_train)
140 | 
141 |     X_train, y_train = _generate_data(n_inliers_train, n_outliers_train,
142 |                                       n_features, coef_, offset_, random_state,
143 |                                       n_nan, n_inf)
144 | 
145 |     if train_only:
146 |         return X_train, y_train
147 | 
148 |     n_outliers_test = int(n_test * contamination)
149 |     n_inliers_test = int(n_test - n_outliers_test)
150 | 
151 |     X_test, y_test = _generate_data(n_inliers_test, n_outliers_test,
152 |                                     n_features, coef_, offset_, random_state,
153 |                                     n_nan, n_inf)
154 | 
155 |     return X_train, X_test, y_train, y_test
156 | 
157 | 


--------------------------------------------------------------------------------
/deepod/utils/utility.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn import metrics
 3 | 
 4 | 
 5 | def get_sub_seqs(x_arr, seq_len=100, stride=1):
 6 |     """
 7 | 
 8 |     Parameters
 9 |     ----------
10 |     x_arr: np.array, required
11 |         input original data with shape [time_length, channels]
12 | 
13 |     seq_len: int, optional (default=100)
14 |         Size of window used to create subsequences from the data
15 | 
16 |     stride: int, optional (default=1)
17 |         number of time points the window will move between two subsequences
18 | 
19 |     Returns
20 |     -------
21 |     x_seqs: np.array
22 |         Split sub-sequences of input time-series data
23 |     """
24 | 
25 |     if x_arr.shape[0] < seq_len:
26 |         seq_len = x_arr.shape[0]
27 |     seq_starts = np.arange(0, x_arr.shape[0] - seq_len + 1, stride)
28 |     x_seqs = np.array([x_arr[i:i + seq_len] for i in seq_starts])
29 | 
30 |     return x_seqs
31 | 
32 | 
def get_sub_seqs_label(y, seq_len=100, stride=1):
    """
    Derive one binary label per sliding window of a label series.

    A window is labelled 1 when the sum of its labels is non-zero,
    otherwise 0.

    Parameters
    ----------
    y: np.array, required
        data labels

    seq_len: int, optional (default=100)
        Size of window used to create subsequences from the data.
        Clipped to the label length when the series is shorter.

    stride: int, optional (default=1)
        number of time points the window will move between two subsequences

    Returns
    -------
    y_seqs: np.array
        Split label of each sequence
    """
    n_points = y.shape[0]
    window = min(seq_len, n_points)

    starts = np.arange(0, n_points - window + 1, stride)
    label_windows = np.array([y[s:s + window] for s in starts])

    # Fraction of labelled points in each window.
    ratio = np.sum(label_windows, axis=1) / window

    window_labels = np.zeros_like(ratio)
    flagged = np.nonzero(ratio != 0)[0]
    window_labels[flagged] = 1
    return window_labels
62 | 
63 | 
64 | 


--------------------------------------------------------------------------------
/deepod/version.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ``deepod`` is a deep learning-based outlier/anomaly detection framework.
 3 | """
 4 | # Based on the pysad and pyod packages
 5 | # License: BSD 2-Clause
 6 | 
 7 | # PEP0440 compatible formatted version, see:
 8 | # https://www.python.org/dev/peps/pep-0440/
 9 | #
10 | # Generic release markers:
11 | # X.Y
12 | # X.Y.Z # For bugfix releases
13 | #
14 | # Admissible pre-release markers:
15 | # X.YaN # Alpha release
16 | # X.YbN # Beta release
17 | # X.YrcN # Release Candidate
18 | # X.Y # Final release
19 | #
20 | # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
21 | # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
22 | #
23 | 
# Package version string (PEP 440 format, see the markers described above).
__version__ = '0.5.0'
25 | 


--------------------------------------------------------------------------------
/docs/Gemfile:
--------------------------------------------------------------------------------
# Gem index that Bundler resolves the gems below from.
source "https://gems.ruby-china.com"
# NOTE(review): presumably the Jekyll theme used for the docs site; confirm.
gem "jekyll-rtd-theme"

# Declared in the :jekyll_plugins group.
gem "github-pages", group: :jekyll_plugins


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | DEBUG=JEKYLL_GITHUB_TOKEN=blank PAGES_API_URL=http://0.0.0.0
 2 | 
 3 | default:
 4 | 	@gem install jekyll bundler && bundle install
 5 | 
 6 | update:
 7 | 	@bundle update
 8 | 
 9 | clean:
10 | 	@bundle exec jekyll clean
11 | 
12 | build: clean
13 | 	@${DEBUG} bundle exec jekyll build --profile --config _config.yml,.debug.yml
14 | 
15 | server: clean
16 | 	@${DEBUG} bundle exec jekyll server --livereload --config _config.yml,.debug.yml


--------------------------------------------------------------------------------
/docs/_templates/class.rst:
--------------------------------------------------------------------------------
 1 | {{ fullname }}
 2 | {{ underline }}
 3 | .. currentmodule:: {{ module }}
 4 | .. autoclass:: {{ objname }}
 5 |    {% block methods %}
 6 |    {% if methods %}
 7 |    .. rubric:: Methods
 8 |    .. autosummary::
 9 |    {% for item in methods %}
10 |       ~{{ name }}.{{ item }}
11 |    {%- endfor %}
12 |    {% endif %}
13 |    {% endblock %}
14 |    {% block attributes %}
15 |    {% if attributes %}
16 |    .. rubric:: Attributes
17 |    .. autosummary::
18 |    {% for item in attributes %}
19 |       ~{{ name }}.{{ item }}
20 |    {%- endfor %}
21 |    {% endif %}
22 |    {% endblock %}


--------------------------------------------------------------------------------
/docs/additional.contributing.rst:
--------------------------------------------------------------------------------
 1 | Contributing
 2 | =============
 3 | 
 4 | Everyone is very welcome to contribute.
 5 | 
 6 | We share the same values as the `scikit-learn <https://scikit-learn.org/stable/developers/contributing.html>`_ community.
 7 | 
 8 | 
 9 | .. note::
10 |     We are a community based on openness and friendly, didactic, discussions.
11 | 
12 |     We aspire to treat everybody equally, and value their contributions. We are particularly seeking people 
13 |     from underrepresented backgrounds in Open Source Software and scikit-learn in particular to participate 
14 |     and contribute their expertise and experience.
15 | 
16 |     Decisions are made based on technical merit and consensus.
17 | 
18 |     Code is not the only way to help the project. Reviewing pull requests, 
19 |     answering questions to help others on mailing lists or issues, organizing and teaching tutorials, 
20 |     working on the website, improving the documentation, are all priceless contributions.
21 | 
22 |     We abide by the principles of openness, respect, and consideration of others of the Python 
23 |     Software Foundation: https://www.python.org/psf/codeofconduct/
24 | 
25 |     In case you experience issues using this package, do not hesitate to submit a ticket to the GitHub issue tracker. 
26 |     You are also welcome to post feature requests or pull requests.
27 | 
28 | 
29 | 
30 | For any questions, you may open an issue on GitHub or send an email to hongzuoxu(at)126.com.
31 | 
32 | 
33 | TODO list
34 | ---------
35 | A TODO list is attached below; contributions to any item on it are very welcome.
36 | 


--------------------------------------------------------------------------------
/docs/additional.license.rst:
--------------------------------------------------------------------------------
 1 | License
 2 | =======
 3 | 
 4 | 
 5 | This project is licensed under the BSD 2-Clause License.
 6 | 
 7 | .. code-block:: 
 8 |     
 9 |     BSD 2-Clause License
10 | 
11 |     Copyright (c) 2023, Hongzuo Xu All rights reserved. 
12 | 
13 |     Redistribution and use in source and binary forms, with or without
14 |     modification, are permitted provided that the following conditions are met:
15 | 
16 |     1. Redistributions of source code must retain the above copyright notice, this
17 |     list of conditions and the following disclaimer.
18 | 
19 |     2. Redistributions in binary form must reproduce the above copyright notice,
20 |     this list of conditions and the following disclaimer in the documentation
21 |     and/or other materials provided with the distribution.
22 | 
23 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
26 |     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
27 |     FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 |     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29 |     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 |     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 |     OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 |     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 | 
34 | 


--------------------------------------------------------------------------------
/docs/additional.star_history.rst:
--------------------------------------------------------------------------------
 1 | Star History on Github
 2 | ======================
 3 | 
 4 | Thanks go to everyone who has starred or forked our repository.
 5 | 
 6 | 
 7 | .. image:: https://api.star-history.com/svg?repos=xuhongzuo/DeepOD&type=Date
 8 |    :target: https://star-history.com/#xuhongzuo/DeepOD&Date
 9 |    :align: center
10 | 
11 | 


--------------------------------------------------------------------------------
/docs/api_cc.rst:
--------------------------------------------------------------------------------
 1 | API CheatSheet
 2 | ==============
 3 | 
 4 | The following APIs are applicable for all detector models for easy use.
 5 | 
 6 | * :func:`deepod.core.base_model.BaseDeepAD.fit`: Fit detector. y is ignored in unsupervised methods.
 7 | * :func:`deepod.core.base_model.BaseDeepAD.decision_function`: Predict raw anomaly score of X using the fitted detector.
 8 | * :func:`deepod.core.base_model.BaseDeepAD.predict`: Predict if a particular sample is an outlier or not using the fitted detector.
 9 | 
10 | 
11 | Key Attributes of a fitted model:
12 | 
13 | * :attr:`deepod.core.base_model.BaseDeepAD.decision_scores_`: The outlier scores of the training data. The higher, the more abnormal.
14 |   Outliers tend to have higher scores.
15 | * :attr:`deepod.core.base_model.BaseDeepAD.labels_`: The binary labels of the training data. 0 stands for inliers and 1 for outliers/anomalies.
16 | 
17 | 
18 | See base class definition below:
19 | 
20 | deepod.core.base_model module
21 | -----------------------------
22 | 
23 | .. automodule:: deepod.core.base_model
24 |     :members:
25 |     :undoc-members:
26 |     :show-inheritance:
27 |     :inherited-members:
28 | 
29 | 


--------------------------------------------------------------------------------
/docs/api_reference.base_networks.rst:
--------------------------------------------------------------------------------
 1 | Network Architectures
 2 | ------------------------------------
 3 | 
 4 | 
 5 | 
 6 | .. currentmodule:: deepod
 7 | 
 8 | .. autosummary::
 9 |     :nosignatures:
10 |     :template: class.rst
11 |     :toctree: generated
12 | 
13 |     core.networks.MLPnet
14 |     core.networks.MlpAE
15 |     core.networks.GRUNet
16 |     core.networks.LSTMNet
17 |     core.networks.ConvSeqEncoder
18 |     core.networks.ConvNet
19 |     core.networks.TcnAE
20 |     core.networks.TCNnet
21 |     core.networks.TSTransformerEncoder
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/docs/api_reference.metrics.rst:
--------------------------------------------------------------------------------
 1 | Evaluation Metrics
 2 | ===================
 3 | 
 4 | 
 5 | 
 6 | 
 7 | .. automodule:: deepod.metrics
 8 |     :members:
 9 |     :undoc-members:
10 |     :show-inheritance:
11 |     :inherited-members:
12 | 
13 | .. rubric:: References
14 | 
15 | .. bibliography::
16 |    :cited:
17 |    :labelprefix: B


--------------------------------------------------------------------------------
/docs/api_reference.rst:
--------------------------------------------------------------------------------
 1 | API Reference
 2 | -------------
 3 | 
 4 | This is the API documentation for ``DeepOD``. 
 5 | 
 6 | 
 7 | .. toctree::
 8 | 
 9 |     api_reference.tabular
10 |     api_reference.time_series
11 |     api_reference.base_networks
12 |     api_reference.metrics
13 | 
14 | 
15 | 
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/docs/api_reference.tabular.rst:
--------------------------------------------------------------------------------
 1 | Models for Tabular Data
 2 | ------------------------------------------
 3 | 
 4 | 
 5 | .. automodule:: deepod.models.tabular
 6 |     :no-members:
 7 |     :no-inherited-members:
 8 | 
 9 | 
10 | .. currentmodule:: deepod
11 | 
12 | 
13 | Unsupervised Models
14 | ^^^^^^^^^^^^^^^^^^^^^
15 | implemented unsupervised anomaly detection models
16 | 
17 | .. autosummary::
18 |    :nosignatures:
19 |    :template: class.rst
20 |    :toctree: generated
21 | 
22 |     models.DeepSVDD
23 |     models.RCA
24 |     models.DevNet
25 |     models.DeepIsolationForest
26 |     models.REPEN
27 |     models.SLAD
28 |     models.ICL
29 |     models.RDP
30 |     models.GOAD
31 |     models.NeuTraL
32 | 
33 | Weakly-supervised Models
34 | ^^^^^^^^^^^^^^^^^^^^^^^^^^
35 | Implemented weakly-supervised anomaly detection models
36 | 
37 | .. autosummary::
38 |    :nosignatures:
39 |    :template: class.rst
40 |    :toctree: generated
41 | 
42 |     models.DevNet
43 |     models.DeepSAD
44 |     models.FeaWAD
45 |     models.RoSAS
46 |     models.PReNet
47 | 
48 | 
49 | .. rubric:: References
50 | 
51 | .. bibliography::
52 |    :cited:
53 |    :labelprefix: B


--------------------------------------------------------------------------------
/docs/api_reference.time_series.rst:
--------------------------------------------------------------------------------
 1 | Models for Time Series
 2 | ========================================
 3 | 
 4 | 
 5 | .. automodule:: deepod.models.time_series
 6 |     :no-members:
 7 |     :no-inherited-members:
 8 | 
 9 | 
10 | .. currentmodule:: deepod
11 | 
12 | 
13 | 
14 | implemented unsupervised anomaly detection models for time series data. 
15 | 
16 | .. autosummary::
17 |    :nosignatures:
18 |    :template: class.rst
19 |    :toctree: generated
20 | 
21 |     models.TimesNet
22 |     models.DCdetector
23 |     models.AnomalyTransformer
24 |     models.NCAD
25 |     models.TranAD
26 |     models.COUTA
27 |     models.TcnED
28 |     models.DeepIsolationForestTS
29 |     models.DeepSVDDTS
30 |     models.DeepSADTS
31 |     models.DevNetTS
32 |     models.PReNetTS
33 | 
34 | 
35 | 
36 | .. rubric:: References
37 | 
38 | .. bibliography::
39 |    :cited:
40 |    :labelprefix: B
41 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Configuration file for the Sphinx documentation builder.
  4 | #
  5 | # This file does only contain a selection of the most common options. For a
  6 | # full list see the documentation:
  7 | # http://www.sphinx-doc.org/en/master/config
  8 | 
  9 | # -- Path setup --------------------------------------------------------------
 10 | 
 11 | # If extensions (or modules to document with autodoc) are in another directory,
 12 | # add these directories to sys.path here. If the directory is relative to the
 13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 14 | #
 15 | 
 16 | import os
 17 | import sys
 18 | from os.path import dirname, abspath
 19 | 
 20 | sys.path.insert(0, abspath('..'))
 21 | deepod_dir = dirname(dirname(abspath(__file__)))
 22 | 
 23 | version_path = os.path.join(deepod_dir, 'deepod', 'version.py')
 24 | exec(open(version_path).read())
 25 | # -- Project information -----------------------------------------------------
 26 | 
 27 | project = 'DeepOD'
 28 | copyright = '2023, Hongzuo Xu'
 29 | author = 'Hongzuo Xu'
 30 | 
 31 | 
 32 | # -- General configuration ---------------------------------------------------
 33 | 
 34 | # If your documentation needs a minimal Sphinx version, state it here.
 35 | #
 36 | # needs_sphinx = '1.0'
 37 | 
 38 | # Add any Sphinx extension module names here, as strings. They can be
 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 40 | # ones.
 41 | extensions = [
 42 |     'sphinx.ext.autodoc',
 43 |     'sphinx.ext.doctest',
 44 |     'sphinx.ext.intersphinx',
 45 |     'sphinx.ext.coverage',
 46 |     'sphinx.ext.imgmath',
 47 |     'sphinx.ext.viewcode',
 48 |     'sphinx.ext.autosummary',
 49 |     'sphinxcontrib.bibtex',
 50 |     # 'sphinx.ext.napoleon',
 51 |     'sphinx_rtd_theme',
 52 |     'sphinx.ext.napoleon'
 53 | ]
 54 | 
 55 | bibtex_bibfiles = ['zreferences.bib']
 56 | 
 57 | # Add any paths that contain templates here, relative to this directory.
 58 | templates_path = ['_templates']
 59 | 
 60 | # The suffix(es) of source filenames.
 61 | # You can specify multiple suffix as a list of string:
 62 | #
 63 | source_suffix = ['.rst', '.md']
 64 | 
 65 | # The master toctree document.
 66 | master_doc = 'index'
 67 | 
 68 | # The language for content autogenerated by Sphinx. Refer to documentation
 69 | # for a list of supported languages.
 70 | #
 71 | # This is also used if you do content translation via gettext catalogs.
 72 | # Usually you set "language" from the command line for these cases.
 73 | language = 'en'
 74 | 
 75 | # List of patterns, relative to source directory, that match files and
 76 | # directories to ignore when looking for source files.
 77 | # This pattern also affects html_static_path and html_extra_path .
 78 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'pyod.test.rst']
 79 | 
 80 | # The name of the Pygments (syntax highlighting) style to use.
 81 | pygments_style = 'sphinx'
 82 | 
 83 | # -- Options for HTML output -------------------------------------------------
 84 | 
 85 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 86 | # a list of builtin themes.
 87 | # https://www.sphinx-doc.org/en/master/usage/theming.html#themes#Themes
 88 | # html_theme = 'default'
 89 | # html_theme = "alabaster"
 90 | # html_theme = 'sphinxawesome_theme'
 91 | html_theme = 'furo'
 92 | # html_theme = 'sphinx_rtd_theme'
 93 | 
 94 | # Theme options are theme-specific and customize the look and feel of a theme
 95 | # further.  For a list of options available for each theme, see the
 96 | # documentation.
 97 | #
 98 | # html_theme_options = {
 99 | #     'canonical_url': '',
100 | #     'logo_only': False,
101 | #     'display_version': True,
102 | #     'prev_next_buttons_location': 'bottom',
103 | #     'style_external_links': False,
104 | #     #'vcs_pageview_mode': '',
105 | #     #'style_nav_header_background': 'white',
106 | #     # Toc options
107 | #     'collapse_navigation': True,
108 | #     'sticky_navigation': True,
109 | #     'navigation_depth': 7,
110 | #     'includehidden': True,
111 | #     'titles_only': False,
112 | # }
113 | 
114 | # Add any paths that contain custom static files (such as style sheets) here,
115 | # relative to this directory. They are copied after the builtin static files,
116 | # so a file named "default.css" will overwrite the builtin "default.css".
117 | html_static_path = ['_static']
118 | 
119 | autosummary_generate = True
120 | autodoc_default_options = {'members': True,
121 |                            'inherited-members': True,
122 |                            }
123 | autodoc_typehints = "none"
124 | 
125 | 
126 | # Custom sidebar templates, must be a dictionary that maps document names
127 | # to template names.
128 | #
129 | # The default sidebars (for documents that don't match any pattern) are
130 | # defined by theme itself.  Builtin themes are using these templates by
131 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
132 | # 'searchbox.html']``.
133 | #
134 | # html_sidebars = {}
135 | # html_sidebars = {'**': ['globaltoc.html', 'relations.html', 'sourcelink.html',
136 | #                         'searchbox.html']}
137 | 
138 | # -- Options for HTMLHelp output ---------------------------------------------
139 | 
140 | # Output file base name for HTML help builder.
141 | htmlhelp_basename = 'deepoddoc'
142 | 
143 | # -- Options for LaTeX output ------------------------------------------------
144 | 
145 | latex_elements = {
146 |     # The paper size ('letterpaper' or 'a4paper').
147 |     #
148 |     # 'papersize': 'letterpaper',
149 | 
150 |     # The font size ('10pt', '11pt' or '12pt').
151 |     #
152 |     # 'pointsize': '10pt',
153 | 
154 |     # Additional stuff for the LaTeX preamble.
155 |     #
156 |     # 'preamble': '',
157 | 
158 |     # Latex figure (float) alignment
159 |     #
160 |     # 'figure_align': 'htbp',
161 | }
162 | 
163 | # Grouping the document tree_ into LaTeX files. List of tuples
164 | # (source start file, target name, title,
165 | #  author, documentclass [howto, manual, or own class]).
166 | latex_documents = [
167 |     (master_doc, 'deepod.tex', 'deepod Documentation',
168 |      'Hongzuo Xu', 'manual'),
169 | ]
170 | 
171 | # -- Options for manual page output ------------------------------------------
172 | 
173 | # One entry per manual page. List of tuples
174 | # (source start file, name, description, authors, manual section).
# Name and title refer to this project (they previously carried the
# copy-pasted 'pyod' identifiers).
man_pages = [
    (master_doc, 'deepod', 'DeepOD Documentation',
     [author], 1)
]
179 | 
180 | # -- Options for Texinfo output ----------------------------------------------
181 | 
182 | # Grouping the document tree_ into Texinfo files. List of tuples
183 | # (source start file, target name, title, author,
184 | #  dir menu entry, description, category)
185 | texinfo_documents = [
186 |     (master_doc, 'DeepOD', 'DeepOD Documentation',
187 |      author, 'DeepOD', 'One line description of project.',
188 |      'Miscellaneous'),
189 | ]
190 | 
191 | # -- Extension configuration -------------------------------------------------
192 | 
193 | # -- Options for intersphinx extension ---------------------------------------
194 | 
195 | # Example configuration for intersphinx: refer to the Python standard library.
196 | intersphinx_mapping = {'https://docs.python.org/': None}
197 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
  1 | 
  2 | .. DeepOD documentation master file, created by
  3 |    sphinx-quickstart on Tue Nov  7 21:28:52 2023.
  4 |    You can adapt this file completely to your liking, but it should at least
  5 |    contain the root `toctree` directive.
  6 | 
  7 | 
  8 | Welcome to DeepOD documentation!
  9 | ==================================
 10 | 
 11 | 
 12 | .. image:: https://github.com/xuhongzuo/DeepOD/actions/workflows/testing.yml/badge.svg
 13 |    :target: https://github.com/xuhongzuo/DeepOD/actions/workflows/testing.yml
 14 |    :alt: testing2
 15 | 
 16 | .. image:: https://readthedocs.org/projects/deepod/badge/?version=latest
 17 |     :target: https://deepod.readthedocs.io/en/latest/?badge=latest
 18 |     :alt: Documentation Status
 19 | 
 20 | .. image:: https://coveralls.io/repos/github/xuhongzuo/DeepOD/badge.svg?branch=main
 21 |     :target: https://coveralls.io/github/xuhongzuo/DeepOD?branch=main
 22 |     :alt: coveralls
 23 | 
 24 | .. image:: https://static.pepy.tech/personalized-badge/deepod?period=total&units=international_system&left_color=black&right_color=orange&left_text=Downloads
 25 |    :target: https://pepy.tech/project/deepod
 26 |    :alt: downloads
 27 | 
 28 | 
 29 | 
 30 | 
 31 | ``DeepOD`` is an open-source python library for Deep Learning-based `Outlier Detection <https://en.wikipedia.org/wiki/Anomaly_detection>`_
 32 | and `Anomaly Detection <https://en.wikipedia.org/wiki/Anomaly_detection>`_. ``DeepOD`` supports tabular anomaly detection and time-series anomaly detection.
 33 | 
 34 | 
 35 | DeepOD includes **27** deep outlier detection / anomaly detection algorithms (in unsupervised/weakly-supervised paradigm).
 36 | More baseline algorithms will be included later.
 37 | 
 38 | 
 39 | 
 40 | **DeepOD is featured for**:
 41 | 
 42 | * **Unified APIs** across various algorithms.
 43 | * **SOTA models** include the latest reconstruction-, representation-learning-, and self-supervised-based deep learning methods.
 44 | * **Comprehensive Testbed** that can be used to directly test different models on benchmark datasets (highly recommended for academic research).
 45 | * **Versatile** in different data types including tabular and time-series data (DeepOD will support other data types like images, graph, log, trace, etc. in the future, welcome PR :telescope:).
 46 | * **Diverse Network Structures** can be plugged into detection models, we now support LSTM, GRU, TCN, Conv, and Transformer for time-series data.  (welcome PR as well :sparkles:)
 47 | 
 48 | 
 49 | If you are interested in our project, we are pleased to have your stars and forks :thumbsup: :beers: .
 50 | 
 51 | 
 52 | 
 53 | 
 54 | 
 55 | 
 56 | 
 57 | Citation
 58 | ~~~~~~~~~~~~~~~~~
 59 | If you use this library in your work, please cite this paper:
 60 | 
 61 | Hongzuo Xu, Guansong Pang, Yijie Wang and Yongjun Wang, "Deep Isolation Forest for Anomaly Detection," in IEEE Transactions on Knowledge and Data Engineering, doi: 10.1109/TKDE.2023.3270293.
 62 | 
 63 | 
 64 | You can also use the BibTex entry below for citation.
 65 | 
 66 | .. code-block:: bibtex
 67 | 
 68 |    @ARTICLE{xu2023deep,
 69 |       author={Xu, Hongzuo and Pang, Guansong and Wang, Yijie and Wang, Yongjun},
 70 |       journal={IEEE Transactions on Knowledge and Data Engineering}, 
 71 |       title={Deep Isolation Forest for Anomaly Detection}, 
 72 |       year={2023},
 73 |       volume={},
 74 |       number={},
 75 |       pages={1-14},
 76 |       doi={10.1109/TKDE.2023.3270293}
 77 |    }
 78 | 
 79 | 
 80 | 
 81 | 
 82 | ----
 83 | 
 84 | 
 85 | .. toctree::
 86 |    :maxdepth: 2
 87 |    :hidden:
 88 |    :caption: Getting Started
 89 | 
 90 |    start.install
 91 |    start.examples
 92 |    start.model_save
 93 | 
 94 | .. toctree::
 95 |    :maxdepth: 2
 96 |    :hidden:
 97 |    :caption: Documentation
 98 | 
 99 |    api_reference
100 |    api_cc
101 | 
102 | .. toctree::
103 |    :maxdepth: 2
104 |    :hidden:
105 |    :caption: Additional Information
106 | 
107 |    additional.contributing
108 |    additional.license
109 |    additional.star_history


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy>=1.19
 2 | scipy>=1.5.1
 3 | scikit_learn>=0.20.0
 4 | pandas>=1.0.0
 5 | torch>=1.10.0,<1.13.1
 6 | tqdm>=4.62.3
 7 | ray==2.6.1
 8 | pyarrow>=11.0.0
 9 | einops
10 | sphinx-rtd-theme==1.3.0
11 | sphinxawesome-theme==4.1.0
12 | sphinxcontrib-bibtex==2.5.0
13 | furo==2023.9.10


--------------------------------------------------------------------------------
/docs/start.examples.rst:
--------------------------------------------------------------------------------
 1 | Examples
 2 | =========
 3 | 
 4 | 
 5 | Directly Use Detection Models
 6 | ------------------------------
 7 | 
 8 | 
 9 | DeepOD can be used in a few lines of code. 
10 | The API style is the same as that of `scikit-learn <https://github.com/scikit-learn/scikit-learn>`_ and `PyOD <https://github.com/yzhao062/pyod>`_.
11 | 
12 | 
13 | **for tabular anomaly detection:**
14 | 
15 | .. code-block:: python
16 | 
17 | 
18 |     # unsupervised methods
19 |     from deepod.models.tabular import DeepSVDD
20 |     clf = DeepSVDD()
21 |     clf.fit(X_train, y=None)
22 |     scores = clf.decision_function(X_test)
23 | 
24 |     # weakly-supervised methods
25 |     from deepod.models.tabular import DevNet
26 |     clf = DevNet()
27 |     clf.fit(X_train, y=semi_y) # semi_y uses 1 for known anomalies, and 0 for unlabeled data
28 |     scores = clf.decision_function(X_test)
29 | 
30 |     # evaluation of tabular anomaly detection
31 |     from deepod.metrics import tabular_metrics
32 |     auc, ap, f1 = tabular_metrics(y_test, scores)
33 | 
34 | 
35 | **for time series anomaly detection:**
36 | 
37 | 
38 | .. code-block:: python
39 | 
40 | 
41 |     # time series anomaly detection methods
42 |     from deepod.models.time_series import TimesNet
43 |     clf = TimesNet()
44 |     clf.fit(X_train)
45 |     scores = clf.decision_function(X_test)
46 | 
47 |     # evaluation of time series anomaly detection
48 |     from deepod.metrics import ts_metrics
49 |     from deepod.metrics import point_adjustment # execute point adjustment for time series ad
50 |     eval_metrics = ts_metrics(labels, scores)
51 |     adj_eval_metrics = ts_metrics(labels, point_adjustment(labels, scores))
52 |     
53 | 
54 | 
55 | Testbed
56 | --------
57 | 
58 | 
59 | 
60 | Testbed contains the whole process of testing an anomaly detection model, including data loading, preprocessing, anomaly detection, and evaluation. 
61 | 
62 | Please refer to ``testbed/``
63 | 
64 | * ``testbed/testbed_unsupervised_ad.py`` is for testing unsupervised tabular anomaly detection models.
65 |  
66 | * ``testbed/testbed_unsupervised_tsad.py`` is for testing unsupervised time-series anomaly detection models.
67 | 
68 | 
69 | Key arguments:
70 | 
71 | * ``--input_dir``: name of the folder that contains datasets (.csv, .npy)
72 | 
73 | * ``--dataset``: "FULL" represents testing all the files within the folder, or a list of dataset names using commas to split them (e.g., "10_cover*,20_letter*")
74 | 
75 | * ``--model``: anomaly detection model name
76 | 
77 | * ``--runs``: how many times the detection model is run; the average performance and its standard deviation are reported
78 | 
79 | 
80 | Example: 
81 | 
82 | 1. Download the `ADBench <https://github.com/Minqi824/ADBench/tree/main/adbench/datasets/>`_ datasets.
83 | 2. Set the ``dataset_root`` variable in the testbed script to the directory that contains the datasets.
84 | 3. ``input_dir`` is the name of a sub-folder of ``dataset_root``, e.g., ``Classical`` or ``NLP_by_BERT``.  
85 | 4. Run the following commands in bash:
86 | 
87 | 
88 | .. code-block:: bash
89 | 
90 |     
91 |     cd DeepOD
92 |     pip install .
93 |     cd testbed
94 |     python testbed_unsupervised_ad.py --model DeepIsolationForest --runs 5 --input_dir ADBench
95 |    
96 | 


--------------------------------------------------------------------------------
/docs/start.install.rst:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | 
 4 | It is recommended to use **pip** for installation. Please make sure
 5 | **the latest version** is installed, as DeepOD is updated frequently:
 6 | 
 7 | .. code-block:: bash
 8 | 
 9 |    pip install deepod            # normal install
10 |    pip install --upgrade deepod  # or update if needed
11 | 
12 | 
13 | Alternatively, you could clone and run setup.py file:
14 | 
15 | .. code-block:: bash
16 | 
17 |    git clone https://github.com/xuhongzuo/deepod.git
18 |    cd deepod
19 |    pip install .
20 | 
21 | 
22 | **Required Dependencies**\ :
23 | 
24 | 
25 | * Python 3.7+
26 | * numpy>=1.19
27 | * scipy>=1.5.1
28 | * scikit_learn>=0.20.0
29 | * pandas>=1.0.0
30 | * torch>=1.10.0,<1.13.1
31 | * tqdm>=4.62.3
32 | * ray==2.6.1
33 | * pyarrow>=11.0.0
34 | * einops
35 | * statsmodels
36 | * arch
37 | 
38 | 


--------------------------------------------------------------------------------
/docs/start.model_save.rst:
--------------------------------------------------------------------------------
 1 | Model Save & Load 
 2 | ==================
 3 | 
 4 | The detection model class has ``save_model`` and ``load_model`` functions. 
 5 | 
 6 | We take the `DeepSVDD` model for example. 
 7 | 
 8 | .. code-block:: python
 9 |     
10 |     from deepod.models import DeepSVDD
11 | 
12 |     # training an anomaly detection model
13 |     model = DeepSVDD() # or any other models in DeepOD
14 |     model.fit(X_train) # training
15 | 
16 |     path = 'save_file.pkl'
17 |     model.save_model(path) # save trained model at the assigned path
18 | 
19 |     # directly load trained model from path
20 |     model = DeepSVDD.load_model(path)
21 |     model.decision_function(X_test)
22 |     # or
23 |     model.predict(X_test)
24 | 
25 | 
26 | 
27 | You can also directly use pickle for saving and loading DeepOD models. 
28 | 
29 | .. code-block:: python
30 |     
31 |     import pickle
32 |     from deepod.models import DeepSVDD
33 | 
34 |     model = DeepSVDD()
35 |     model.fit(X_train)
36 | 
37 |     with open('save_file.pkl', 'wb') as f:
38 |         pickle.dump(model, f)
39 | 
40 |     with open('save_file.pkl', 'rb') as f:
41 |         model = pickle.load(f)
42 | 
43 |     model.decision_function(X_test)
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/docs/zreferences.bib:
--------------------------------------------------------------------------------
 1 | @inproceedings{liu2008isolation,
 2 |   title={Isolation forest},
 3 |   author={Liu, Fei Tony and Ting, Kai Ming and Zhou, Zhi-Hua},
 4 |   booktitle={Data Mining, 2008. ICDM'08. Eighth IEEE International Conference on},
 5 |   pages={413--422},
 6 |   year={2008},
 7 |   organization={IEEE}
 8 | }
 9 | 
10 | @article{liu2012isolation,
11 |   title={Isolation-based anomaly detection},
12 |   author={Liu, Fei Tony and Ting, Kai Ming and Zhou, Zhi-Hua},
13 |   journal={ACM Transactions on Knowledge Discovery from Data (TKDD)},
14 |   volume={6},
15 |   number={1},
16 |   pages={3},
17 |   year={2012},
18 |   publisher={ACM}
19 | }
20 | 
21 | 
22 | @article{xu2023dif,
23 |   author={Xu, Hongzuo and Pang, Guansong and Wang, Yijie and Wang, Yongjun},
24 |   journal={IEEE Transactions on Knowledge and Data Engineering},
25 |   title={Deep Isolation Forest for Anomaly Detection},
26 |   year={2023},
27 |   volume={},
28 |   number={},
29 |   pages={1-14},
30 |   doi={10.1109/TKDE.2023.3270293}
31 | }
32 | 
33 | 
34 | @article{ruff2018deepsvdd,
35 |   title={Deep One-Class Classification},
36 |   author={Ruff, Lukas and Vandermeulen, Robert and Görnitz, Nico and Deecke, Lucas and Siddiqui, Shoaib and Binder, Alexander and Müller, Emmanuel and Kloft, Marius},
37 |   journal={International conference on machine learning},
38 |   year={2018}
39 | }
40 | 
41 | 
42 | @inproceedings{pang2019deep,
43 |   title={Deep anomaly detection with deviation networks},
44 |   author={Pang, Guansong and Shen, Chunhua and van den Hengel, Anton},
45 |   booktitle={Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery \& data mining},
46 |   pages={353--362},
47 |   year={2019}
48 | }


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: deepod
 2 | dependencies:
 3 |   - numpy
 4 |   - pandas
 5 |   - scikit-learn
 6 |   - scipy
 7 |   - pytorch
 8 |   - tqdm
 9 |   - ray
10 |   - pyarrow
11 | 
12 | 


--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xuhongzuo/DeepOD/bb8c20c5c7e860a7c5f6dde4b4a9981bd0eb739d/examples/__init__.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy>=1.19
 2 | scipy>=1.5.1
 3 | scikit_learn>=0.20.0
 4 | pandas>=1.0.0
 5 | torch>=1.10.0,<1.13.1
 6 | tqdm>=4.62.3
 7 | ray==2.6.1
 8 | pyarrow>=11.0.0
 9 | einops
10 | statsmodels
11 | arch


--------------------------------------------------------------------------------
/requirements_ci.yml:
--------------------------------------------------------------------------------
1 | pytest==5.2.1
2 | pytest-cov==2.10.0
3 | coveralls==2.1.1
4 | setuptools>=49.6.0
5 | tqdm>=4.62.3
6 | einops
7 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | import os
 3 | 
 4 | with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'README.rst'), encoding='utf-8') as f:
 5 |     readme = f.read()
 6 | 
 7 | with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'requirements.txt'), encoding='utf-8') as f:
 8 |     requirements = f.read().splitlines()
 9 | 
10 | setuptools.setup(
11 |     name="deepod",
12 |     version="0.4.1",
13 |     author="Hongzuo Xu",
14 |     author_email="hongzuoxu@126.com",
15 |     description="",
16 |     long_description=readme,
17 |     long_description_content_type="text/x-rst",
18 |     license='MIT License',
19 |     url="https://github.com/xuhongzuo/DeepOD",
20 |     keywords=['outlier detection', 'anomaly detection', 'deep anomaly detection',
21 |               'deep learning', 'data mining'],
22 |     packages=setuptools.find_packages(exclude=['test']),
23 |     install_requires=requirements,
24 |     classifiers=[
25 |         "Programming Language :: Python :: 3.7",
26 |         "Programming Language :: Python :: 3.8",
27 |         "Programming Language :: Python :: 3.9",
28 |         "Programming Language :: Python :: 3.10",
29 |         "License :: OSI Approved :: BSD License",
30 |     ],
31 | )
32 | 


--------------------------------------------------------------------------------
/testbed/configs.yaml:
--------------------------------------------------------------------------------
 1 | DIF:
 2 |   rep_dim: 20
 3 |   hidden_dims: 32
 4 |   n_ensemble: 50
 5 |   n_estimators: 6
 6 | 
 7 | DeepSVDDTS:
 8 |   network: Transformer
 9 |   rep_dim: 64
10 |   hidden_dims: 512
11 |   act: GELU
12 |   lr: 0.00001
13 |   epochs: 20
14 |   batch_size: 128
15 |   epoch_steps: -1
16 | 
17 | TranAD:
18 |   lr: 0.001
19 |   epochs: 10
20 |   batch_size: 128
21 |   epoch_steps: -1
22 | 
23 | USAD:
24 |   hidden_dims: 100
25 |   lr: 0.001  # plain decimal: YAML 1.1 loaders (e.g. PyYAML) read '1e-3' as a string
26 |   epochs: 10
27 |   batch_size: 128
28 | 
29 | COUTA:
30 |   neg_batch_ratio: 0.2
31 |   alpha: 0.1
32 |   rep_dim: 16
33 |   hidden_dims: 16
34 |   lr: 0.0001
35 |   epochs: 20
36 |   batch_size: 64
37 | 
38 | TcnED:
39 |   epochs: 100
40 |   lr: 0.00015
41 |   dropout: 0.42
42 |   kernel_size: 2
43 |   rep_dim: 8
44 |   hidden_dims: '8,8'
45 |   batch_size: 512
46 | 
47 | AnomalyTransformer:
48 |   lr: 0.0001
49 |   epochs: 10
50 |   batch_size: 32
51 |   k: 3
52 | 
53 | TimesNet:
54 |   lr: 0.0001
55 |   batch_size: 128
56 |   epochs: 10
57 |   pred_len: 0
58 |   e_layers: 2
59 |   d_model: 64
60 |   d_ff: 64
61 |   dropout: 0.1
62 |   top_k: 5
63 |   num_kernels: 6
64 | 
65 | 
66 | 
67 | RoSAS:
68 |   epoch_steps: 16
69 |   epochs: 200
70 |   batch_size: 32
71 |   lr: 0.005
72 |   n_emb: 128
73 |   alpha: 0.5
74 |   margin: 1
75 |   beta: 1
76 | 
77 | DCdetector:
78 |   patch_size: [1,3,5]
79 |   batch_size: 128
80 |   epochs: 10
81 |   lr: 0.0001
82 |   n_heads: 1
83 |   d_model: 256
84 |   e_layers: 3
85 | 
86 | 
87 | 
88 | 


--------------------------------------------------------------------------------
/testbed/testbed_unsupervised_ad.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | testbed of unsupervised tabular anomaly detection
  4 | @Author: Hongzuo Xu <hongzuoxu@126.com, xuhongzuo13@nudt.edu.cn>
  5 | """
  6 | 
  7 | import os
  8 | import sys
  9 | 
 10 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 11 | 
 12 | import warnings
 13 | import argparse
 14 | import getpass
 15 | import time
 16 | import numpy as np
 17 | import importlib as imp
 18 | from utils import get_data_lst, read_data
 19 | from deepod.metrics import tabular_metrics
 20 | 
 21 | 
 22 | dataset_root = f'/home/{getpass.getuser()}/dataset/1-tabular/'
 23 | 
 24 | parser = argparse.ArgumentParser()
 25 | parser.add_argument("--runs", type=int, default=5,
 26 |                     help="how many times we repeat the experiments to obtain the average performance")
 27 | parser.add_argument("--input_dir", type=str,
 28 |                     default='ADBench-classical',
 29 |                     help="the path of the data sets")
 30 | parser.add_argument("--output_dir", type=str, default='@record/',
 31 |                     help="the output file path")
 32 | parser.add_argument("--dataset", type=str, default='FULL',
 33 |                     help="FULL represents all the csv file in the folder, "
 34 |                          "or a list of data set names split by comma")
 35 | parser.add_argument("--model", type=str, default='DeepSVDD', help="",)
 36 | parser.add_argument("--auto_hyper", default=True, action='store_true', help="")
 37 | 
 38 | parser.add_argument("--normalization", type=str, default='min-max', help="",)
 39 | parser.add_argument('--silent_header', action='store_true')
 40 | parser.add_argument("--flag", type=str, default='')
 41 | args = parser.parse_args()
 42 | 
 43 | 
 44 | os.makedirs(args.output_dir, exist_ok=True)
 45 | data_lst = get_data_lst(os.path.join(dataset_root, args.input_dir), args.dataset)
 46 | print(os.path.join(dataset_root, args.input_dir))
 47 | print(data_lst)
 48 | 
 49 | module = imp.import_module('deepod.models.tabular')
 50 | model_class = getattr(module, args.model)
 51 | 
 52 | cur_time = time.strftime("%m-%d %H.%M.%S", time.localtime())
 53 | result_file = os.path.join(args.output_dir, f'{args.model}.{args.input_dir}.{args.flag}.csv')
 54 | 
 55 | if not args.silent_header:
 56 |     f = open(result_file, 'a')
 57 |     print('\n---------------------------------------------------------', file=f)
 58 |     print(f'model: {args.model}, collection: {args.input_dir}, '
 59 |           f'datasets: {args.dataset}, normalization: {args.normalization}, {args.runs}runs, ', file=f)
 60 |     print('---------------------------------------------------------', file=f)
 61 |     print('data, auc-roc, std, auc-pr, std, f1, std, time', file=f)
 62 |     f.close()
 63 | 
 64 | 
 65 | for file in data_lst:
 66 |     dataset_name = os.path.splitext(os.path.split(file)[1])[0]
 67 | 
 68 |     print(f'\n-------------------------{dataset_name}-----------------------')
 69 | 
 70 |     split = '50%-normal'
 71 |     print(f'train-test split: {split}, normalization: {args.normalization}')
 72 |     x_train, y_train, x_test, y_test = read_data(file=file, split=split,
 73 |                                                  normalization=args.normalization,
 74 |                                                  seed=42)
 75 |     if x_train is None:
 76 |         continue
 77 | 
 78 |     auc_lst, ap_lst, f1_lst = np.zeros(args.runs), np.zeros(args.runs), np.zeros(args.runs)
 79 |     t1_lst, t2_lst = [], []
 80 |     runs = args.runs
 81 | 
 82 |     model_configs = {}
 83 |     if args.auto_hyper:
 84 |         clf = model_class(random_state=42)
 85 | 
 86 |         # check whether the anomaly detection model supports ray tuning
 87 |         if not hasattr(clf, 'fit_auto_hyper'):
 88 |             warnings.warn(f'anomaly detection model {args.model} '
 89 |                           f'does not support auto tuning hyper-parameters currently.')
 90 |             break
 91 | 
 92 |         print(f'\nRunning [1/{args.runs}] of [{args.model}] on Dataset [{dataset_name}] (rat tune)')
 93 |         tuned_model_configs = clf.fit_auto_hyper(X=x_train,
 94 |                                                  X_test=x_test, y_test=y_test,
 95 |                                                  n_ray_samples=1, time_budget_s=None)
 96 |         model_configs = tuned_model_configs
 97 |         print(f'model parameter configure update to: {model_configs}')
 98 |         scores = clf.decision_function(x_test)
 99 | 
100 |         auc, ap, f1 = tabular_metrics(y_test, scores)
101 | 
102 |         print(f'{dataset_name}, {auc:.4f}, {ap:.4f}, {f1:.4f}, {args.model}')
103 | 
104 |     for i in range(runs):
105 |         start_time = time.time()
106 |         print(f'\nRunning [{i+1}/{args.runs}] of [{args.model}] on Dataset [{dataset_name}]')
107 | 
108 |         clf = model_class(**model_configs, random_state=42+i)
109 |         clf.fit(x_train)
110 | 
111 |         train_time = time.time()
112 |         scores = clf.decision_function(x_test)
113 |         done_time = time.time()
114 | 
115 |         auc, ap, f1 = tabular_metrics(y_test, scores)
116 |         auc_lst[i], ap_lst[i], f1_lst[i] = auc, ap, f1
117 |         t1_lst.append(train_time - start_time)
118 |         t2_lst.append(done_time - start_time)
119 | 
120 |         print(f'{dataset_name}, {auc_lst[i]:.4f}, {ap_lst[i]:.4f}, {f1_lst[i]:.4f}, '
121 |               f'{t1_lst[i]:.1f}/{t2_lst[i]:.1f}, {args.model}, {str(model_configs)}')
122 | 
123 |     avg_auc, avg_ap, avg_f1 = np.average(auc_lst), np.average(ap_lst), np.average(f1_lst)
124 |     std_auc, std_ap, std_f1 = np.std(auc_lst), np.std(ap_lst), np.std(f1_lst)
125 |     avg_time1 = np.average(t1_lst)
126 |     avg_time2 = np.average(t2_lst)
127 | 
128 |     f = open(result_file, 'a')
129 |     txt = f'{dataset_name}, ' \
130 |           f'{avg_auc:.4f}, {std_auc:.4f}, ' \
131 |           f'{avg_ap:.4f}, {std_ap:.4f}, ' \
132 |           f'{avg_f1:.4f}, {std_f1:.4f}, ' \
133 |           f'{avg_time1:.1f}/{avg_time2:.1f}, {args.model}, {str(model_configs)}'
134 |     print(txt, file=f)
135 |     print(txt)
136 |     f.close()
137 | 


--------------------------------------------------------------------------------