├── .github └── workflows │ └── testing.yml ├── .gitignore ├── .readthedocs.yaml ├── .travis.yml ├── LICENSE ├── README.rst ├── TODO.md ├── data ├── 38_thyroid.npz └── omi-1 │ ├── omi-1_test.csv │ └── omi-1_train.csv ├── deepod ├── __init__.py ├── core │ ├── __init__.py │ ├── base_model.py │ └── networks │ │ ├── __init__.py │ │ ├── base_networks.py │ │ ├── network_utility.py │ │ ├── ts_network_dilated_conv.py │ │ ├── ts_network_tcn.py │ │ └── ts_network_transformer.py ├── metrics │ ├── __init__.py │ ├── _anomaly_detection.py │ ├── _tsad_adjustment.py │ ├── affiliation │ │ ├── __init__.py │ │ ├── _affiliation_zone.py │ │ ├── _integral_interval.py │ │ ├── _single_ground_truth_event.py │ │ ├── generics.py │ │ └── metrics.py │ └── vus │ │ ├── __init__.py │ │ ├── metrics.py │ │ └── utils │ │ ├── __init__.py │ │ └── metrics.py ├── models │ ├── __init__.py │ ├── tabular │ │ ├── __init__.py │ │ ├── devnet.py │ │ ├── dif.py │ │ ├── dsad.py │ │ ├── dsvdd.py │ │ ├── feawad.py │ │ ├── goad.py │ │ ├── icl.py │ │ ├── neutral.py │ │ ├── prenet.py │ │ ├── rca.py │ │ ├── rdp.py │ │ ├── repen.py │ │ ├── rosas.py │ │ └── slad.py │ └── time_series │ │ ├── __init__.py │ │ ├── anomalytransformer.py │ │ ├── couta.py │ │ ├── dcdetector.py │ │ ├── devnet.py │ │ ├── dif.py │ │ ├── dsad.py │ │ ├── dsvdd.py │ │ ├── ncad.py │ │ ├── prenet.py │ │ ├── tcned.py │ │ ├── timesnet.py │ │ ├── tranad.py │ │ └── usad.py ├── test │ ├── __init__.py │ ├── test_anomalyTransformer.py │ ├── test_couta.py │ ├── test_dcdetector.py │ ├── test_devnet.py │ ├── test_dif.py │ ├── test_dsad.py │ ├── test_dsvdd.py │ ├── test_feawad.py │ ├── test_goad.py │ ├── test_icl.py │ ├── test_ncad.py │ ├── test_neutral.py │ ├── test_prenet.py │ ├── test_rca.py │ ├── test_rdp.py │ ├── test_repen.py │ ├── test_rosas.py │ ├── test_slad.py │ ├── test_tcned.py │ ├── test_timesnet.py │ ├── test_tranad.py │ └── test_usad.py ├── utils │ ├── __init__.py │ ├── data.py │ └── utility.py └── version.py ├── docs ├── Gemfile ├── Makefile ├── 
_templates │ └── class.rst ├── additional.contributing.rst ├── additional.license.rst ├── additional.star_history.rst ├── api_cc.rst ├── api_reference.base_networks.rst ├── api_reference.metrics.rst ├── api_reference.rst ├── api_reference.tabular.rst ├── api_reference.time_series.rst ├── conf.py ├── index.rst ├── requirements.txt ├── start.examples.rst ├── start.install.rst ├── start.model_save.rst └── zreferences.bib ├── environment.yml ├── examples └── __init__.py ├── requirements.txt ├── requirements_ci.yml ├── setup.py └── testbed ├── configs.yaml ├── testbed_unsupervised_ad.py ├── testbed_unsupervised_tsad.py └── utils.py /.github/workflows/testing.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Python Package using pip 10 | 11 | on: 12 | push: 13 | branches: 14 | - main 15 | - dev 16 | pull_request: 17 | branches: 18 | - main 19 | - dev 20 | 21 | permissions: 22 | contents: read 23 | 24 | jobs: 25 | deploy: 26 | 27 | runs-on: ubuntu-latest 28 | 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | os: [ubuntu-latest, windows-latest, macos-latest] 33 | python-version: ["3.7", "3.8", "3.9", "3.10"] 34 | 35 | steps: 36 | - uses: actions/checkout@v3 37 | - name: Python ${{ matrix.python-version }} 38 | uses: actions/setup-python@v3 39 | with: 40 | python-version: ${{ matrix.python-version }} 41 | - name: Install dependencies 42 | run: | 43 | python -m pip install --upgrade pip 44 | pip install -r requirements.txt 45 | pip install pytest 46 | pip install coverage 47 | pip install build 48 | pip install coveralls 49 | 50 | - name: Test with pytest 51 | run: | 52 | coverage run --source=deepod -m pytest 53 | 54 | - name: coverage report 55 | env: 56 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 57 | run: | 58 | coveralls --service=github 59 | 60 | # - name: Build package 61 | # run: python -m build 62 | 63 | # - name: Publish package 64 | # uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 65 | # with: 66 | # user: __token__ 67 | # password: ${{ secrets.PYPI_API_TOKEN }} 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | deepod.egg-info/ 2 | dist/ 3 | build/ 4 | .idea 5 | **/__pycache__ 6 | docs_output 7 | docs/generated 8 | .vscode 9 | sphinx-build 10 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | 
version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.8" 12 | # You can also specify other tool versions: 13 | # nodejs: "20" 14 | # rust: "1.70" 15 | # golang: "1.20" 16 | 17 | # Build documentation in the "docs/" directory with Sphinx 18 | sphinx: 19 | configuration: docs/conf.py 20 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 21 | # builder: "dirhtml" 22 | # Fail on all warnings to avoid broken references 23 | # fail_on_warning: true 24 | 25 | # Optionally build your docs in additional formats such as PDF and ePub 26 | # formats: 27 | # - pdf 28 | # - epub 29 | 30 | # Optional but recommended, declare the Python requirements required 31 | # to build your documentation 32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 33 | python: 34 | install: 35 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: xenial 3 | 4 | python: 5 | # - "3.6" 6 | # - "3.7" 7 | - "3.8" 8 | - "3.9" 9 | 10 | install: 11 | - pip install --upgrade pip 12 | - pip list 13 | - pip install importlib-metadata>=4.0.0 14 | - pip install setuptools>=49.6.0 15 | - pip install -r requirements.txt 16 | - pip install -r requirements_ci.yml 17 | 18 | # command to run tests 19 | script: 20 | pytest --cov=deepod/ 21 | 22 | after_success: 23 | - coveralls 24 | 25 | notifications: 26 | email: 27 | recipients: 28 | - hongzuoxu@126.com 29 | on_success: never # default: change 30 | on_failure: always # default: always 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 
2023, Hongzuo Xu 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | in the skeleton of base deep ad models: 4 | - add early stopping mechanism 5 | - add adversarial training? 6 | 7 | models 8 | - add AE as a baseline 9 | - add GAN-based method as a baseline 10 | - add one-class-based methods, e.g., DROCC, HRN, .. 
11 | 12 | model selection 13 | - add the pretrained method for model selection 14 | - add more model selection methods -------------------------------------------------------------------------------- /data/38_thyroid.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xuhongzuo/DeepOD/bb8c20c5c7e860a7c5f6dde4b4a9981bd0eb739d/data/38_thyroid.npz -------------------------------------------------------------------------------- /deepod/__init__.py: -------------------------------------------------------------------------------- 1 | from deepod.version import __version__ 2 | from . import core, models, metrics 3 | 4 | __all__ = ['__version__', 'core', 'models', 'metrics'] -------------------------------------------------------------------------------- /deepod/core/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`deepod.core` module covers the base classes of `DeepOD`. 
3 | """ 4 | from .base_model import BaseDeepAD 5 | 6 | 7 | __all__ = ["BaseDeepAD"] 8 | 9 | # print(""" 10 | # ╭━━━╮ ╭━━━╮ ╭━━━ ╮ 11 | # ╰╮╭╮┃ ┃╭━╮┃ ╰╮╭ ╮┃ 12 | # ┃┃┃┣━━┳━━┳━━╮ ┃┃ ┃┃ ┃┃ ┃┃ 13 | # ┃┃┃┃┃━┫┃━┫╭╮┃ ┃┃ ┃┃ ┃┃ ┃┃ 14 | # ╭╯╰╯┃┃━┫┃━┫╰╯┃ ┃╰━╯┃ ╭╯╯ ╰┃ 15 | # ╰━━━┻━━┻━━┫╭━╯ ╰━━━╯ ╰━━━━╯ 16 | # ┃┃ 17 | # ╰╯ 18 | # """) -------------------------------------------------------------------------------- /deepod/core/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_networks import MLPnet 2 | from .base_networks import MlpAE 3 | from .base_networks import GRUNet 4 | from .base_networks import LSTMNet 5 | from .base_networks import ConvSeqEncoder 6 | from .base_networks import ConvNet 7 | from .ts_network_transformer import TSTransformerEncoder 8 | from .ts_network_tcn import TCNnet 9 | from .ts_network_tcn import TcnAE 10 | 11 | __all__ = ['MLPnet', 'MlpAE', 'GRUNet', 'LSTMNet', 'ConvSeqEncoder', 12 | 'ConvNet', 'TSTransformerEncoder', 'TCNnet', 'TcnAE'] -------------------------------------------------------------------------------- /deepod/core/networks/network_utility.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import warnings 3 | import importlib 4 | 5 | 6 | def _instantiate_class(module_name: str, class_name: str): 7 | module = importlib.import_module(module_name) 8 | class_ = getattr(module, class_name) 9 | return class_() 10 | 11 | 12 | def _handle_n_hidden(n_hidden): 13 | if type(n_hidden) == int: 14 | n_layers = 1 15 | hidden_dim = n_hidden 16 | elif type(n_hidden) == str: 17 | n_hidden = n_hidden.split(',') 18 | n_hidden = [int(a) for a in n_hidden] 19 | n_layers = len(n_hidden) 20 | hidden_dim = int(n_hidden[0]) 21 | 22 | if np.std(n_hidden) != 0: 23 | warnings.warn('use the first hidden num, ' 24 | 'the rest hidden numbers are deprecated', UserWarning) 25 | else: 26 | raise TypeError('n_hidden should be a string or a 
int.') 27 | 28 | return hidden_dim, n_layers 29 | -------------------------------------------------------------------------------- /deepod/core/networks/ts_network_dilated_conv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from deepod.core.networks.network_utility import _handle_n_hidden 3 | from torch.nn import functional as F 4 | 5 | 6 | class SamePadConv(torch.nn.Module): 7 | def __init__(self, in_channels, out_channels, kernel_size, dilation=1, groups=1): 8 | super().__init__() 9 | self.receptive_field = (kernel_size - 1) * dilation + 1 10 | padding = self.receptive_field // 2 11 | self.conv = torch.nn.Conv1d( 12 | in_channels, out_channels, kernel_size, 13 | padding=padding, 14 | dilation=dilation, 15 | groups=groups 16 | ) 17 | self.remove = 1 if self.receptive_field % 2 == 0 else 0 18 | 19 | def forward(self, x): 20 | out = self.conv(x) 21 | if self.remove > 0: 22 | out = out[:, :, : -self.remove] 23 | return out 24 | 25 | 26 | class ConvBlock(torch.nn.Module): 27 | def __init__(self, in_channels, out_channels, kernel_size, dilation, final=False): 28 | super().__init__() 29 | self.conv1 = SamePadConv(in_channels, out_channels, kernel_size, dilation=dilation) 30 | self.conv2 = SamePadConv(out_channels, out_channels, kernel_size, dilation=dilation) 31 | self.projector = torch.nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels or final else None 32 | 33 | def forward(self, x): 34 | residual = x if self.projector is None else self.projector(x) 35 | x = F.gelu(x) 36 | x = self.conv1(x) 37 | x = F.gelu(x) 38 | x = self.conv2(x) 39 | return x + residual 40 | 41 | 42 | class DilatedConvEncoder(torch.nn.Module): 43 | def __init__(self, n_features, n_hidden='20', n_output=20, 44 | bias=False, 45 | kernel_size=3): 46 | super().__init__() 47 | 48 | hidden_dim, n_layers = _handle_n_hidden(n_hidden) 49 | 50 | self.input_fc = torch.nn.Linear(n_features, hidden_dim, bias=bias) 51 | channels = 
def point_adjustment(y_true, y_score):
    """
    adjust the score for segment detection. i.e., for each ground-truth anomaly segment,
    use the maximum score as the score of all points in that segment. This corresponds to point-adjust f1-score.
    *This function is copied/modified from the source code in [Zhihan Li et al. KDD21]*

    Args:

        y_true (np.array, required):
            Data label, 0 indicates normal timestamp, and 1 is anomaly.
            Any array-like (e.g. a list) is accepted and converted.

        y_score (np.array, required):
            Predicted anomaly scores, a higher score indicates a higher
            likelihood of being an anomaly. The input is never modified.

    Returns:

        np.array:
            Adjusted anomaly scores (a new array, same length as the input).

    Raises:

        ValueError: if ``y_true`` and ``y_score`` differ in length.

    """
    labels = np.asarray(y_true)
    score = np.array(y_score)  # np.array always copies, so the caller's data is untouched

    if len(score) != len(labels):
        raise ValueError('y_true and y_score should have the same length')
    if len(labels) == 0:
        return score

    # Indices at which the label changes, i.e. the first index of every new segment.
    splits = np.where(labels[1:] != labels[:-1])[0] + 1
    bounds = np.concatenate(([0], splits, [len(labels)]))

    for start, stop in zip(bounds[:-1], bounds[1:]):
        # A segment is homogeneous, so its first label decides whether it is anomalous.
        if labels[start] == 1:
            score[start:stop] = np.max(score[start:stop])
    return score
which to get the stop 27 | :param Js: ground truth events, as a list of couples 28 | :param Trange: range of the series where Js is included 29 | :return: generalized stop such that the middle of t_start and t_stop 30 | always gives the affiliation zone 31 | """ 32 | if j == -1: 33 | a = min(Trange) 34 | return(2*a - t_start(0, Js, Trange)) 35 | else: 36 | return(Js[j][1]) 37 | 38 | def E_gt_func(j, Js, Trange): 39 | """ 40 | Get the affiliation zone of element j of the ground truth 41 | 42 | :param j: index from 0 to len(Js) (excluded) on which to get the zone 43 | :param Js: ground truth events, as a list of couples 44 | :param Trange: range of the series where Js is included, can 45 | be (-math.inf, math.inf) for distance measures 46 | :return: affiliation zone of element j of the ground truth represented 47 | as a couple 48 | """ 49 | range_left = (t_stop(j-1, Js, Trange) + t_start(j, Js, Trange))/2 50 | range_right = (t_stop(j, Js, Trange) + t_start(j+1, Js, Trange))/2 51 | return((range_left, range_right)) 52 | 53 | def get_all_E_gt_func(Js, Trange): 54 | """ 55 | Get the affiliation partition from the ground truth point of view 56 | 57 | :param Js: ground truth events, as a list of couples 58 | :param Trange: range of the series where Js is included, can 59 | be (-math.inf, math.inf) for distance measures 60 | :return: affiliation partition of the events 61 | """ 62 | # E_gt is the limit of affiliation/attraction for each ground truth event 63 | E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))] 64 | return(E_gt) 65 | 66 | def affiliation_partition(Is = [(1,1.5),(2,5),(5,6),(8,9)], E_gt = [(1,2.5),(2.5,4.5),(4.5,10)]): 67 | """ 68 | Cut the events into the affiliation zones 69 | The presentation given here is from the ground truth point of view, 70 | but it is also used in the reversed direction in the main function. 
71 | 72 | :param Is: events as a list of couples 73 | :param E_gt: range of the affiliation zones 74 | :return: a list of list of intervals (each interval represented by either 75 | a couple or None for empty interval). The outer list is indexed by each 76 | affiliation zone of `E_gt`. The inner list is indexed by the events of `Is`. 77 | """ 78 | out = [None] * len(E_gt) 79 | for j in range(len(E_gt)): 80 | E_gt_j = E_gt[j] 81 | discarded_idx_before = [I[1] < E_gt_j[0] for I in Is] # end point of predicted I is before the begin of E 82 | discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E 83 | kept_index = [not(a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)] 84 | Is_j = [x for x, y in zip(Is, kept_index)] 85 | out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j] 86 | return(out) 87 | -------------------------------------------------------------------------------- /deepod/metrics/affiliation/_single_ground_truth_event.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import math 4 | from ._affiliation_zone import ( 5 | get_all_E_gt_func, 6 | affiliation_partition) 7 | from ._integral_interval import ( 8 | integral_interval_distance, 9 | integral_interval_probaCDF_precision, 10 | integral_interval_probaCDF_recall, 11 | interval_length, 12 | sum_interval_lengths) 13 | 14 | def affiliation_precision_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)): 15 | """ 16 | Compute the individual average distance from Is to a single ground truth J 17 | 18 | :param Is: list of predicted events within the affiliation zone of J 19 | :param J: couple representating the start and stop of a ground truth interval 20 | :return: individual average precision directed distance number 21 | """ 22 | if all([I is None for I in Is]): # no prediction in the current area 23 | return(math.nan) # undefined 24 | 
return(sum([integral_interval_distance(I, J) for I in Is]) / sum_interval_lengths(Is)) 25 | 26 | def affiliation_precision_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)): 27 | """ 28 | Compute the individual precision probability from Is to a single ground truth J 29 | 30 | :param Is: list of predicted events within the affiliation zone of J 31 | :param J: couple representating the start and stop of a ground truth interval 32 | :param E: couple representing the start and stop of the zone of affiliation of J 33 | :return: individual precision probability in [0, 1], or math.nan if undefined 34 | """ 35 | if all([I is None for I in Is]): # no prediction in the current area 36 | return(math.nan) # undefined 37 | return(sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is)) 38 | 39 | def affiliation_recall_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)): 40 | """ 41 | Compute the individual average distance from a single J to the predictions Is 42 | 43 | :param Is: list of predicted events within the affiliation zone of J 44 | :param J: couple representating the start and stop of a ground truth interval 45 | :return: individual average recall directed distance number 46 | """ 47 | Is = [I for I in Is if I is not None] # filter possible None in Is 48 | if len(Is) == 0: # there is no prediction in the current area 49 | return(math.inf) 50 | E_gt_recall = get_all_E_gt_func(Is, (-math.inf, math.inf)) # here from the point of view of the predictions 51 | Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is 52 | return(sum([integral_interval_distance(J[0], I) for I, J in zip(Is, Js)]) / interval_length(J)) 53 | 54 | def affiliation_recall_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)): 55 | """ 56 | Compute the individual recall probability from a single ground truth J to Is 57 | 58 | :param Is: list of predicted events within the affiliation zone of J 59 | :param J: couple 
representating the start and stop of a ground truth interval 60 | :param E: couple representing the start and stop of the zone of affiliation of J 61 | :return: individual recall probability in [0, 1] 62 | """ 63 | Is = [I for I in Is if I is not None] # filter possible None in Is 64 | if len(Is) == 0: # there is no prediction in the current area 65 | return(0) 66 | E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions 67 | Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is 68 | return(sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J)) 69 | -------------------------------------------------------------------------------- /deepod/metrics/affiliation/generics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from itertools import groupby 4 | from operator import itemgetter 5 | import math 6 | import gzip 7 | import glob 8 | import os 9 | 10 | def convert_vector_to_events(vector = [0, 1, 1, 0, 0, 1, 0]): 11 | """ 12 | Convert a binary vector (indicating 1 for the anomalous instances) 13 | to a list of events. The events are considered as durations, 14 | i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1). 
def infer_Trange(events_pred, events_gt):
    """
    Given the list of events events_pred and events_gt, get the
    smallest possible Trange corresponding to the start and stop indexes
    of the whole series.
    Trange will not influence the measure of distances, but will impact the
    measures of probabilities.

    :param events_pred: a list of couples corresponding to predicted events
    (may be empty, in which case only events_gt determines the range)
    :param events_gt: a list of couples corresponding to ground truth events
    :return: a couple corresponding to the smallest range containing the events
    :raises ValueError: if events_gt is empty
    """
    if len(events_gt) == 0:
        raise ValueError('The gt events should contain at least one event')

    # Predictions first, then ground truth: an empty prediction list simply
    # contributes nothing, so no special case (or recursion) is needed.
    events = [*events_pred, *events_gt]
    start = min(event[0] for event in events)
    stop = max(event[1] for event in events)
    return (start, stop)
62 | 63 | :param events: a list of couples corresponding to predicted events 64 | :return: True is the events have any point anomalies, False otherwise 65 | """ 66 | if len(events) == 0: 67 | return(False) 68 | return(min([x[1] - x[0] for x in events]) == 0) 69 | 70 | def _sum_wo_nan(vec): 71 | """ 72 | Sum of elements, ignoring math.isnan ones 73 | 74 | :param vec: vector of floating numbers 75 | :return: sum of the elements, ignoring math.isnan ones 76 | """ 77 | vec_wo_nan = [e for e in vec if not math.isnan(e)] 78 | return(sum(vec_wo_nan)) 79 | 80 | def _len_wo_nan(vec): 81 | """ 82 | Count of elements, ignoring math.isnan ones 83 | 84 | :param vec: vector of floating numbers 85 | :return: count of the elements, ignoring math.isnan ones 86 | """ 87 | vec_wo_nan = [e for e in vec if not math.isnan(e)] 88 | return(len(vec_wo_nan)) 89 | 90 | def read_gz_data(filename = 'data/machinetemp_groundtruth.gz'): 91 | """ 92 | Load a file compressed with gz, such that each line of the 93 | file is either 0 (representing a normal instance) or 1 (representing) 94 | an anomalous instance. 95 | :param filename: file path to the gz compressed file 96 | :return: list of integers with either 0 or 1 97 | """ 98 | with gzip.open(filename, 'rb') as f: 99 | content = f.read().splitlines() 100 | content = [int(x) for x in content] 101 | return(content) 102 | 103 | def read_all_as_events(): 104 | """ 105 | Load the files contained in the folder `data/` and convert 106 | to events. The length of the series is kept. 
def f1_func(p, r):
    """
    Compute the f1 function, i.e. the harmonic mean of precision and recall

    :param p: precision numeric value
    :param r: recall numeric value
    :return: f1 numeric value; 0.0 when the division by ``p + r`` raises
    ZeroDivisionError (e.g. precision and recall are both the number 0)
    """
    try:
        return 2 * p * r / (p + r)
    except ZeroDivisionError:
        # Harmonic mean is undefined at p == r == 0; report the worst score
        # instead of crashing the caller.
        return 0.0
def pr_from_events(events_pred, events_gt, Trange):
    """
    Compute the affiliation metrics including the precision/recall in [0,1],
    along with the individual precision/recall distances and probabilities

    :param events_pred: list of predicted events, each represented by a couple
    indicating the start and the stop of the event
    :param events_gt: list of ground truth events, each represented by a couple
    indicating the start and the stop of the event
    :param Trange: range of the series where events_pred and events_gt are included,
    represented as a couple (start, stop)
    :return: dictionary with precision, recall, and the individual metrics
    :raises ValueError: if `Trange` is None, does not include all the events,
    if `events_gt` is empty, or if point anomalies are present
    """
    # This guard must run before `Trange` is subscripted below; otherwise a
    # missing range surfaces as a TypeError and this message is never shown.
    if Trange is None:
        # Set as default, but Trange should be indicated if probabilities are used
        raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function')

    # testing the inputs
    test_events(events_pred)
    test_events(events_gt)

    # other tests
    minimal_Trange = infer_Trange(events_pred, events_gt)
    if not Trange[0] <= minimal_Trange[0]:
        raise ValueError('`Trange` should include all the events')
    if not minimal_Trange[1] <= Trange[1]:
        raise ValueError('`Trange` should include all the events')

    if len(events_gt) == 0:
        raise ValueError('Input `events_gt` should have at least one event')

    if has_point_anomalies(events_pred) or has_point_anomalies(events_gt):
        raise ValueError('Cannot manage point anomalies currently')

    E_gt = get_all_E_gt_func(events_gt, Trange)
    aff_partition = affiliation_partition(events_pred, E_gt)

    # Computing precision distance
    d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]

    # Computing recall distance
    d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)]

    # Computing precision
    p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]

    # Computing recall
    p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)]

    # Precision is averaged through the `_wo_nan` helpers; when they report no
    # usable entry, the first individual value is returned unchanged.
    n_valid_precision = _len_wo_nan(p_precision)
    if n_valid_precision > 0:
        p_precision_average = _sum_wo_nan(p_precision) / n_valid_precision
    else:
        p_precision_average = p_precision[0]  # math.nan
    p_recall_average = sum(p_recall) / len(p_recall)

    dict_out = dict({'Affiliation_Precision': p_precision_average,
                     'Affiliation_Recall': p_recall_average,
                     'individual_precision_probabilities': p_precision,
                     'individual_recall_probabilities': p_recall,
                     'individual_precision_distances': d_precision,
                     'individual_recall_distances': d_recall})
    return(dict_out)
def get_range_vus_roc(score, labels, slidingWindow):
    """
    Collect the range-based AUC and VUS metrics for one score vector.

    :param score: anomaly scores, forwarded to `metricor` as `score`
    :param labels: ground-truth labels, forwarded to `metricor`
    :param slidingWindow: window passed to `RangeAUC`; `RangeAUC_volume`
        receives twice this value as its `windowSize`
    :return: dict with keys 'R_AUC_ROC', 'R_AUC_PR', 'VUS_ROC', 'VUS_PR'
    """
    # A separate evaluator is created for each call, as before.
    range_auc_roc, range_auc_pr, _, _, _ = metricor().RangeAUC(
        labels=labels,
        score=score,
        window=slidingWindow,
        plot_ROC=True,
    )
    _, _, _, _, vus_roc, vus_pr = metricor().RangeAUC_volume(
        labels_original=labels,
        score=score,
        windowSize=2*slidingWindow,
    )

    return dict(R_AUC_ROC=range_auc_roc, R_AUC_PR=range_auc_pr,
                VUS_ROC=vus_roc, VUS_PR=vus_pr)
deepod.models.tabular.dsvdd import DeepSVDD 2 | from deepod.models.tabular.rca import RCA 3 | from deepod.models.tabular.dsad import DeepSAD 4 | from deepod.models.tabular.repen import REPEN 5 | from deepod.models.tabular.neutral import NeuTraL 6 | from deepod.models.tabular.dif import DeepIsolationForest 7 | from deepod.models.tabular.slad import SLAD 8 | from deepod.models.tabular.rdp import RDP 9 | from deepod.models.tabular.feawad import FeaWAD 10 | from deepod.models.tabular.devnet import DevNet 11 | from deepod.models.tabular.prenet import PReNet 12 | from deepod.models.tabular.goad import GOAD 13 | from deepod.models.tabular.icl import ICL 14 | from deepod.models.tabular.rosas import RoSAS 15 | 16 | from deepod.models.time_series.prenet import PReNetTS 17 | from deepod.models.time_series.dsad import DeepSADTS 18 | from deepod.models.time_series.devnet import DevNetTS 19 | 20 | from deepod.models.time_series.dif import DeepIsolationForestTS 21 | from deepod.models.time_series.dsvdd import DeepSVDDTS 22 | 23 | from deepod.models.time_series.dcdetector import DCdetector 24 | from deepod.models.time_series.timesnet import TimesNet 25 | from deepod.models.time_series.anomalytransformer import AnomalyTransformer 26 | from deepod.models.time_series.ncad import NCAD 27 | from deepod.models.time_series.tranad import TranAD 28 | from deepod.models.time_series.couta import COUTA 29 | from deepod.models.time_series.usad import USAD 30 | from deepod.models.time_series.tcned import TcnED 31 | 32 | __all__ = [ 33 | 'RCA', 'DeepSVDD', 'GOAD', 'NeuTraL', 'RDP', 'ICL', 'SLAD', 'DeepIsolationForest', 34 | 'DeepSAD', 'DevNet', 'PReNet', 'FeaWAD', 'REPEN', 'RoSAS', 35 | 'DCdetector', 'TimesNet', 'AnomalyTransformer', 'NCAD', 36 | 'TranAD', 'COUTA', 'USAD', 'TcnED', 37 | 'DeepIsolationForestTS', 'DeepSVDDTS', 38 | 'PReNetTS', 'DeepSADTS', 'DevNetTS' 39 | ] -------------------------------------------------------------------------------- /deepod/models/tabular/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .rca import RCA 2 | from .dsvdd import DeepSVDD 3 | from .dsad import DeepSAD 4 | from .devnet import DevNet 5 | from .prenet import PReNet 6 | from .feawad import FeaWAD 7 | from .repen import REPEN 8 | from .goad import GOAD 9 | from .rca import RCA 10 | from .rdp import RDP 11 | from .icl import ICL 12 | from .slad import SLAD 13 | from .neutral import NeuTraL 14 | from .dif import DeepIsolationForest 15 | from .rosas import RoSAS 16 | 17 | __all__ = ['RCA', 'DeepSVDD', 'DeepSAD', 'DevNet', 'PReNet', 'FeaWAD', 18 | 'REPEN', 'GOAD', 'NeuTraL', 'RDP', 'ICL', 'SLAD', 'DeepIsolationForest', 'RoSAS'] -------------------------------------------------------------------------------- /deepod/models/tabular/devnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Deep anomaly detection with deviation networks. 4 | PyTorch's implementation 5 | @Author: Hongzuo Xu 6 | """ 7 | 8 | from deepod.core.base_model import BaseDeepAD 9 | from deepod.core.networks.base_networks import MLPnet 10 | from torch.utils.data import DataLoader, TensorDataset 11 | from torch.utils.data.sampler import WeightedRandomSampler 12 | import torch 13 | import numpy as np 14 | 15 | 16 | class DevNet(BaseDeepAD): 17 | """ 18 | Deviation Networks for Weakly-supervised Anomaly Detection (KDD'19) 19 | :cite:`pang2019deep` 20 | 21 | Args: 22 | epochs (int, optional): 23 | number of training epochs (default: 100). 24 | batch_size (int, optional): 25 | number of samples in a mini-batch (default: 64) 26 | lr (float, optional): 27 | learning rate (default: 1e-3) 28 | rep_dim (int, optional): 29 | it is for consistency, unused in this model. 
30 | hidden_dims (list, str or int, optional): 31 | number of neural units in hidden layers, 32 | If list, each item is a layer; 33 | If str, neural units of hidden layers are split by comma; 34 | If int, number of neural units of single hidden layer 35 | (default: '100,50') 36 | act (str, optional): 37 | activation layer name, 38 | choice = ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh'] 39 | (default='ReLU') 40 | bias (bool, optional): 41 | Additive bias in linear layer (default=False) 42 | margin (float, optional): 43 | margin value used in the deviation loss function (default=5.) 44 | l (int, optional): 45 | the size of samples of the Gaussian distribution 46 | used in the deviation loss function (default=5000.) 47 | epoch_steps (int, optional): 48 | Maximum steps in an epoch. 49 | If -1, all the batches will be processed 50 | (default=-1) 51 | prt_steps (int, optional): 52 | Number of epoch intervals per printing (default=10) 53 | device (str, optional): 54 | torch device (default='cuda'). 
55 | verbose (int, optional): 56 | Verbosity mode (default=1) 57 | random_state (int, optional): 58 | the seed used by the random (default=42) 59 | """ 60 | def __init__(self, epochs=100, batch_size=64, lr=1e-3, 61 | network='MLP', 62 | rep_dim=128, hidden_dims='100,50', act='ReLU', bias=False, 63 | margin=5., l=5000, 64 | epoch_steps=-1, prt_steps=10, device='cuda', 65 | verbose=2, random_state=42): 66 | super(DevNet, self).__init__( 67 | data_type='tabular', model_name='DevNet', epochs=epochs, batch_size=batch_size, lr=lr, 68 | network=network, 69 | epoch_steps=epoch_steps, prt_steps=prt_steps, device=device, 70 | verbose=verbose, random_state=random_state 71 | ) 72 | 73 | self.margin = margin 74 | self.l = l 75 | 76 | self.hidden_dims = hidden_dims 77 | self.act = act 78 | self.bias = bias 79 | 80 | return 81 | 82 | def training_prepare(self, X, y): 83 | """ 84 | 85 | Args: 86 | X (np.array): input data array 87 | y (np.array): input data label 88 | 89 | Returns: 90 | train_loader (torch.DataLoader): data loader of training data 91 | net (torch.nn.Module): neural network 92 | criterion (torch.nn.Module): loss function 93 | 94 | """ 95 | # loader: balanced loader, a mini-batch contains a half of normal data and a half of anomalies 96 | n_anom = np.where(y == 1)[0].shape[0] 97 | n_norm = self.n_samples - n_anom 98 | weight_map = {0: 1. / n_norm, 1: 1. 
class DevLoss(torch.nn.Module):
    """
    Deviation Loss

    Parameters
    ----------
    margin: float, optional (default=5.)
        Margin of the deviation loss: samples labelled 1 are penalised by
        ``max(margin - dev, 0)``, i.e. while their deviation is below the margin

    l: int, optional (default=5000)
        the size of samples of the Gaussian distribution used in the deviation loss function

    reduction: str, optional (default='mean')
        choice = [``'none'`` | ``'mean'`` | ``'sum'``]
            - If ``'none'``: no reduction will be applied;
            - If ``'mean'``: the sum of the output will be divided by the number of
            elements in the output;
            - If ``'sum'``: the output will be summed

    """
    def __init__(self, margin=5., l=5000, reduction='mean'):
        super(DevLoss, self).__init__()
        self.margin = margin
        self.loss_l = l
        self.reduction = reduction
        return

    def forward(self, y_true, y_pred):
        """
        Compute the deviation loss.

        Parameters
        ----------
        y_true: torch.Tensor
            labels (0 for inliers, 1 for anomalies)

        y_pred: torch.Tensor
            predicted scores, one per sample

        Returns
        -------
        torch.Tensor
            scalar for ``'mean'``/``'sum'``; otherwise the element-wise loss
            with the shape of ``y_pred``
        """
        # Labels of shape (n,) against scores of shape (n, 1) would broadcast
        # to an (n, n) matrix and mix every label with every score. Align the
        # labels with the predictions when they hold the same number of items.
        if y_true.shape != y_pred.shape and y_true.numel() == y_pred.numel():
            y_true = y_true.reshape(y_pred.shape)

        ref = torch.randn(self.loss_l)  # reference scores drawn from a standard Gaussian
        dev = (y_pred - torch.mean(ref)) / torch.std(ref)
        inlier_loss = torch.abs(dev)
        outlier_loss = torch.abs(torch.max(self.margin - dev, torch.zeros_like(dev)))
        loss = (1 - y_true) * inlier_loss + y_true * outlier_loss

        if self.reduction == 'mean':
            return torch.mean(loss)
        elif self.reduction == 'sum':
            return torch.sum(loss)
        elif self.reduction == 'none':
            return loss

        # unknown reduction names fall back to the unreduced loss
        return loss
__init__(self, epochs=100, batch_size=64, lr=1e-3, 22 | n_trans=11, trans_type='residual', temp=0.1, 23 | rep_dim=128, hidden_dims='100,50', trans_hidden_dims=50, 24 | act='LeakyReLU', bias=False, 25 | epoch_steps=-1, prt_steps=10, device='cuda', 26 | verbose=1, random_state=42): 27 | super(NeuTraL, self).__init__( 28 | model_name='NeuTraL', epochs=epochs, batch_size=batch_size, lr=lr, 29 | epoch_steps=epoch_steps, prt_steps=prt_steps, device=device, 30 | verbose=verbose, random_state=random_state 31 | ) 32 | 33 | self.n_trans = n_trans 34 | self.trans_type = trans_type 35 | self.temp = temp 36 | 37 | self.trans_hidden_dims = trans_hidden_dims 38 | self.enc_hidden_dims = hidden_dims 39 | self.rep_dim = rep_dim 40 | self.act = act 41 | self.bias = bias 42 | return 43 | 44 | def training_prepare(self, X, y): 45 | train_loader = DataLoader(X, batch_size=self.batch_size, shuffle=True) 46 | 47 | net = TabNeutralADNet( 48 | n_features=self.n_features, 49 | n_trans=self.n_trans, 50 | trans_type=self.trans_type, 51 | enc_hidden_dims=self.enc_hidden_dims, 52 | trans_hidden_dims=self.trans_hidden_dims, 53 | activation=self.act, 54 | bias=self.bias, 55 | rep_dim=self.rep_dim, 56 | device=self.device 57 | ) 58 | 59 | criterion = DCL(temperature=self.temp) 60 | 61 | if self.verbose >=2: 62 | print(net) 63 | 64 | return train_loader, net, criterion 65 | 66 | def inference_prepare(self, X): 67 | test_loader = DataLoader(X, batch_size=self.batch_size, drop_last=False, shuffle=False) 68 | self.criterion.reduction = 'none' 69 | return test_loader 70 | 71 | def training_forward(self, batch_x, net, criterion): 72 | batch_x = batch_x.float().to(self.device) 73 | z = net(batch_x) 74 | loss = criterion(z) 75 | return loss 76 | 77 | def inference_forward(self, batch_x, net, criterion): 78 | batch_x = batch_x.float().to(self.device) 79 | batch_z = net(batch_x) 80 | s = criterion(batch_z) 81 | return batch_z, s 82 | 83 | 84 | class TabNeutralADNet(torch.nn.Module): 85 | """ 86 | network 
class of NeuTraL for tabular data 87 | 88 | Parameters 89 | ---------- 90 | n_features: int 91 | dimensionality of input data 92 | 93 | n_trans: int 94 | the number of transformation times 95 | 96 | trans_type: str, default='residual' 97 | transformation type 98 | 99 | enc_hidden_dims: list or str or int 100 | the number of neural units of hidden layers in encoder net 101 | 102 | trans_hidden_dims: list or str or int 103 | the number of neural units of hidden layers in transformation net 104 | 105 | rep_dim: int 106 | representation dimensionality 107 | 108 | activation: str 109 | activation layer name 110 | 111 | device: str 112 | device 113 | """ 114 | def __init__(self, n_features, n_trans=11, trans_type='residual', 115 | enc_hidden_dims='24,24,24,24', trans_hidden_dims=24, 116 | rep_dim=24, 117 | activation='ReLU', 118 | bias=False, 119 | device='cuda'): 120 | super(TabNeutralADNet, self).__init__() 121 | 122 | self.enc = MLPnet( 123 | n_features=n_features, 124 | n_hidden=enc_hidden_dims, 125 | n_output=rep_dim, 126 | activation=activation, 127 | bias=bias, 128 | batch_norm=False 129 | ) 130 | self.trans = torch.nn.ModuleList( 131 | [MLPnet(n_features=n_features, 132 | n_hidden=trans_hidden_dims, 133 | n_output=n_features, 134 | activation=activation, 135 | bias=bias, 136 | batch_norm=False) for _ in range(n_trans)] 137 | ) 138 | 139 | self.trans.to(device) 140 | self.enc.to(device) 141 | 142 | self.n_trans = n_trans 143 | self.trans_type = trans_type 144 | self.z_dim = rep_dim 145 | 146 | def forward(self, x): 147 | x_transform = torch.empty(x.shape[0], self.n_trans, x.shape[-1]).to(x) 148 | 149 | for i in range(self.n_trans): 150 | mask = self.trans[i](x) 151 | if self.trans_type == 'forward': 152 | x_transform[:, i] = mask 153 | elif self.trans_type == 'mul': 154 | mask = torch.sigmoid(mask) 155 | x_transform[:, i] = mask * x 156 | elif self.trans_type == 'residual': 157 | x_transform[:, i] = mask + x 158 | 159 | x_cat = torch.cat([x.unsqueeze(1), 
class DCL(torch.nn.Module):
    """
    Contrastive loss over the original view and its transformed views.

    Parameters
    ----------
    temperature: float, optional (default=0.1)
        divisor applied to the similarities before exponentiation

    reduction: str, optional (default='mean')
        choice = [``'none'`` | ``'mean'`` | ``'sum'``];
        any other value behaves like ``'none'``
    """
    def __init__(self, temperature=0.1, reduction='mean'):
        super(DCL, self).__init__()
        self.temp = temperature
        self.reduction = reduction

    def forward(self, z):
        """
        z is indexed as (sample, view, feature); view 0 is the original input
        and the remaining views are its transformations.
        """
        unit = F.normalize(z, p=2, dim=-1)
        anchor = unit[:, 0]   # n, z
        views = unit[:, 1:]   # n, k-1, z
        n_samples, n_views, _ = unit.shape

        # exponentiated pairwise similarities between all views: n, k, k
        pairwise = torch.exp(torch.matmul(unit, unit.permute(0, 2, 1) / self.temp))
        # drop each view's similarity with itself
        off_diagonal = ~torch.eye(n_views, dtype=torch.bool, device=unit.device)
        pairwise = pairwise.masked_select(off_diagonal.unsqueeze(0)).view(n_samples, n_views, -1)
        denominator = pairwise[:, 1:].sum(-1)  # n, k-1

        # similarity of every transformed view with the original view: n, k-1
        positive = torch.exp(torch.sum(views * anchor.unsqueeze(1), -1) / self.temp)

        n_transforms = n_views - 1
        scale = 1 / np.abs(n_transforms*np.log(1.0 / n_transforms))

        per_sample = ((torch.log(denominator) - torch.log(positive)) * scale).sum(1)

        reducer = {'mean': torch.mean, 'sum': torch.sum}.get(self.reduction)
        if reducer is None:
            return per_sample
        return reducer(per_sample)
RDP(BaseDeepAD): 17 | """ 18 | Unsupervised Representation Learning by Predicting Random Distances 19 | (IJCAI'20) 20 | 21 | Parameters 22 | ---------- 23 | epochs: int, optional (default=100) 24 | Number of training epochs 25 | 26 | batch_size: int, optional (default=64) 27 | Number of samples in a mini-batch 28 | 29 | lr: float, optional (default=1e-3) 30 | Learning rate 31 | 32 | rep_dim: int, optional (default=128) 33 | Dimensionality of the representation space 34 | 35 | hidden_dims: list, str or int, optional (default='100,50') 36 | Number of neural units in hidden layers 37 | - If list, each item is a layer 38 | - If str, neural units of hidden layers are split by comma 39 | - If int, number of neural units of single hidden layer 40 | 41 | act: str, optional (default='ReLU') 42 | activation layer name 43 | choice = ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh'] 44 | 45 | bias: bool, optional (default=False) 46 | Additive bias in linear layer 47 | 48 | epoch_steps: int, optional (default=-1) 49 | Maximum steps in an epoch 50 | - If -1, all the batches will be processed 51 | 52 | prt_steps: int, optional (default=10) 53 | Number of epoch intervals per printing 54 | 55 | device: str, optional (default='cuda') 56 | torch device, 57 | 58 | verbose: int, optional (default=1) 59 | Verbosity mode 60 | 61 | random_state: int, optional (default=42) 62 | the seed used by the random 63 | """ 64 | def __init__(self, epochs=100, batch_size=64, lr=1e-3, 65 | rep_dim=128, hidden_dims='100,50', act='LeakyReLU', bias=False, 66 | epoch_steps=-1, prt_steps=10, device='cuda', 67 | verbose=2, random_state=42): 68 | super(RDP, self).__init__( 69 | model_name='RDP', epochs=epochs, batch_size=batch_size, lr=lr, 70 | epoch_steps=epoch_steps, prt_steps=prt_steps, device=device, 71 | verbose=verbose, random_state=random_state 72 | ) 73 | 74 | self.hidden_dims = hidden_dims 75 | self.rep_dim = rep_dim 76 | self.act = act 77 | self.bias = bias 78 | return 79 | 80 | def training_prepare(self, 
class RDPLoss(torch.nn.Module):
    """
    Loss used by RDP: a representation-gap term plus a distance-prediction term.

    Parameters
    ----------
    random_projection_net: torch.nn.Module
        network producing the target representations

    reduction: str, optional (default='mean')
        choice = [``'none'`` | ``'mean'`` | ``'sum'``];
        any value other than ``'mean'``/``'sum'`` yields one loss value per sample
    """
    def __init__(self, random_projection_net, reduction='mean'):
        super(RDPLoss, self).__init__()
        self.rp_net = random_projection_net
        # kept for backward compatibility; forward() reads `self.reduction`
        # directly so that changing the attribute after construction works
        self.mse = torch.nn.MSELoss(reduction=reduction)
        self.reduction = reduction

    def forward(self, rep, rep1, x, x1):
        """
        Parameters
        ----------
        rep, rep1: torch.Tensor
            learned representations of `x` and `x1`

        x, x1: torch.Tensor
            the two input batches that are paired row by row

        Returns
        -------
        torch.Tensor
            scalar for ``'mean'``/``'sum'``, otherwise one value per sample
        """
        rep_target = self.rp_net(x)
        rep1_target = self.rp_net(x1)

        # row-wise inner product of the L1-normalised representations
        d_target = torch.sum(F.normalize(rep_target, p=1, dim=1) *
                             F.normalize(rep1_target, p=1, dim=1), dim=1)
        d_pred = torch.sum(F.normalize(rep, p=1, dim=1) *
                           F.normalize(rep1, p=1, dim=1), dim=1)

        if self.reduction == 'mean' or self.reduction == 'sum':
            # `self.mse` captured the reduction at construction time, so the
            # current attribute is used instead to honour later changes
            gap_loss = F.mse_loss(rep, rep_target, reduction=self.reduction)
            rdp_loss = F.mse_loss(d_target, d_pred, reduction=self.reduction)

        else:
            # per-sample loss: the gap term is averaged over the feature dimension
            gap_loss = torch.mean(F.mse_loss(rep, rep_target, reduction='none'), dim=1)
            rdp_loss = F.mse_loss(d_target, d_pred, reduction='none')

        return gap_loss + rdp_loss
class TestAnomalyTransformer(unittest.TestCase):
    """Smoke tests for the AnomalyTransformer time-series detector on the omi-1 data."""

    def setUp(self):
        """Load the omi-1 train/test CSV files and fit a small model before each test."""
        # paths are relative to the current working directory
        train_file = 'data/omi-1/omi-1_train.csv'
        test_file = 'data/omi-1/omi-1_test.csv'
        train_df = pd.read_csv(train_file, sep=',', index_col=0)
        test_df = pd.read_csv(test_file, index_col=0)
        y = test_df['label'].values
        # the 'label' column is removed from both frames so only features remain
        train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
        self.Xts_train = train_df.values
        self.Xts_test = test_df.values
        self.yts_test = y

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.clf = AnomalyTransformer(seq_len=100, stride=1, epochs=2,
                                      batch_size=32, k=3, lr=1e-4,
                                      device=device, random_state=42)
        self.clf.fit(self.Xts_train)

    def test_parameters(self):
        """The fitted model exposes decision_scores_, labels_ and threshold_."""
        assert (hasattr(self.clf, 'decision_scores_') and
                self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and
                self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_') and
                self.clf.threshold_ is not None)

    def test_train_scores(self):
        """There is one training score per training row."""
        assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0])

    def test_prediction_scores(self):
        """decision_function returns one score per test row."""
        pred_scores = self.clf.decision_function(self.Xts_test)
        assert_equal(pred_scores.shape[0], self.Xts_test.shape[0])

    def test_prediction_labels(self):
        """predict returns labels with the same shape as the ground truth."""
        pred_labels = self.clf.predict(self.Xts_test)
        assert_equal(pred_labels.shape, self.yts_test.shape)

    # def test_prediction_proba(self):
    #     pred_proba = self.clf.predict_proba(self.X_test)
    #     assert (pred_proba.min() >= 0)
    #     assert (pred_proba.max() <= 1)
    #
    # def test_prediction_proba_linear(self):
    #     pred_proba = self.clf.predict_proba(self.X_test, method='linear')
    #     assert (pred_proba.min() >= 0)
    #     assert (pred_proba.max() <= 1)
    #
    # def test_prediction_proba_unify(self):
    #     pred_proba = self.clf.predict_proba(self.X_test, method='unify')
    #     assert (pred_proba.min() >= 0)
    #     assert (pred_proba.max() <= 1)
    #
    # def test_prediction_proba_parameter(self):
    #     with assert_raises(ValueError):
    #         self.clf.predict_proba(self.X_test, method='something')

    def test_prediction_labels_confidence(self):
        """predict(return_confidence=True) also returns confidences within [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True)

        assert_equal(pred_labels.shape, self.yts_test.shape)
        assert_equal(confidence.shape, self.yts_test.shape)
        assert (confidence.min() >= 0)
        assert (confidence.max() <= 1)

    # def test_prediction_proba_linear_confidence(self):
    #     pred_proba, confidence = self.clf.predict_proba(self.X_test,
    #                                                     method='linear',
    #                                                     return_confidence=True)
    #     assert (pred_proba.min() >= 0)
    #     assert (pred_proba.max() <= 1)
    #
    #     assert_equal(confidence.shape, self.y_test.shape)
    #     assert (confidence.min() >= 0)
    #     assert (confidence.max() <= 1)
    #
    # def test_fit_predict(self):
    #     pred_labels = self.clf.fit_predict(self.X_train)
    #     assert_equal(pred_labels.shape, self.y_train.shape)
    #
    # def test_fit_predict_score(self):
    #     self.clf.fit_predict_score(self.X_test, self.y_test)
    #     self.clf.fit_predict_score(self.X_test, self.y_test,
    #                                scoring='roc_auc_score')
    #     self.clf.fit_predict_score(self.X_test, self.y_test,
    #                                scoring='prc_n_score')
    #     with assert_raises(NotImplementedError):
    #         self.clf.fit_predict_score(self.X_test, self.y_test,
    #                                    scoring='something')
    #
    # def test_predict_rank(self):
    #     pred_socres = self.clf.decision_function(self.X_test)
    #     pred_ranks = self.clf._predict_rank(self.X_test)
    #
    #     # assert the order is reserved
    #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
    #     assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
    #     assert_array_less(-0.1, pred_ranks)
    #
    # def test_predict_rank_normalized(self):
    #     pred_socres = self.clf.decision_function(self.X_test)
    #     pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)
    #
    #     # assert the order is reserved
    #     assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3)
    #     assert_array_less(pred_ranks, 1.01)
    #     assert_array_less(-0.1, pred_ranks)

    # def test_plot(self):
    #     os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1)
    #     assert_array_less(0, os)

    # def test_model_clone(self):
    #     clone_clf = clone(self.clf)

    def tearDown(self):
        """Nothing to clean up."""
        pass
pd.read_csv(test_file, index_col=0) 29 | y = test_df['label'].values 30 | train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1) 31 | self.Xts_train = train_df.values 32 | self.Xts_test = test_df.values 33 | self.yts_test = y 34 | 35 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 36 | self.clf = COUTA(seq_len=100, stride=5, 37 | epochs=5, hidden_dims=50, 38 | device=device, random_state=42) 39 | self.clf.fit(self.Xts_train) 40 | 41 | def test_parameters(self): 42 | assert (hasattr(self.clf, 'decision_scores_') and 43 | self.clf.decision_scores_ is not None) 44 | assert (hasattr(self.clf, 'labels_') and 45 | self.clf.labels_ is not None) 46 | assert (hasattr(self.clf, 'threshold_') and 47 | self.clf.threshold_ is not None) 48 | 49 | def test_train_scores(self): 50 | assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0]) 51 | 52 | def test_prediction_scores(self): 53 | pred_scores = self.clf.decision_function(self.Xts_test) 54 | assert_equal(pred_scores.shape[0], self.Xts_test.shape[0]) 55 | 56 | def test_metric(self): 57 | pred_scores = self.clf.decision_function(self.Xts_test) 58 | 59 | anomaly_ratio = 1 60 | thresh = np.percentile(pred_scores, 100 - anomaly_ratio) 61 | pred = (pred_scores > thresh).astype(int) 62 | metrics = ts_metrics_enhanced(self.yts_test, pred_scores, pred) 63 | print("metrics", metrics) 64 | 65 | def test_prediction_labels(self): 66 | pred_labels = self.clf.predict(self.Xts_test) 67 | assert_equal(pred_labels.shape, self.yts_test.shape) 68 | 69 | # def test_prediction_proba(self): 70 | # pred_proba = self.clf.predict_proba(self.X_test) 71 | # assert (pred_proba.min() >= 0) 72 | # assert (pred_proba.max() <= 1) 73 | # 74 | # def test_prediction_proba_linear(self): 75 | # pred_proba = self.clf.predict_proba(self.X_test, method='linear') 76 | # assert (pred_proba.min() >= 0) 77 | # assert (pred_proba.max() <= 1) 78 | # 79 | # def test_prediction_proba_unify(self): 80 | # pred_proba = 
self.clf.predict_proba(self.X_test, method='unify') 81 | # assert (pred_proba.min() >= 0) 82 | # assert (pred_proba.max() <= 1) 83 | # 84 | # def test_prediction_proba_parameter(self): 85 | # with assert_raises(ValueError): 86 | # self.clf.predict_proba(self.X_test, method='something') 87 | 88 | def test_prediction_labels_confidence(self): 89 | pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True) 90 | 91 | assert_equal(pred_labels.shape, self.yts_test.shape) 92 | assert_equal(confidence.shape, self.yts_test.shape) 93 | assert (confidence.min() >= 0) 94 | assert (confidence.max() <= 1) 95 | 96 | 97 | # def test_prediction_proba_linear_confidence(self): 98 | # pred_proba, confidence = self.clf.predict_proba(self.X_test, 99 | # method='linear', 100 | # return_confidence=True) 101 | # assert (pred_proba.min() >= 0) 102 | # assert (pred_proba.max() <= 1) 103 | # 104 | # assert_equal(confidence.shape, self.y_test.shape) 105 | # assert (confidence.min() >= 0) 106 | # assert (confidence.max() <= 1) 107 | # 108 | # def test_fit_predict(self): 109 | # pred_labels = self.clf.fit_predict(self.X_train) 110 | # assert_equal(pred_labels.shape, self.y_train.shape) 111 | # 112 | # def test_fit_predict_score(self): 113 | # self.clf.fit_predict_score(self.X_test, self.y_test) 114 | # self.clf.fit_predict_score(self.X_test, self.y_test, 115 | # scoring='roc_auc_score') 116 | # self.clf.fit_predict_score(self.X_test, self.y_test, 117 | # scoring='prc_n_score') 118 | # with assert_raises(NotImplementedError): 119 | # self.clf.fit_predict_score(self.X_test, self.y_test, 120 | # scoring='something') 121 | # 122 | # def test_predict_rank(self): 123 | # pred_socres = self.clf.decision_function(self.X_test) 124 | # pred_ranks = self.clf._predict_rank(self.X_test) 125 | # 126 | # # assert the order is reserved 127 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 128 | # assert_array_less(pred_ranks, self.X_train.shape[0] + 1) 129 | # 
assert_array_less(-0.1, pred_ranks) 130 | # 131 | # def test_predict_rank_normalized(self): 132 | # pred_socres = self.clf.decision_function(self.X_test) 133 | # pred_ranks = self.clf._predict_rank(self.X_test, normalized=True) 134 | # 135 | # # assert the order is reserved 136 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 137 | # assert_array_less(pred_ranks, 1.01) 138 | # assert_array_less(-0.1, pred_ranks) 139 | 140 | # def test_plot(self): 141 | # os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1) 142 | # assert_array_less(0, os) 143 | 144 | # def test_model_clone(self): 145 | # clone_clf = clone(self.clf) 146 | 147 | def tearDown(self): 148 | pass 149 | 150 | 151 | if __name__ == '__main__': 152 | unittest.main() -------------------------------------------------------------------------------- /deepod/test/test_dcdetector.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import sys 7 | import unittest 8 | 9 | # noinspection PyProtectedMember 10 | from numpy.testing import assert_equal 11 | from sklearn.metrics import roc_auc_score 12 | import torch 13 | import pandas as pd 14 | 15 | # temporary solution for relative imports in case pyod is not installed 16 | # if deepod is installed, no need to use the following line 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 18 | 19 | from deepod.models.time_series.dcdetector import DCdetector 20 | 21 | 22 | class TestDCdetector(unittest.TestCase): 23 | def setUp(self): 24 | train_file = 'data/omi-1/omi-1_train.csv' 25 | test_file = 'data/omi-1/omi-1_test.csv' 26 | train_df = pd.read_csv(train_file, sep=',', index_col=0) 27 | test_df = pd.read_csv(test_file, index_col=0) 28 | y = test_df['label'].values 29 | train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1) 30 | 
self.Xts_train = train_df.values 31 | self.Xts_test = test_df.values 32 | self.yts_test = y 33 | 34 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 35 | self.clf = DCdetector(seq_len=90, stride=1, epochs=2, 36 | batch_size=32, lr=1e-4, patch_size=[5], 37 | device=device, random_state=42) 38 | self.clf.fit(self.Xts_train) 39 | 40 | def test_parameters(self): 41 | assert (hasattr(self.clf, 'decision_scores_') and 42 | self.clf.decision_scores_ is not None) 43 | assert (hasattr(self.clf, 'labels_') and 44 | self.clf.labels_ is not None) 45 | assert (hasattr(self.clf, 'threshold_') and 46 | self.clf.threshold_ is not None) 47 | 48 | def test_train_scores(self): 49 | assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0]) 50 | 51 | def test_prediction_scores(self): 52 | pred_scores = self.clf.decision_function(self.Xts_test) 53 | assert_equal(pred_scores.shape[0], self.Xts_test.shape[0]) 54 | 55 | def test_prediction_labels(self): 56 | pred_labels = self.clf.predict(self.Xts_test, return_confidence=False) 57 | assert_equal(pred_labels.shape, self.yts_test.shape) 58 | 59 | # def test_prediction_proba(self): 60 | # pred_proba = self.clf.predict_proba(self.X_test) 61 | # assert (pred_proba.min() >= 0) 62 | # assert (pred_proba.max() <= 1) 63 | # 64 | # def test_prediction_proba_linear(self): 65 | # pred_proba = self.clf.predict_proba(self.X_test, method='linear') 66 | # assert (pred_proba.min() >= 0) 67 | # assert (pred_proba.max() <= 1) 68 | # 69 | # def test_prediction_proba_unify(self): 70 | # pred_proba = self.clf.predict_proba(self.X_test, method='unify') 71 | # assert (pred_proba.min() >= 0) 72 | # assert (pred_proba.max() <= 1) 73 | # 74 | # def test_prediction_proba_parameter(self): 75 | # with assert_raises(ValueError): 76 | # self.clf.predict_proba(self.X_test, method='something') 77 | 78 | def test_prediction_labels_confidence(self): 79 | pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True) 80 | 81 | 
assert_equal(pred_labels.shape, self.yts_test.shape) 82 | assert_equal(confidence.shape, self.yts_test.shape) 83 | assert (confidence.min() >= 0) 84 | assert (confidence.max() <= 1) 85 | 86 | # def test_prediction_proba_linear_confidence(self): 87 | # pred_proba, confidence = self.clf.predict_proba(self.X_test, 88 | # method='linear', 89 | # return_confidence=True) 90 | # assert (pred_proba.min() >= 0) 91 | # assert (pred_proba.max() <= 1) 92 | # 93 | # assert_equal(confidence.shape, self.y_test.shape) 94 | # assert (confidence.min() >= 0) 95 | # assert (confidence.max() <= 1) 96 | # 97 | # def test_fit_predict(self): 98 | # pred_labels = self.clf.fit_predict(self.X_train) 99 | # assert_equal(pred_labels.shape, self.y_train.shape) 100 | # 101 | # def test_fit_predict_score(self): 102 | # self.clf.fit_predict_score(self.X_test, self.y_test) 103 | # self.clf.fit_predict_score(self.X_test, self.y_test, 104 | # scoring='roc_auc_score') 105 | # self.clf.fit_predict_score(self.X_test, self.y_test, 106 | # scoring='prc_n_score') 107 | # with assert_raises(NotImplementedError): 108 | # self.clf.fit_predict_score(self.X_test, self.y_test, 109 | # scoring='something') 110 | # 111 | # def test_predict_rank(self): 112 | # pred_socres = self.clf.decision_function(self.X_test) 113 | # pred_ranks = self.clf._predict_rank(self.X_test) 114 | # 115 | # # assert the order is reserved 116 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 117 | # assert_array_less(pred_ranks, self.X_train.shape[0] + 1) 118 | # assert_array_less(-0.1, pred_ranks) 119 | # 120 | # def test_predict_rank_normalized(self): 121 | # pred_socres = self.clf.decision_function(self.X_test) 122 | # pred_ranks = self.clf._predict_rank(self.X_test, normalized=True) 123 | # 124 | # # assert the order is reserved 125 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 126 | # assert_array_less(pred_ranks, 1.01) 127 | # assert_array_less(-0.1, pred_ranks) 128 | 129 | # def 
class TestDIF(unittest.TestCase):
    """Tests for Deep Isolation Forest on tabular and time-series data.

    ``clf`` is the tabular model fitted on synthetic data from
    ``generate_data``; ``clf2`` is the time-series variant fitted on omi-1.
    Both are fitted once for the whole class (``setUpClass``) instead of once
    per test method, because every test only reads from the fitted models.

    Disabled tests from the pyod template were removed as dead code.
    TODO: ``test_train_scores`` and the time-series F1 floor check were
    already commented out in the original (the latter read an undefined
    ``self.ts_f1_floor``); the reason is not recorded here.
    """

    TRAIN_FILE = 'data/omi-1/omi-1_train.csv'
    TEST_FILE = 'data/omi-1/omi-1_test.csv'

    @staticmethod
    def _data_path(relative_path):
        """Return a usable path for a dataset file.

        The path is first tried as given, i.e. relative to the current
        working directory (the original behaviour, which only works when the
        tests are launched from the repository root). If it does not exist
        there, it is resolved against the directory two levels above this
        test file.
        """
        if os.path.exists(relative_path):
            return relative_path
        repo_root = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '..', '..'))
        return os.path.join(repo_root, relative_path)

    @classmethod
    def setUpClass(cls):
        """Build both datasets and fit the tabular and time-series models."""
        cls.n_train = 1000
        cls.n_test = 600
        cls.contamination = 0.1
        cls.roc_floor = 0.8
        cls.X_train, cls.X_test, cls.y_train, cls.y_test = generate_data(
            n_train=cls.n_train, n_test=cls.n_test, n_features=10,
            contamination=cls.contamination, random_state=42
        )

        train_df = pd.read_csv(cls._data_path(cls.TRAIN_FILE),
                               sep=',', index_col=0)
        test_df = pd.read_csv(cls._data_path(cls.TEST_FILE), index_col=0)
        cls.yts_test = test_df['label'].values
        cls.Xts_train = train_df.drop('label', axis=1).values
        cls.Xts_test = test_df.drop('label', axis=1).values

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        cls.clf = DeepIsolationForest(device=device, n_ensemble=50,
                                      n_estimators=6)
        cls.clf.fit(cls.X_train)

        cls.clf2 = DeepIsolationForestTS(seq_len=100, stride=5,
                                         epochs=20, hidden_dims='50',
                                         device=device,
                                         random_state=42)
        cls.clf2.fit(cls.Xts_train)

    def test_parameters(self):
        """The tabular model exposes non-None scores, labels and threshold."""
        for attr in ('decision_scores_', 'labels_', 'threshold_'):
            self.assertIsNotNone(getattr(self.clf, attr, None), attr)

    def test_prediction_scores(self):
        """Scores have one entry per row; tabular AUC meets ``roc_floor``."""
        pred_scores = self.clf.decision_function(self.X_test)
        pred_scores2 = self.clf2.decision_function(self.Xts_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])
        assert_equal(pred_scores2.shape[0], self.Xts_test.shape[0])

        # check performance (tabular model only)
        self.assertGreaterEqual(roc_auc_score(self.y_test, pred_scores),
                                self.roc_floor)

    def test_prediction_labels(self):
        """Predicted labels match the ground-truth shape for both models."""
        pred_labels = self.clf.predict(self.X_test)
        pred_labels2 = self.clf2.predict(self.Xts_test)
        assert_equal(pred_labels.shape, self.y_test.shape)
        assert_equal(pred_labels2.shape, self.yts_test.shape)

    def test_prediction_labels_confidence(self):
        """Tabular confidence values match the label shape, within [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.X_test,
                                                   return_confidence=True)

        assert_equal(pred_labels.shape, self.y_test.shape)
        assert_equal(confidence.shape, self.y_test.shape)
        self.assertGreaterEqual(confidence.min(), 0)
        self.assertLessEqual(confidence.max(), 1)
self.X_train, self.X_test, self.y_train, self.y_test = generate_data( 32 | n_train=self.n_train, n_test=self.n_test, n_features=10, 33 | contamination=self.contamination, random_state=42 34 | ) 35 | 36 | train_file = 'data/omi-1/omi-1_train.csv' 37 | test_file = 'data/omi-1/omi-1_test.csv' 38 | train_df = pd.read_csv(train_file, sep=',', index_col=0) 39 | test_df = pd.read_csv(test_file, index_col=0) 40 | y = test_df['label'].values 41 | train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1) 42 | self.Xts_train = train_df.values 43 | self.Xts_test = test_df.values 44 | self.yts_test = y 45 | 46 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 47 | self.clf = DeepSVDD(device=device, random_state=42) 48 | self.clf.fit(self.X_train) 49 | 50 | self.clf2 = DeepSVDDTS(seq_len=100, stride=5, epochs=20, hidden_dims='100,50', 51 | device=device, network='TCN', random_state=42) 52 | self.clf2.fit(self.Xts_train) 53 | 54 | self.clf3 = DeepSVDDTS(seq_len=100, stride=5, epochs=20, hidden_dims='100,50', 55 | device=device, network='ConvSeq', random_state=42) 56 | self.clf3.fit(self.Xts_train) 57 | 58 | def test_parameters(self): 59 | assert (hasattr(self.clf, 'decision_scores_') and 60 | self.clf.decision_scores_ is not None) 61 | assert (hasattr(self.clf, 'labels_') and 62 | self.clf.labels_ is not None) 63 | assert (hasattr(self.clf, 'threshold_') and 64 | self.clf.threshold_ is not None) 65 | 66 | def test_train_scores(self): 67 | assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0]) 68 | assert_equal(len(self.clf2.decision_scores_), self.Xts_train.shape[0]) 69 | assert_equal(len(self.clf3.decision_scores_), self.Xts_train.shape[0]) 70 | 71 | def test_prediction_scores(self): 72 | pred_scores = self.clf.decision_function(self.X_test) 73 | pred_scores2 = self.clf2.decision_function(self.Xts_test) 74 | pred_scores3 = self.clf3.decision_function(self.Xts_test) 75 | 76 | # check score shapes 77 | assert_equal(pred_scores.shape[0], 
self.X_test.shape[0]) 78 | assert_equal(pred_scores2.shape[0], self.Xts_test.shape[0]) 79 | assert_equal(pred_scores3.shape[0], self.Xts_test.shape[0]) 80 | 81 | def test_prediction_labels(self): 82 | pred_labels = self.clf.predict(self.X_test) 83 | pred_labels2 = self.clf2.predict(self.Xts_test) 84 | assert_equal(pred_labels.shape, self.y_test.shape) 85 | assert_equal(pred_labels2.shape, self.yts_test.shape) 86 | 87 | # def test_prediction_proba(self): 88 | # pred_proba = self.clf.predict_proba(self.X_test) 89 | # assert (pred_proba.min() >= 0) 90 | # assert (pred_proba.max() <= 1) 91 | # 92 | # def test_prediction_proba_linear(self): 93 | # pred_proba = self.clf.predict_proba(self.X_test, method='linear') 94 | # assert (pred_proba.min() >= 0) 95 | # assert (pred_proba.max() <= 1) 96 | # 97 | # def test_prediction_proba_unify(self): 98 | # pred_proba = self.clf.predict_proba(self.X_test, method='unify') 99 | # assert (pred_proba.min() >= 0) 100 | # assert (pred_proba.max() <= 1) 101 | # 102 | # def test_prediction_proba_parameter(self): 103 | # with assert_raises(ValueError): 104 | # self.clf.predict_proba(self.X_test, method='something') 105 | 106 | def test_prediction_labels_confidence(self): 107 | pred_labels, confidence = self.clf.predict(self.X_test, return_confidence=True) 108 | 109 | assert_equal(pred_labels.shape, self.y_test.shape) 110 | assert_equal(confidence.shape, self.y_test.shape) 111 | assert (confidence.min() >= 0) 112 | assert (confidence.max() <= 1) 113 | 114 | # def test_prediction_proba_linear_confidence(self): 115 | # pred_proba, confidence = self.clf.predict_proba(self.X_test, 116 | # method='linear', 117 | # return_confidence=True) 118 | # assert (pred_proba.min() >= 0) 119 | # assert (pred_proba.max() <= 1) 120 | # 121 | # assert_equal(confidence.shape, self.y_test.shape) 122 | # assert (confidence.min() >= 0) 123 | # assert (confidence.max() <= 1) 124 | # 125 | # def test_fit_predict(self): 126 | # pred_labels = 
self.clf.fit_predict(self.X_train) 127 | # assert_equal(pred_labels.shape, self.y_train.shape) 128 | # 129 | # def test_fit_predict_score(self): 130 | # self.clf.fit_predict_score(self.X_test, self.y_test) 131 | # self.clf.fit_predict_score(self.X_test, self.y_test, 132 | # scoring='roc_auc_score') 133 | # self.clf.fit_predict_score(self.X_test, self.y_test, 134 | # scoring='prc_n_score') 135 | # with assert_raises(NotImplementedError): 136 | # self.clf.fit_predict_score(self.X_test, self.y_test, 137 | # scoring='something') 138 | # 139 | # def test_predict_rank(self): 140 | # pred_socres = self.clf.decision_function(self.X_test) 141 | # pred_ranks = self.clf._predict_rank(self.X_test) 142 | # 143 | # # assert the order is reserved 144 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 145 | # assert_array_less(pred_ranks, self.X_train.shape[0] + 1) 146 | # assert_array_less(-0.1, pred_ranks) 147 | # 148 | # def test_predict_rank_normalized(self): 149 | # pred_socres = self.clf.decision_function(self.X_test) 150 | # pred_ranks = self.clf._predict_rank(self.X_test, normalized=True) 151 | # 152 | # # assert the order is reserved 153 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 154 | # assert_array_less(pred_ranks, 1.01) 155 | # assert_array_less(-0.1, pred_ranks) 156 | 157 | # def test_plot(self): 158 | # os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1) 159 | # assert_array_less(0, os) 160 | 161 | # def test_model_clone(self): 162 | # clone_clf = clone(self.clf) 163 | 164 | def tearDown(self): 165 | pass 166 | 167 | 168 | if __name__ == '__main__': 169 | unittest.main() -------------------------------------------------------------------------------- /deepod/test/test_feawad.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import sys 7 | import 
class TestFeaWAD(unittest.TestCase):
    """Tests for the weakly-supervised FeaWAD detector on synthetic data.

    The model is fitted once for the whole class (``setUpClass``) instead of
    once per test method; every test only reads from the fitted model.

    The random draws used to build the fixture come from a locally seeded
    ``RandomState`` rather than the unseeded global ``np.random``, so the
    same 10 anomalies are labelled on every run.

    Disabled tests from the pyod template, including a commented-out
    duplicate of ``test_train_scores``, were removed as dead code.
    TODO: the ROC-AUC floor check on ``roc_floor`` was already commented out
    in the original; the reason is not recorded here.
    """

    @classmethod
    def setUpClass(cls):
        """Generate data, label 10 known anomalies and fit FeaWAD once."""
        cls.n_train = 200
        cls.n_test = 100
        cls.contamination = 0.1
        cls.roc_floor = 0.8

        cls.X_train, cls.X_test, cls.y_train, cls.y_test = generate_data(
            n_train=cls.n_train, n_test=cls.n_test, n_features=10,
            contamination=cls.contamination, random_state=42)

        rng = np.random.RandomState(42)

        # Synthetic time-series fixture kept from the original; no test in
        # this class reads it.
        cls.Xts_train = rng.randn(1000, 19)
        cls.yts_train = np.zeros(1000, dtype=int)
        cls.yts_train[200:250] = 1
        cls.Xts_test = cls.Xts_train.copy()
        cls.yts_test = cls.yts_train.copy()

        # Weak supervision: only 10 of the true anomalies are labelled 1,
        # every other training row is labelled 0.
        anom_id = np.where(cls.y_train == 1)[0]
        known_anom_id = rng.choice(anom_id, 10, replace=False)
        y_semi = np.zeros_like(cls.y_train, dtype=int)
        y_semi[known_anom_id] = 1

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        cls.clf = FeaWAD(epochs=20, device=device)
        cls.clf.fit(cls.X_train, y_semi)

    def test_parameters(self):
        """The fitted model exposes non-None scores, labels and threshold."""
        for attr in ('decision_scores_', 'labels_', 'threshold_'):
            self.assertIsNotNone(getattr(self.clf, attr, None), attr)

    def test_train_scores(self):
        """One training score is produced per training row."""
        assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0])

    def test_prediction_scores(self):
        """One anomaly score is produced per test row."""
        pred_scores = self.clf.decision_function(self.X_test)

        # check score shapes
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

    def test_prediction_labels(self):
        """Predicted labels have the same shape as the ground truth."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_labels_confidence(self):
        """Confidence values match the label shape and lie in [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.X_test,
                                                   return_confidence=True)

        assert_equal(pred_labels.shape, self.y_test.shape)
        assert_equal(confidence.shape, self.y_test.shape)
        self.assertGreaterEqual(confidence.min(), 0)
        self.assertLessEqual(confidence.max(), 1)
class TestGOAD(unittest.TestCase):
    """Smoke tests for the GOAD detector on a small synthetic dataset.

    ``setUp`` fits a fresh model before every test. The tests check that the
    fitted attributes exist and that outputs have the expected shape and
    range; no detection-accuracy threshold is enforced.
    """

    # NOTE: the PyOD-style tests that were commented out in this module
    # (train-score length, ROC-AUC floor, predict_proba variants,
    # fit_predict, fit_predict_score, _predict_rank, explain_outlier and
    # clone) are not reproduced here; restore them from version control if
    # the estimator gains those APIs.

    def setUp(self):
        """Generate synthetic data and fit GOAD for a single epoch."""
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        # Only referenced by the disabled ROC-AUC check.
        self.roc_floor = 0.8
        self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test, n_features=10,
            contamination=self.contamination, random_state=42)

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.clf = GOAD(epochs=1, device=device, n_trans=64)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """After fit(), the fitted attributes must exist and not be None."""
        # unittest assertions (unlike bare ``assert``) still run under
        # ``python -O`` and report which attribute failed.
        for attr in ('decision_scores_', 'labels_', 'threshold_'):
            self.assertIsNotNone(
                getattr(self.clf, attr, None),
                msg='%s is missing or None after fit()' % attr)

    def test_prediction_scores(self):
        """decision_function returns one score per test sample."""
        pred_scores = self.clf.decision_function(self.X_test)
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

    def test_prediction_labels(self):
        """predict returns labels shaped like the ground-truth labels."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_labels_confidence(self):
        """predict(return_confidence=True) also returns values in [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.X_test,
                                                   return_confidence=True)

        expected_shape = self.y_test.shape
        assert_equal(pred_labels.shape, expected_shape)
        assert_equal(confidence.shape, expected_shape)
        self.assertGreaterEqual(confidence.min(), 0)
        self.assertLessEqual(confidence.max(), 1)

    def tearDown(self):
        """Nothing to clean up; kept as an explicit no-op hook."""
        pass
class TestICL(unittest.TestCase):
    """Smoke tests for the ICL detector on a small synthetic dataset.

    ``setUp`` fits a fresh model before every test. The tests check that the
    fitted attributes exist and that outputs have the expected shape and
    range; no detection-accuracy threshold is enforced.
    """

    # NOTE: the PyOD-style tests that were commented out in this module
    # (train-score length, ROC-AUC floor, predict_proba variants,
    # fit_predict, fit_predict_score, _predict_rank, explain_outlier and
    # clone) are not reproduced here; restore them from version control if
    # the estimator gains those APIs.

    def setUp(self):
        """Generate synthetic data and fit ICL for a single epoch."""
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        # Only referenced by the disabled ROC-AUC check.
        self.roc_floor = 0.8
        self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test, n_features=10,
            contamination=self.contamination, random_state=42)

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.clf = ICL(epochs=1, device=device, n_ensemble='auto')
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """After fit(), the fitted attributes must exist and not be None."""
        # unittest assertions (unlike bare ``assert``) still run under
        # ``python -O`` and report which attribute failed.
        for attr in ('decision_scores_', 'labels_', 'threshold_'):
            self.assertIsNotNone(
                getattr(self.clf, attr, None),
                msg='%s is missing or None after fit()' % attr)

    def test_prediction_scores(self):
        """decision_function returns one score per test sample."""
        pred_scores = self.clf.decision_function(self.X_test)
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

    def test_prediction_labels(self):
        """predict returns labels shaped like the ground-truth labels."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_labels_confidence(self):
        """predict(return_confidence=True) also returns values in [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.X_test,
                                                   return_confidence=True)

        expected_shape = self.y_test.shape
        assert_equal(pred_labels.shape, expected_shape)
        assert_equal(confidence.shape, expected_shape)
        self.assertGreaterEqual(confidence.min(), 0)
        self.assertLessEqual(confidence.max(), 1)

    def tearDown(self):
        """Nothing to clean up; kept as an explicit no-op hook."""
        pass
def test_prediction_labels_confidence(self):
    """predict(return_confidence=True) returns labels plus values in [0, 1].

    Both returned arrays must have the same shape as the test labels
    (``self.yts_test``).
    """
    pred_labels, confidence = self.clf.predict(self.Xts_test,
                                               return_confidence=True)

    expected_shape = self.yts_test.shape
    assert_equal(pred_labels.shape, expected_shape)
    assert_equal(confidence.shape, expected_shape)
    # unittest assertions (unlike bare ``assert``) still run under
    # ``python -O`` and print the offending value on failure.
    self.assertGreaterEqual(confidence.min(), 0)
    self.assertLessEqual(confidence.max(), 1)
class TestNeuTral(unittest.TestCase):
    """Smoke tests for the NeuTraL detector on a small synthetic dataset.

    ``setUp`` fits a fresh model before every test. The tests check that the
    fitted attributes exist and that outputs have the expected shape and
    range; no detection-accuracy threshold is enforced.
    """

    # NOTE: the PyOD-style tests that were commented out in this module
    # (train-score length, ROC-AUC floor, predict_proba variants,
    # fit_predict, fit_predict_score, _predict_rank, explain_outlier and
    # clone) are not reproduced here; restore them from version control if
    # the estimator gains those APIs.

    def setUp(self):
        """Generate synthetic data and fit NeuTraL for a single epoch."""
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        # Only referenced by the disabled ROC-AUC check.
        self.roc_floor = 0.8
        self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test, n_features=10,
            contamination=self.contamination, random_state=42)

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.clf = NeuTraL(epochs=1, device=device)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """After fit(), the fitted attributes must exist and not be None."""
        # unittest assertions (unlike bare ``assert``) still run under
        # ``python -O`` and report which attribute failed.
        for attr in ('decision_scores_', 'labels_', 'threshold_'):
            self.assertIsNotNone(
                getattr(self.clf, attr, None),
                msg='%s is missing or None after fit()' % attr)

    def test_prediction_scores(self):
        """decision_function returns one score per test sample."""
        pred_scores = self.clf.decision_function(self.X_test)
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

    def test_prediction_labels(self):
        """predict returns labels shaped like the ground-truth labels."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_labels_confidence(self):
        """predict(return_confidence=True) also returns values in [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.X_test,
                                                   return_confidence=True)

        expected_shape = self.y_test.shape
        assert_equal(pred_labels.shape, expected_shape)
        assert_equal(confidence.shape, expected_shape)
        self.assertGreaterEqual(confidence.min(), 0)
        self.assertLessEqual(confidence.max(), 1)

    def tearDown(self):
        """Nothing to clean up; kept as an explicit no-op hook."""
        pass
class TestRCA(unittest.TestCase):
    """Smoke tests for the RCA detector on a small synthetic dataset.

    ``setUp`` fits a fresh model before every test. The tests check that the
    fitted attributes exist and that outputs have the expected shape and
    range; no detection-accuracy threshold is enforced.
    """

    # NOTE: the PyOD-style tests that were commented out in this module
    # (train-score length, ROC-AUC floor, predict_proba variants,
    # fit_predict, fit_predict_score, _predict_rank, explain_outlier and
    # clone) are not reproduced here; restore them from version control if
    # the estimator gains those APIs.

    def setUp(self):
        """Generate synthetic data and fit RCA (LeakyReLU) for one epoch."""
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        # Only referenced by the disabled ROC-AUC check.
        self.roc_floor = 0.8
        self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test, n_features=10,
            contamination=self.contamination, random_state=42)

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.clf = RCA(epochs=1, device=device, act='LeakyReLU')
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """After fit(), the fitted attributes must exist and not be None."""
        # unittest assertions (unlike bare ``assert``) still run under
        # ``python -O`` and report which attribute failed.
        for attr in ('decision_scores_', 'labels_', 'threshold_'):
            self.assertIsNotNone(
                getattr(self.clf, attr, None),
                msg='%s is missing or None after fit()' % attr)

    def test_prediction_scores(self):
        """decision_function returns one score per test sample."""
        pred_scores = self.clf.decision_function(self.X_test)
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

    def test_prediction_labels(self):
        """predict returns labels shaped like the ground-truth labels."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_labels_confidence(self):
        """predict(return_confidence=True) also returns values in [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.X_test,
                                                   return_confidence=True)

        expected_shape = self.y_test.shape
        assert_equal(pred_labels.shape, expected_shape)
        assert_equal(confidence.shape, expected_shape)
        self.assertGreaterEqual(confidence.min(), 0)
        self.assertLessEqual(confidence.max(), 1)

    def tearDown(self):
        """Nothing to clean up; kept as an explicit no-op hook."""
        pass
class TestRDP(unittest.TestCase):
    """Smoke tests for the RDP detector on a small synthetic dataset.

    ``setUp`` fits a fresh model before every test. The tests check that the
    fitted attributes exist and that outputs have the expected shape and
    range; no detection-accuracy threshold is enforced.
    """

    # NOTE: the PyOD-style tests that were commented out in this module
    # (train-score length, predict_proba variants, fit_predict,
    # fit_predict_score, _predict_rank, explain_outlier and clone) are not
    # reproduced here; restore them from version control if the estimator
    # gains those APIs.

    def setUp(self):
        """Generate synthetic data and fit RDP with ``hidden_dims=100``."""
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        # Not referenced by any active test in this class.
        self.roc_floor = 0.8
        self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test, n_features=10,
            contamination=self.contamination, random_state=42)

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.clf = RDP(hidden_dims=100, device=device)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """After fit(), the fitted attributes must exist and not be None."""
        # unittest assertions (unlike bare ``assert``) still run under
        # ``python -O`` and report which attribute failed.
        for attr in ('decision_scores_', 'labels_', 'threshold_'):
            self.assertIsNotNone(
                getattr(self.clf, attr, None),
                msg='%s is missing or None after fit()' % attr)

    def test_prediction_scores(self):
        """decision_function returns one score per test sample."""
        pred_scores = self.clf.decision_function(self.X_test)
        assert_equal(pred_scores.shape[0], self.X_test.shape[0])

    def test_prediction_labels(self):
        """predict returns labels shaped like the ground-truth labels."""
        pred_labels = self.clf.predict(self.X_test)
        assert_equal(pred_labels.shape, self.y_test.shape)

    def test_prediction_labels_confidence(self):
        """predict(return_confidence=True) also returns values in [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.X_test,
                                                   return_confidence=True)

        expected_shape = self.y_test.shape
        assert_equal(pred_labels.shape, expected_shape)
        assert_equal(confidence.shape, expected_shape)
        self.assertGreaterEqual(confidence.min(), 0)
        self.assertLessEqual(confidence.max(), 1)

    def tearDown(self):
        """Nothing to clean up; kept as an explicit no-op hook."""
        pass
self.y_train, self.y_test = generate_data( 31 | n_train=self.n_train, n_test=self.n_test, n_features=10, 32 | contamination=self.contamination, random_state=42) 33 | 34 | train_file = 'data/omi-1/omi-1_train.csv' 35 | test_file = 'data/omi-1/omi-1_test.csv' 36 | train_df = pd.read_csv(train_file, sep=',', index_col=0) 37 | test_df = pd.read_csv(test_file, index_col=0) 38 | y = test_df['label'].values 39 | train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1) 40 | self.Xts_train = train_df.values 41 | self.Xts_test = test_df.values 42 | self.yts_test = y 43 | 44 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 45 | self.clf = REPEN(epochs=5, device=device) 46 | self.clf.fit(self.X_train) 47 | 48 | def test_parameters(self): 49 | assert (hasattr(self.clf, 'decision_scores_') and 50 | self.clf.decision_scores_ is not None) 51 | assert (hasattr(self.clf, 'labels_') and 52 | self.clf.labels_ is not None) 53 | assert (hasattr(self.clf, 'threshold_') and 54 | self.clf.threshold_ is not None) 55 | 56 | def test_train_scores(self): 57 | assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0]) 58 | 59 | def test_prediction_scores(self): 60 | pred_scores = self.clf.decision_function(self.X_test) 61 | assert_equal(pred_scores.shape[0], self.X_test.shape[0]) 62 | 63 | def test_prediction_labels(self): 64 | pred_labels = self.clf.predict(self.X_test) 65 | assert_equal(pred_labels.shape, self.y_test.shape) 66 | 67 | # def test_prediction_proba(self): 68 | # pred_proba = self.clf.predict_proba(self.X_test) 69 | # assert (pred_proba.min() >= 0) 70 | # assert (pred_proba.max() <= 1) 71 | # 72 | # def test_prediction_proba_linear(self): 73 | # pred_proba = self.clf.predict_proba(self.X_test, method='linear') 74 | # assert (pred_proba.min() >= 0) 75 | # assert (pred_proba.max() <= 1) 76 | # 77 | # def test_prediction_proba_unify(self): 78 | # pred_proba = self.clf.predict_proba(self.X_test, method='unify') 79 | # assert (pred_proba.min() 
>= 0) 80 | # assert (pred_proba.max() <= 1) 81 | # 82 | # def test_prediction_proba_parameter(self): 83 | # with assert_raises(ValueError): 84 | # self.clf.predict_proba(self.X_test, method='something') 85 | 86 | def test_prediction_labels_confidence(self): 87 | pred_labels, confidence = self.clf.predict(self.X_test, 88 | return_confidence=True) 89 | 90 | assert_equal(pred_labels.shape, self.y_test.shape) 91 | assert_equal(confidence.shape, self.y_test.shape) 92 | assert (confidence.min() >= 0) 93 | assert (confidence.max() <= 1) 94 | 95 | # def test_prediction_proba_linear_confidence(self): 96 | # pred_proba, confidence = self.clf.predict_proba(self.X_test, 97 | # method='linear', 98 | # return_confidence=True) 99 | # assert (pred_proba.min() >= 0) 100 | # assert (pred_proba.max() <= 1) 101 | # 102 | # assert_equal(confidence.shape, self.y_test.shape) 103 | # assert (confidence.min() >= 0) 104 | # assert (confidence.max() <= 1) 105 | # 106 | # def test_fit_predict(self): 107 | # pred_labels = self.clf.fit_predict(self.X_train) 108 | # assert_equal(pred_labels.shape, self.y_train.shape) 109 | # 110 | # def test_fit_predict_score(self): 111 | # self.clf.fit_predict_score(self.X_test, self.y_test) 112 | # self.clf.fit_predict_score(self.X_test, self.y_test, 113 | # scoring='roc_auc_score') 114 | # self.clf.fit_predict_score(self.X_test, self.y_test, 115 | # scoring='prc_n_score') 116 | # with assert_raises(NotImplementedError): 117 | # self.clf.fit_predict_score(self.X_test, self.y_test, 118 | # scoring='something') 119 | # 120 | # def test_predict_rank(self): 121 | # pred_socres = self.clf.decision_function(self.X_test) 122 | # pred_ranks = self.clf._predict_rank(self.X_test) 123 | # 124 | # # assert the order is reserved 125 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 126 | # assert_array_less(pred_ranks, self.X_train.shape[0] + 1) 127 | # assert_array_less(-0.1, pred_ranks) 128 | # 129 | # def test_predict_rank_normalized(self): 130 | 
# pred_socres = self.clf.decision_function(self.X_test) 131 | # pred_ranks = self.clf._predict_rank(self.X_test, normalized=True) 132 | # 133 | # # assert the order is reserved 134 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 135 | # assert_array_less(pred_ranks, 1.01) 136 | # assert_array_less(-0.1, pred_ranks) 137 | 138 | # def test_plot(self): 139 | # os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1) 140 | # assert_array_less(0, os) 141 | 142 | # def test_model_clone(self): 143 | # clone_clf = clone(self.clf) 144 | 145 | def tearDown(self): 146 | pass 147 | 148 | 149 | if __name__ == '__main__': 150 | unittest.main() -------------------------------------------------------------------------------- /deepod/test/test_rosas.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import sys 7 | import unittest 8 | 9 | # noinspection PyProtectedMember 10 | from numpy.testing import assert_equal 11 | import torch 12 | 13 | # temporary solution for relative imports in case pyod is not installed 14 | # if deepod is installed, no need to use the following line 15 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 16 | 17 | from deepod.models.tabular.rosas import RoSAS 18 | from deepod.utils.data import generate_data 19 | import numpy as np 20 | 21 | 22 | class TestDevNet(unittest.TestCase): 23 | def setUp(self): 24 | self.n_train = 200 25 | self.n_test = 100 26 | self.contamination = 0.1 27 | self.roc_floor = 0.8 28 | 29 | self.X_train, self.X_test, self.y_train, self.y_test = generate_data( 30 | n_train=self.n_train, n_test=self.n_test, n_features=10, 31 | contamination=self.contamination, random_state=42) 32 | 33 | anom_id = np.where(self.y_train == 1)[0] 34 | known_anom_id = np.random.choice(anom_id, 10, replace=False) 35 | y_semi = 
np.zeros_like(self.y_train, dtype=int) 36 | y_semi[known_anom_id] = 1 37 | 38 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 39 | self.clf = RoSAS(epochs=1, hidden_dims=20, device=device, random_state=42) 40 | self.clf.fit(self.X_train, y_semi) 41 | 42 | def test_parameters(self): 43 | assert (hasattr(self.clf, 'decision_scores_') and 44 | self.clf.decision_scores_ is not None) 45 | assert (hasattr(self.clf, 'labels_') and 46 | self.clf.labels_ is not None) 47 | assert (hasattr(self.clf, 'threshold_') and 48 | self.clf.threshold_ is not None) 49 | 50 | def test_train_scores(self): 51 | assert_equal(len(self.clf.decision_scores_), self.X_train.shape[0]) 52 | 53 | def test_prediction_scores(self): 54 | pred_scores = self.clf.decision_function(self.X_test) 55 | 56 | # check score shapes 57 | assert_equal(pred_scores.shape[0], self.X_test.shape[0]) 58 | 59 | def test_prediction_labels(self): 60 | pred_labels = self.clf.predict(self.X_test) 61 | assert_equal(pred_labels.shape, self.y_test.shape) 62 | 63 | 64 | # def test_prediction_proba(self): 65 | # pred_proba = self.clf.predict_proba(self.X_test) 66 | # assert (pred_proba.min() >= 0) 67 | # assert (pred_proba.max() <= 1) 68 | # 69 | # def test_prediction_proba_linear(self): 70 | # pred_proba = self.clf.predict_proba(self.X_test, method='linear') 71 | # assert (pred_proba.min() >= 0) 72 | # assert (pred_proba.max() <= 1) 73 | # 74 | # def test_prediction_proba_unify(self): 75 | # pred_proba = self.clf.predict_proba(self.X_test, method='unify') 76 | # assert (pred_proba.min() >= 0) 77 | # assert (pred_proba.max() <= 1) 78 | # 79 | # def test_prediction_proba_parameter(self): 80 | # with assert_raises(ValueError): 81 | # self.clf.predict_proba(self.X_test, method='something') 82 | 83 | def test_prediction_labels_confidence(self): 84 | pred_labels, confidence = self.clf.predict(self.X_test, 85 | return_confidence=True) 86 | 87 | assert_equal(pred_labels.shape, self.y_test.shape) 88 | 
assert_equal(confidence.shape, self.y_test.shape) 89 | assert (confidence.min() >= 0) 90 | assert (confidence.max() <= 1) 91 | 92 | 93 | # def test_prediction_proba_linear_confidence(self): 94 | # pred_proba, confidence = self.clf.predict_proba(self.X_test, 95 | # method='linear', 96 | # return_confidence=True) 97 | # assert (pred_proba.min() >= 0) 98 | # assert (pred_proba.max() <= 1) 99 | # 100 | # assert_equal(confidence.shape, self.y_test.shape) 101 | # assert (confidence.min() >= 0) 102 | # assert (confidence.max() <= 1) 103 | # 104 | # def test_fit_predict(self): 105 | # pred_labels = self.clf.fit_predict(self.X_train) 106 | # assert_equal(pred_labels.shape, self.y_train.shape) 107 | # 108 | # def test_fit_predict_score(self): 109 | # self.clf.fit_predict_score(self.X_test, self.y_test) 110 | # self.clf.fit_predict_score(self.X_test, self.y_test, 111 | # scoring='roc_auc_score') 112 | # self.clf.fit_predict_score(self.X_test, self.y_test, 113 | # scoring='prc_n_score') 114 | # with assert_raises(NotImplementedError): 115 | # self.clf.fit_predict_score(self.X_test, self.y_test, 116 | # scoring='something') 117 | # 118 | # def test_predict_rank(self): 119 | # pred_socres = self.clf.decision_function(self.X_test) 120 | # pred_ranks = self.clf._predict_rank(self.X_test) 121 | # 122 | # # assert the order is reserved 123 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 124 | # assert_array_less(pred_ranks, self.X_train.shape[0] + 1) 125 | # assert_array_less(-0.1, pred_ranks) 126 | # 127 | # def test_predict_rank_normalized(self): 128 | # pred_socres = self.clf.decision_function(self.X_test) 129 | # pred_ranks = self.clf._predict_rank(self.X_test, normalized=True) 130 | # 131 | # # assert the order is reserved 132 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 133 | # assert_array_less(pred_ranks, 1.01) 134 | # assert_array_less(-0.1, pred_ranks) 135 | 136 | # def test_plot(self): 137 | # os, cutoff1, cutoff2 = 
class TestSLAD(unittest.TestCase):
    """Attribute and shape checks for a briefly trained SLAD detector."""

    def setUp(self):
        """Create synthetic data and train a SLAD model for two epochs."""
        self.n_train, self.n_test = 200, 100
        self.contamination = 0.1
        self.roc_floor = 0.8

        splits = generate_data(
            n_train=self.n_train, n_test=self.n_test, n_features=10,
            contamination=self.contamination, random_state=42)
        self.X_train, self.X_test, self.y_train, self.y_test = splits

        if torch.cuda.is_available():
            device = 'cuda'
        else:
            device = 'cpu'
        self.clf = SLAD(epochs=2, device=device)
        self.clf.fit(self.X_train)

    def test_parameters(self):
        """Every fitted attribute must exist and be set."""
        for fitted_attr in ('decision_scores_', 'labels_', 'threshold_'):
            assert getattr(self.clf, fitted_attr, None) is not None

    # NOTE: no test compares len(decision_scores_) with the training set
    # size, and ROC-AUC is not checked against roc_floor.

    def test_prediction_scores(self):
        """The number of scores equals the number of test samples."""
        scores = self.clf.decision_function(self.X_test)
        n_scores, n_samples = scores.shape[0], self.X_test.shape[0]
        assert_equal(n_scores, n_samples)

    def test_prediction_labels(self):
        """Predicted labels share the shape of the test labels."""
        labels = self.clf.predict(self.X_test)
        assert_equal(labels.shape, self.y_test.shape)

    def test_prediction_labels_confidence(self):
        """Labels and confidences match y_test's shape; confidence in [0, 1]."""
        labels, confidence = self.clf.predict(
            self.X_test, return_confidence=True)
        expected_shape = self.y_test.shape

        assert_equal(labels.shape, expected_shape)
        assert_equal(confidence.shape, expected_shape)
        assert 0 <= confidence.min()
        assert 1 >= confidence.max()

    def tearDown(self):
        """Nothing to clean up."""
class TestTcnED(unittest.TestCase):
    """Smoke tests for the TcnED time-series detector on the omi-1 data.

    The tests only check fitted attributes and output shapes of a briefly
    trained model; detection quality is not asserted.
    """

    def setUp(self):
        """Load the omi-1 train/test CSVs and fit a fresh TcnED model."""
        # Resolve the data directory from this file's location
        # (<repo>/deepod/test/ -> <repo>/data/omi-1) so the tests do not
        # depend on the working directory. Fall back to the cwd-relative
        # location when the anchored directory is absent.
        repo_root = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '..', '..'))
        data_dir = os.path.join(repo_root, 'data', 'omi-1')
        if not os.path.isdir(data_dir):
            data_dir = 'data/omi-1'
        train_file = os.path.join(data_dir, 'omi-1_train.csv')
        test_file = os.path.join(data_dir, 'omi-1_test.csv')

        train_df = pd.read_csv(train_file, sep=',', index_col=0)
        test_df = pd.read_csv(test_file, index_col=0)
        # Keep the test labels, then drop the 'label' column from both
        # frames so only feature columns are passed to the model.
        y = test_df['label'].values
        train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1)
        self.Xts_train = train_df.values
        self.Xts_test = test_df.values
        self.yts_test = y

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.clf = TcnED(seq_len=100, stride=5,
                         epochs=5, hidden_dims=50,
                         device=device, random_state=42)
        self.clf.fit(self.Xts_train)

    def test_parameters(self):
        """Fitting must populate the scores, labels and threshold."""
        assert (hasattr(self.clf, 'decision_scores_') and
                self.clf.decision_scores_ is not None)
        assert (hasattr(self.clf, 'labels_') and
                self.clf.labels_ is not None)
        assert (hasattr(self.clf, 'threshold_') and
                self.clf.threshold_ is not None)

    def test_train_scores(self):
        """There is one training score per training row."""
        assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0])

    def test_prediction_scores(self):
        """decision_function returns one score per test row."""
        pred_scores = self.clf.decision_function(self.Xts_test)
        assert_equal(pred_scores.shape[0], self.Xts_test.shape[0])

    def test_prediction_labels(self):
        """predict returns labels shaped like the ground-truth vector."""
        pred_labels = self.clf.predict(self.Xts_test)
        assert_equal(pred_labels.shape, self.yts_test.shape)

    def test_prediction_labels_confidence(self):
        """predict(return_confidence=True) also yields confidences in [0, 1]."""
        pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True)

        assert_equal(pred_labels.shape, self.yts_test.shape)
        assert_equal(confidence.shape, self.yts_test.shape)
        assert (confidence.min() >= 0)
        assert (confidence.max() <= 1)

    # TODO(review): predict_proba, fit_predict, fit_predict_score and
    # _predict_rank are not exercised here -- confirm whether the model
    # exposes them before adding tests.
'data/omi-1/omi-1_train.csv' 25 | test_file = 'data/omi-1/omi-1_test.csv' 26 | # test_file = 'data/omi-1/omi-1_test.csv' 27 | train_df = pd.read_csv(train_file, sep=',', index_col=0) 28 | test_df = pd.read_csv(test_file, index_col=0) 29 | y = test_df['label'].values 30 | train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1) 31 | self.Xts_train = train_df.values 32 | self.Xts_test = test_df.values 33 | self.yts_test = y 34 | 35 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 36 | self.clf = TimesNet( 37 | seq_len=100, stride=1, epochs=2, 38 | batch_size=32, lr=1e-4, 39 | device=device, random_state=42 40 | ) 41 | 42 | self.clf.fit(self.Xts_train) 43 | 44 | def test_parameters(self): 45 | assert (hasattr(self.clf, 'decision_scores_') and 46 | self.clf.decision_scores_ is not None) 47 | assert (hasattr(self.clf, 'labels_') and 48 | self.clf.labels_ is not None) 49 | assert (hasattr(self.clf, 'threshold_') and 50 | self.clf.threshold_ is not None) 51 | 52 | def test_train_scores(self): 53 | assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0]) 54 | 55 | def test_prediction_scores(self): 56 | pred_scores = self.clf.decision_function(self.Xts_test) 57 | assert_equal(pred_scores.shape[0], self.Xts_test.shape[0]) 58 | 59 | def test_prediction_labels(self): 60 | pred_labels = self.clf.predict(self.Xts_test) 61 | assert_equal(pred_labels.shape, self.yts_test.shape) 62 | 63 | # def test_prediction_proba(self): 64 | # pred_proba = self.clf.predict_proba(self.X_test) 65 | # assert (pred_proba.min() >= 0) 66 | # assert (pred_proba.max() <= 1) 67 | # 68 | # def test_prediction_proba_linear(self): 69 | # pred_proba = self.clf.predict_proba(self.X_test, method='linear') 70 | # assert (pred_proba.min() >= 0) 71 | # assert (pred_proba.max() <= 1) 72 | # 73 | # def test_prediction_proba_unify(self): 74 | # pred_proba = self.clf.predict_proba(self.X_test, method='unify') 75 | # assert (pred_proba.min() >= 0) 76 | # assert 
(pred_proba.max() <= 1) 77 | # 78 | # def test_prediction_proba_parameter(self): 79 | # with assert_raises(ValueError): 80 | # self.clf.predict_proba(self.X_test, method='something') 81 | 82 | def test_prediction_labels_confidence(self): 83 | pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True) 84 | 85 | assert_equal(pred_labels.shape, self.yts_test.shape) 86 | assert_equal(confidence.shape, self.yts_test.shape) 87 | assert (confidence.min() >= 0) 88 | assert (confidence.max() <= 1) 89 | 90 | # def test_prediction_proba_linear_confidence(self): 91 | # pred_proba, confidence = self.clf.predict_proba(self.X_test, 92 | # method='linear', 93 | # return_confidence=True) 94 | # assert (pred_proba.min() >= 0) 95 | # assert (pred_proba.max() <= 1) 96 | # 97 | # assert_equal(confidence.shape, self.y_test.shape) 98 | # assert (confidence.min() >= 0) 99 | # assert (confidence.max() <= 1) 100 | # 101 | # def test_fit_predict(self): 102 | # pred_labels = self.clf.fit_predict(self.X_train) 103 | # assert_equal(pred_labels.shape, self.y_train.shape) 104 | # 105 | # def test_fit_predict_score(self): 106 | # self.clf.fit_predict_score(self.X_test, self.y_test) 107 | # self.clf.fit_predict_score(self.X_test, self.y_test, 108 | # scoring='roc_auc_score') 109 | # self.clf.fit_predict_score(self.X_test, self.y_test, 110 | # scoring='prc_n_score') 111 | # with assert_raises(NotImplementedError): 112 | # self.clf.fit_predict_score(self.X_test, self.y_test, 113 | # scoring='something') 114 | # 115 | # def test_predict_rank(self): 116 | # pred_socres = self.clf.decision_function(self.X_test) 117 | # pred_ranks = self.clf._predict_rank(self.X_test) 118 | # 119 | # # assert the order is reserved 120 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 121 | # assert_array_less(pred_ranks, self.X_train.shape[0] + 1) 122 | # assert_array_less(-0.1, pred_ranks) 123 | # 124 | # def test_predict_rank_normalized(self): 125 | # pred_socres = 
self.clf.decision_function(self.X_test) 126 | # pred_ranks = self.clf._predict_rank(self.X_test, normalized=True) 127 | # 128 | # # assert the order is reserved 129 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 130 | # assert_array_less(pred_ranks, 1.01) 131 | # assert_array_less(-0.1, pred_ranks) 132 | 133 | # def test_plot(self): 134 | # os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1) 135 | # assert_array_less(0, os) 136 | 137 | # def test_model_clone(self): 138 | # clone_clf = clone(self.clf) 139 | 140 | def tearDown(self): 141 | pass 142 | 143 | 144 | if __name__ == '__main__': 145 | unittest.main() -------------------------------------------------------------------------------- /deepod/test/test_tranad.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import sys 7 | import unittest 8 | 9 | # noinspection PyProtectedMember 10 | from numpy.testing import assert_equal 11 | from sklearn.metrics import roc_auc_score 12 | import torch 13 | import pandas as pd 14 | 15 | # temporary solution for relative imports in case pyod is not installed 16 | # if deepod is installed, no need to use the following line 17 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 18 | 19 | from deepod.models.time_series.tranad import TranAD 20 | 21 | 22 | class TestTranAD(unittest.TestCase): 23 | def setUp(self): 24 | train_file = 'data/omi-1/omi-1_train.csv' 25 | test_file = 'data/omi-1/omi-1_test.csv' 26 | train_df = pd.read_csv(train_file, sep=',', index_col=0) 27 | test_df = pd.read_csv(test_file, index_col=0) 28 | y = test_df['label'].values 29 | train_df, test_df = train_df.drop('label', axis=1), test_df.drop('label', axis=1) 30 | self.Xts_train = train_df.values 31 | self.Xts_test = test_df.values 32 | self.yts_test = y 33 | 34 | device = 'cuda' if 
torch.cuda.is_available() else 'cpu' 35 | self.clf = TranAD(seq_len=100, stride=5, epochs=2, 36 | device=device, random_state=42) 37 | self.clf.fit(self.Xts_train) 38 | 39 | def test_parameters(self): 40 | assert (hasattr(self.clf, 'decision_scores_') and 41 | self.clf.decision_scores_ is not None) 42 | assert (hasattr(self.clf, 'labels_') and 43 | self.clf.labels_ is not None) 44 | assert (hasattr(self.clf, 'threshold_') and 45 | self.clf.threshold_ is not None) 46 | 47 | def test_train_scores(self): 48 | assert_equal(len(self.clf.decision_scores_), self.Xts_train.shape[0]) 49 | 50 | def test_prediction_scores(self): 51 | pred_scores = self.clf.decision_function(self.Xts_test) 52 | assert_equal(pred_scores.shape[0], self.Xts_test.shape[0]) 53 | 54 | def test_prediction_labels(self): 55 | pred_labels = self.clf.predict(self.Xts_test) 56 | assert_equal(pred_labels.shape, self.yts_test.shape) 57 | 58 | # def test_prediction_proba(self): 59 | # pred_proba = self.clf.predict_proba(self.X_test) 60 | # assert (pred_proba.min() >= 0) 61 | # assert (pred_proba.max() <= 1) 62 | # 63 | # def test_prediction_proba_linear(self): 64 | # pred_proba = self.clf.predict_proba(self.X_test, method='linear') 65 | # assert (pred_proba.min() >= 0) 66 | # assert (pred_proba.max() <= 1) 67 | # 68 | # def test_prediction_proba_unify(self): 69 | # pred_proba = self.clf.predict_proba(self.X_test, method='unify') 70 | # assert (pred_proba.min() >= 0) 71 | # assert (pred_proba.max() <= 1) 72 | # 73 | # def test_prediction_proba_parameter(self): 74 | # with assert_raises(ValueError): 75 | # self.clf.predict_proba(self.X_test, method='something') 76 | 77 | def test_prediction_labels_confidence(self): 78 | pred_labels, confidence = self.clf.predict(self.Xts_test, return_confidence=True) 79 | 80 | assert_equal(pred_labels.shape, self.yts_test.shape) 81 | assert_equal(confidence.shape, self.yts_test.shape) 82 | assert (confidence.min() >= 0) 83 | assert (confidence.max() <= 1) 84 | 85 | # def 
test_prediction_proba_linear_confidence(self): 86 | # pred_proba, confidence = self.clf.predict_proba(self.X_test, 87 | # method='linear', 88 | # return_confidence=True) 89 | # assert (pred_proba.min() >= 0) 90 | # assert (pred_proba.max() <= 1) 91 | # 92 | # assert_equal(confidence.shape, self.y_test.shape) 93 | # assert (confidence.min() >= 0) 94 | # assert (confidence.max() <= 1) 95 | # 96 | # def test_fit_predict(self): 97 | # pred_labels = self.clf.fit_predict(self.X_train) 98 | # assert_equal(pred_labels.shape, self.y_train.shape) 99 | # 100 | # def test_fit_predict_score(self): 101 | # self.clf.fit_predict_score(self.X_test, self.y_test) 102 | # self.clf.fit_predict_score(self.X_test, self.y_test, 103 | # scoring='roc_auc_score') 104 | # self.clf.fit_predict_score(self.X_test, self.y_test, 105 | # scoring='prc_n_score') 106 | # with assert_raises(NotImplementedError): 107 | # self.clf.fit_predict_score(self.X_test, self.y_test, 108 | # scoring='something') 109 | # 110 | # def test_predict_rank(self): 111 | # pred_socres = self.clf.decision_function(self.X_test) 112 | # pred_ranks = self.clf._predict_rank(self.X_test) 113 | # 114 | # # assert the order is reserved 115 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 116 | # assert_array_less(pred_ranks, self.X_train.shape[0] + 1) 117 | # assert_array_less(-0.1, pred_ranks) 118 | # 119 | # def test_predict_rank_normalized(self): 120 | # pred_socres = self.clf.decision_function(self.X_test) 121 | # pred_ranks = self.clf._predict_rank(self.X_test, normalized=True) 122 | # 123 | # # assert the order is reserved 124 | # assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=3) 125 | # assert_array_less(pred_ranks, 1.01) 126 | # assert_array_less(-0.1, pred_ranks) 127 | 128 | # def test_plot(self): 129 | # os, cutoff1, cutoff2 = self.clf.explain_outlier(ind=1) 130 | # assert_array_less(0, os) 131 | 132 | # def test_model_clone(self): 133 | # clone_clf = clone(self.clf) 134 | 135 | 
class TestUSAD(unittest.TestCase):
    """Smoke tests for the USAD time-series detector on the omi-1 data set.

    Every test works on a model that is freshly fitted in ``setUp``.
    """

    def setUp(self):
        """Read the omi-1 train/test CSV files and fit a small USAD model.

        The CSV paths are relative, so they are resolved against the
        current working directory of the test run.
        """
        train_frame = pd.read_csv('data/omi-1/omi-1_train.csv',
                                  sep=',', index_col=0)
        test_frame = pd.read_csv('data/omi-1/omi-1_test.csv', index_col=0)

        # Only the labels of the test split are kept; the 'label' column is
        # removed from both feature matrices.
        self.yts_test = test_frame['label'].values
        self.Xts_train = train_frame.drop('label', axis=1).values
        self.Xts_test = test_frame.drop('label', axis=1).values

        if torch.cuda.is_available():
            device = 'cuda'
        else:
            device = 'cpu'

        self.clf = USAD(seq_len=100, stride=5,
                        epochs=5, hidden_dims=50,
                        device=device, random_state=42)
        self.clf.fit(self.Xts_train)

    def test_parameters(self):
        """The fitted model exposes non-None training attributes."""
        for fitted_attr in ('decision_scores_', 'labels_', 'threshold_'):
            assert (hasattr(self.clf, fitted_attr) and
                    getattr(self.clf, fitted_attr) is not None)

    def test_train_scores(self):
        """There is one training score per training time step."""
        n_train_steps = self.Xts_train.shape[0]
        assert_equal(len(self.clf.decision_scores_), n_train_steps)

    def test_prediction_scores(self):
        """``decision_function`` yields one score per test time step."""
        test_scores = self.clf.decision_function(self.Xts_test)
        assert_equal(test_scores.shape[0], self.Xts_test.shape[0])

    def test_prediction_labels(self):
        """``predict`` yields labels shaped like the ground truth."""
        predicted = self.clf.predict(self.Xts_test)
        assert_equal(predicted.shape, self.yts_test.shape)

    def test_prediction_labels_confidence(self):
        """``predict(return_confidence=True)`` adds confidences in [0, 1]."""
        predicted, conf = self.clf.predict(self.Xts_test,
                                           return_confidence=True)
        expected_shape = self.yts_test.shape

        assert_equal(predicted.shape, expected_shape)
        assert_equal(conf.shape, expected_shape)
        assert conf.min() >= 0
        assert conf.max() <= 1

    # TODO: the PyOD-style tests for predict_proba, fit_predict,
    # fit_predict_score, _predict_rank, explain_outlier and clone are
    # currently disabled for this detector.

    def tearDown(self):
        """Nothing to clean up."""
        pass
/deepod/utils/data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Utility functions for manipulating data 3 | """ 4 | # Author: Yue Zhao 5 | # Author: Yahya Almardeny 6 | # License: BSD 2 clause 7 | 8 | from warnings import warn 9 | from sklearn.utils import check_X_y 10 | from sklearn.utils import check_random_state 11 | from sklearn.utils import check_consistent_length 12 | import numpy as np 13 | 14 | 15 | def _generate_data(n_inliers, n_outliers, n_features, coef, offset, 16 | random_state, n_nan=0, n_inf=0): 17 | """Internal function to generate data samples. 18 | 19 | Parameters 20 | ---------- 21 | n_inliers : int 22 | The number of inliers. 23 | 24 | n_outliers : int 25 | The number of outliers. 26 | 27 | n_features : int 28 | The number of features (dimensions). 29 | 30 | coef : float in range [0,1)+0.001 31 | The coefficient of data generation. 32 | 33 | offset : int 34 | Adjust the value range of Gaussian and Uniform. 35 | 36 | random_state : int, RandomState instance or None, optional (default=None) 37 | If int, random_state is the seed used by the random number generator; 38 | If RandomState instance, random_state is the random number generator; 39 | If None, the random number generator is the RandomState instance used 40 | by `np.random`. 41 | 42 | n_nan : int 43 | The number of values that are missing (np.NaN). Defaults to zero. 44 | 45 | n_inf : int 46 | The number of values that are infinite. (np.infty). Defaults to zero. 47 | 48 | Returns 49 | ------- 50 | X : numpy array of shape (n_train, n_features) 51 | Data. 52 | 53 | y : numpy array of shape (n_train,) 54 | Ground truth. 
55 | """ 56 | 57 | inliers = coef * random_state.randn(n_inliers, n_features) + offset 58 | outliers = random_state.uniform(low=-1 * offset, high=offset, 59 | size=(n_outliers, n_features)) 60 | X = np.r_[inliers, outliers] 61 | 62 | y = np.r_[np.zeros((n_inliers,)), np.ones((n_outliers,))] 63 | 64 | if n_nan > 0: 65 | X = np.r_[X, np.full((n_nan, n_features), np.NaN)] 66 | y = np.r_[y, np.full((n_nan), np.NaN)] 67 | 68 | if n_inf > 0: 69 | X = np.r_[X, np.full((n_inf, n_features), np.infty)] 70 | y = np.r_[y, np.full((n_inf), np.infty)] 71 | 72 | return X, y 73 | 74 | 75 | def generate_data(n_train=1000, n_test=500, n_features=2, contamination=0.1, 76 | train_only=False, offset=10, 77 | random_state=None, n_nan=0, n_inf=0): 78 | """Utility function to generate synthesized data. 79 | Normal data is generated by a multivariate Gaussian distribution and 80 | outliers are generated by a uniform distribution. 81 | "X_train, X_test, y_train, y_test" are returned. 82 | 83 | Parameters 84 | ---------- 85 | n_train : int, (default=1000) 86 | The number of training points to generate. 87 | 88 | n_test : int, (default=500) 89 | The number of test points to generate. 90 | 91 | n_features : int, optional (default=2) 92 | The number of features (dimensions). 93 | 94 | contamination : float in (0., 0.5), optional (default=0.1) 95 | The amount of contamination of the data set, i.e. 96 | the proportion of outliers in the data set. Used when fitting to 97 | define the threshold on the decision function. 98 | 99 | train_only : bool, optional (default=False) 100 | If true, generate train data only. 101 | 102 | offset : int, optional (default=10) 103 | Adjust the value range of Gaussian and Uniform. 
104 | 105 | random_state : int, RandomState instance or None, optional (default=None) 106 | If int, random_state is the seed used by the random number generator; 107 | If RandomState instance, random_state is the random number generator; 108 | If None, the random number generator is the RandomState instance used 109 | by `np.random`. 110 | 111 | n_nan : int 112 | The number of values that are missing (np.NaN). Defaults to zero. 113 | 114 | n_inf : int 115 | The number of values that are infinite. (np.infty). Defaults to zero. 116 | 117 | Returns 118 | ------- 119 | X_train : numpy array of shape (n_train, n_features) 120 | Training data. 121 | 122 | X_test : numpy array of shape (n_test, n_features) 123 | Test data. 124 | 125 | y_train : numpy array of shape (n_train,) 126 | Training ground truth. 127 | 128 | y_test : numpy array of shape (n_test,) 129 | Test ground truth. 130 | 131 | """ 132 | 133 | # initialize a random state and seeds for the instance 134 | random_state = check_random_state(random_state) 135 | offset_ = random_state.randint(low=offset) 136 | coef_ = random_state.random_sample() + 0.001 # in case of underflow 137 | 138 | n_outliers_train = int(n_train * contamination) 139 | n_inliers_train = int(n_train - n_outliers_train) 140 | 141 | X_train, y_train = _generate_data(n_inliers_train, n_outliers_train, 142 | n_features, coef_, offset_, random_state, 143 | n_nan, n_inf) 144 | 145 | if train_only: 146 | return X_train, y_train 147 | 148 | n_outliers_test = int(n_test * contamination) 149 | n_inliers_test = int(n_test - n_outliers_test) 150 | 151 | X_test, y_test = _generate_data(n_inliers_test, n_outliers_test, 152 | n_features, coef_, offset_, random_state, 153 | n_nan, n_inf) 154 | 155 | return X_train, X_test, y_train, y_test 156 | 157 | -------------------------------------------------------------------------------- /deepod/utils/utility.py: -------------------------------------------------------------------------------- 1 | import numpy as np 
def get_sub_seqs(x_arr, seq_len=100, stride=1):
    """
    Split a time series into sliding-window sub-sequences.

    Parameters
    ----------
    x_arr: np.array, required
        input original data with shape [time_length, channels]

    seq_len: int, optional (default=100)
        Size of window used to create subsequences from the data.
        If the series is shorter than ``seq_len``, a single window covering
        the whole series is produced.

    stride: int, optional (default=1)
        number of time points the window will move between two subsequences

    Returns
    -------
    x_seqs: np.array
        Split sub-sequences of input time-series data, stacked along a new
        leading axis (one entry per window start). Trailing time points that
        do not fill a complete window are dropped.
    """

    if x_arr.shape[0] < seq_len:
        seq_len = x_arr.shape[0]
    seq_starts = np.arange(0, x_arr.shape[0] - seq_len + 1, stride)
    x_seqs = np.array([x_arr[i:i + seq_len] for i in seq_starts])

    return x_seqs


def get_sub_seqs_label(y, seq_len=100, stride=1):
    """
    Derive one binary label per sliding window from point-wise labels.

    A window is labelled 1 when the sum of the labels it covers is non-zero,
    and 0 otherwise. Windows are laid out exactly as in ``get_sub_seqs``.

    Parameters
    ----------
    y: np.array, required
        data labels

    seq_len: int, optional (default=100)
        Size of window used to create subsequences from the data

    stride: int, optional (default=1)
        number of time points the window will move between two subsequences

    Returns
    -------
    y_seqs: np.array
        Split label of each sequence (floating-point array of 0. / 1.)
    """
    if y.shape[0] < seq_len:
        seq_len = y.shape[0]

    seq_starts = np.arange(0, y.shape[0] - seq_len + 1, stride)
    ys = np.array([y[i:i + seq_len] for i in seq_starts])

    # seq_len is 0 when the label array is empty; dividing by it would yield
    # NaN, and NaN != 0 would mark the (empty) window as anomalous.
    y = np.sum(ys, axis=1) / max(seq_len, 1)

    y_binary = np.zeros_like(y)
    y_binary[np.where(y != 0)[0]] = 1
    return y_binary
3 | """ 4 | # Based on pysad package, pyod package 5 | # License: 6 | 7 | # PEP0440 compatible formatted version, see: 8 | # https://www.python.org/dev/peps/pep-0440/ 9 | # 10 | # Generic release markers: 11 | # X.Y 12 | # X.Y.Z # For bugfix releases 13 | # 14 | # Admissible pre-release markers: 15 | # X.YaN # Alpha release 16 | # X.YbN # Beta release 17 | # X.YrcN # Release Candidate 18 | # X.Y # Final release 19 | # 20 | # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. 21 | # 'X.Y.dev0' is the canonical version of 'X.Y.dev' 22 | # 23 | 24 | __version__ = '0.5.0' 25 | -------------------------------------------------------------------------------- /docs/Gemfile: -------------------------------------------------------------------------------- 1 | source "https://gems.ruby-china.com" 2 | gem "jekyll-rtd-theme" 3 | 4 | gem "github-pages", group: :jekyll_plugins -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | DEBUG=JEKYLL_GITHUB_TOKEN=blank PAGES_API_URL=http://0.0.0.0 2 | 3 | default: 4 | @gem install jekyll bundler && bundle install 5 | 6 | update: 7 | @bundle update 8 | 9 | clean: 10 | @bundle exec jekyll clean 11 | 12 | build: clean 13 | @${DEBUG} bundle exec jekyll build --profile --config _config.yml,.debug.yml 14 | 15 | server: clean 16 | @${DEBUG} bundle exec jekyll server --livereload --config _config.yml,.debug.yml -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | .. currentmodule:: {{ module }} 4 | .. autoclass:: {{ objname }} 5 | {% block methods %} 6 | {% if methods %} 7 | .. rubric:: Methods 8 | .. 
autosummary:: 9 | {% for item in methods %} 10 | ~{{ name }}.{{ item }} 11 | {%- endfor %} 12 | {% endif %} 13 | {% endblock %} 14 | {% block attributes %} 15 | {% if attributes %} 16 | .. rubric:: Attributes 17 | .. autosummary:: 18 | {% for item in attributes %} 19 | ~{{ name }}.{{ item }} 20 | {%- endfor %} 21 | {% endif %} 22 | {% endblock %} -------------------------------------------------------------------------------- /docs/additional.contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============= 3 | 4 | Everyone is very welcome to contribute. 5 | 6 | We share the same values as the `scikit-learn `_ community 7 | 8 | 9 | .. note:: 10 | We are a community based on openness and friendly, didactic, discussions. 11 | 12 | We aspire to treat everybody equally, and value their contributions. We are particularly seeking people 13 | from underrepresented backgrounds in Open Source Software and scikit-learn in particular to participate 14 | and contribute their expertise and experience. 15 | 16 | Decisions are made based on technical merit and consensus. 17 | 18 | Code is not the only way to help the project. Reviewing pull requests, 19 | answering questions to help others on mailing lists or issues, organizing and teaching tutorials, 20 | working on the website, improving the documentation, are all priceless contributions. 21 | 22 | We abide by the principles of openness, respect, and consideration of others of the Python 23 | Software Foundation: https://www.python.org/psf/codeofconduct/ 24 | 25 | In case you experience issues using this package, do not hesitate to submit a ticket to the GitHub issue tracker. 26 | You are also welcome to post feature requests or pull requests. 27 | 28 | 29 | 30 | For any questions, you may open an issue on GitHub or drop me an email at hongzuoxu(at)126.com.
31 | 32 | 33 | TODO list 34 | --------- 35 | We attach a TODO list below, we are very pleased if you can contribute anything on this list. 36 | -------------------------------------------------------------------------------- /docs/additional.license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | 5 | This project is licensed under the BSD 2-Clause License. 6 | 7 | .. code-block:: 8 | 9 | BSD 2-Clause License 10 | 11 | Copyright (c) 2023, Hongzuo Xu All rights reserved. 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are met: 15 | 16 | 1. Redistributions of source code must retain the above copyright notice, this 17 | list of conditions and the following disclaimer. 18 | 19 | 2. Redistributions in binary form must reproduce the above copyright notice, 20 | this list of conditions and the following disclaimer in the documentation 21 | and/or other materials provided with the distribution. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 26 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 27 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | 34 | -------------------------------------------------------------------------------- /docs/additional.star_history.rst: -------------------------------------------------------------------------------- 1 | Star History on Github 2 | ====================== 3 | 4 | Thanks go to everyone who has starred or forked our repository. 5 | 6 | 7 | .. image:: https://api.star-history.com/svg?repos=xuhongzuo/DeepOD&type=Date 8 | :target: https://star-history.com/#xuhongzuo/DeepOD&Date 9 | :align: center 10 | 11 | -------------------------------------------------------------------------------- /docs/api_cc.rst: -------------------------------------------------------------------------------- 1 | API CheatSheet 2 | ============== 3 | 4 | The following APIs are applicable for all detector models for easy use. 5 | 6 | * :func:`deepod.core.base_model.BaseDeepAD.fit`: Fit detector. y is ignored in unsupervised methods. 7 | * :func:`deepod.core.base_model.BaseDeepAD.decision_function`: Predict raw anomaly score of X using the fitted detector. 8 | * :func:`deepod.core.base_model.BaseDeepAD.predict`: Predict if a particular sample is an outlier or not using the fitted detector. 9 | 10 | 11 | Key Attributes of a fitted model: 12 | 13 | * :attr:`deepod.core.base_model.BaseDeepAD.decision_scores_`: The outlier scores of the training data. The higher, the more abnormal. 14 | Outliers tend to have higher scores. 15 | * :attr:`deepod.core.base_model.BaseDeepAD.labels_`: The binary labels of the training data. 0 stands for inliers and 1 for outliers/anomalies. 16 | 17 | 18 | See base class definition below: 19 | 20 | deepod.core.base_model module 21 | ----------------------------- 22 | 23 | ..
automodule:: deepod.core.base_model 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | :inherited-members: 28 | 29 | -------------------------------------------------------------------------------- /docs/api_reference.base_networks.rst: -------------------------------------------------------------------------------- 1 | Network Architectures 2 | ------------------------------------ 3 | 4 | 5 | 6 | .. currentmodule:: deepod 7 | 8 | .. autosummary:: 9 | :nosignatures: 10 | :template: class.rst 11 | :toctree: generated 12 | 13 | core.networks.MLPnet 14 | core.networks.MlpAE 15 | core.networks.GRUNet 16 | core.networks.LSTMNet 17 | core.networks.ConvSeqEncoder 18 | core.networks.ConvNet 19 | core.networks.TcnAE 20 | core.networks.TCNnet 21 | core.networks.TSTransformerEncoder 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/api_reference.metrics.rst: -------------------------------------------------------------------------------- 1 | Evaluation Metrics 2 | =================== 3 | 4 | 5 | 6 | 7 | .. automodule:: deepod.metrics 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | :inherited-members: 12 | 13 | .. rubric:: References 14 | 15 | .. bibliography:: 16 | :cited: 17 | :labelprefix: B -------------------------------------------------------------------------------- /docs/api_reference.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ------------- 3 | 4 | This is the API documentation for ``DeepOD``. 5 | 6 | 7 | .. 
toctree:: 8 | 9 | api_reference.tabular 10 | api_reference.time_series 11 | api_reference.base_networks 12 | api_reference.metrics 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /docs/api_reference.tabular.rst: -------------------------------------------------------------------------------- 1 | Models for Tabular Data 2 | ------------------------------------------ 3 | 4 | 5 | .. automodule:: deepod.models.tabular 6 | :no-members: 7 | :no-inherited-members: 8 | 9 | 10 | .. currentmodule:: deepod 11 | 12 | 13 | Unsupervised Models 14 | ^^^^^^^^^^^^^^^^^^^^^ 15 | implemented unsupervised anomaly detection models 16 | 17 | .. autosummary:: 18 | :nosignatures: 19 | :template: class.rst 20 | :toctree: generated 21 | 22 | models.DeepSVDD 23 | models.RCA 24 | models.DevNet 25 | models.DeepIsolationForest 26 | models.REPEN 27 | models.SLAD 28 | models.ICL 29 | models.RDP 30 | models.GOAD 31 | models.NeuTraL 32 | 33 | Weakly-supervised Models 34 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 35 | implemented weakly-supervised anomaly detection models 36 | 37 | .. autosummary:: 38 | :nosignatures: 39 | :template: class.rst 40 | :toctree: generated 41 | 42 | models.DevNet 43 | models.DeepSAD 44 | models.FeaWAD 45 | models.RoSAS 46 | models.PReNet 47 | 48 | 49 | .. rubric:: References 50 | 51 | .. bibliography:: 52 | :cited: 53 | :labelprefix: B -------------------------------------------------------------------------------- /docs/api_reference.time_series.rst: -------------------------------------------------------------------------------- 1 | Models for Time Series 2 | ======================================== 3 | 4 | 5 | .. automodule:: deepod.models.time_series 6 | :no-members: 7 | :no-inherited-members: 8 | 9 | 10 | .. currentmodule:: deepod 11 | 12 | 13 | 14 | implemented unsupervised anomaly detection models for time series data. 15 | 16 | ..
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#

import os
import sys
from os.path import dirname, abspath

sys.path.insert(0, abspath('..'))
deepod_dir = dirname(dirname(abspath(__file__)))

# Execute deepod/version.py in this namespace so that ``__version__`` is
# available here without importing the package (and its heavy dependencies).
version_path = os.path.join(deepod_dir, 'deepod', 'version.py')
with open(version_path, encoding='utf-8') as version_file:
    exec(version_file.read())
# -- Project information -----------------------------------------------------

project = 'DeepOD'
copyright = '2023, Hongzuo Xu'
author = 'Hongzuo Xu'


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.coverage',
    'sphinx.ext.imgmath',
    'sphinx.ext.viewcode',
    'sphinx.ext.autosummary',
    'sphinxcontrib.bibtex',
    # 'sphinx.ext.napoleon',
    'sphinx_rtd_theme',
    'sphinx.ext.napoleon'
]

bibtex_bibfiles = ['zreferences.bib']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = ['.rst', '.md']

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path .
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'pyod.test.rst']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
# https://www.sphinx-doc.org/en/master/usage/theming.html#themes#Themes
# html_theme = 'default'
# html_theme = "alabaster"
# html_theme = 'sphinxawesome_theme'
html_theme = 'furo'
# html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {
#     'canonical_url': '',
#     'logo_only': False,
#     'display_version': True,
#     'prev_next_buttons_location': 'bottom',
#     'style_external_links': False,
#     #'vcs_pageview_mode': '',
#     #'style_nav_header_background': 'white',
#     # Toc options
#     'collapse_navigation': True,
#     'sticky_navigation': True,
#     'navigation_depth': 7,
#     'includehidden': True,
#     'titles_only': False,
# }

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

autosummary_generate = True
autodoc_default_options = {'members': True,
                           'inherited-members': True,
                           }
autodoc_typehints = "none"


# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}
# html_sidebars = {'**': ['globaltoc.html', 'relations.html', 'sourcelink.html',
#                         'searchbox.html']}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'deepoddoc'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree_ into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'deepod.tex', 'deepod Documentation',
     'Hongzuo Xu', 'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'deepod', 'deepod Documentation',
     [author], 1)
]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree_ into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'DeepOD', 'DeepOD Documentation',
     author, 'DeepOD', 'One line description of project.',
     'Miscellaneous'),
]

# -- Extension configuration -------------------------------------------------

# -- Options for intersphinx extension ---------------------------------------

# Example configuration for intersphinx: refer to the Python standard library.
# Named-entry format: {name: (base URL, inventory location)}.
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
image:: https://static.pepy.tech/personalized-badge/deepod?period=total&units=international_system&left_color=black&right_color=orange&left_text=Downloads 25 | :target: https://pepy.tech/project/deepod 26 | :alt: downloads 27 | 28 | 29 | 30 | 31 | ``DeepOD`` is an open-source python library for Deep Learning-based `Outlier Detection <https://en.wikipedia.org/wiki/Anomaly_detection>`_ 32 | and `Anomaly Detection <https://en.wikipedia.org/wiki/Anomaly_detection>`_. ``DeepOD`` supports tabular anomaly detection and time-series anomaly detection. 33 | 34 | 35 | DeepOD includes **27** deep outlier detection / anomaly detection algorithms (in unsupervised/weakly-supervised paradigm). 36 | More baseline algorithms will be included later. 37 | 38 | 39 | 40 | **DeepOD is featured for**: 41 | 42 | * **Unified APIs** across various algorithms. 43 | * **SOTA models** include reconstruction-, representation-learning-, and self-supervised-based latest deep learning methods. 44 | * **Comprehensive Testbed** that can be used to directly test different models on benchmark datasets (highly recommended for academic research). 45 | * **Versatile** in different data types including tabular and time-series data (DeepOD will support other data types like images, graph, log, trace, etc. in the future, welcome PR :telescope:). 46 | * **Diverse Network Structures** can be plugged into detection models, we now support LSTM, GRU, TCN, Conv, and Transformer for time-series data. (welcome PR as well :sparkles:) 47 | 48 | 49 | If you are interested in our project, we are pleased to have your stars and forks :thumbsup: :beers: . 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | Citation 58 | ~~~~~~~~~~~~~~~~~ 59 | If you use this library in your work, please cite this paper: 60 | 61 | Hongzuo Xu, Guansong Pang, Yijie Wang and Yongjun Wang, "Deep Isolation Forest for Anomaly Detection," in IEEE Transactions on Knowledge and Data Engineering, doi: 10.1109/TKDE.2023.3270293. 62 | 63 | 64 | You can also use the BibTeX entry below for citation. 65 | 66 | .. 
code-block:: bibtex 67 | 68 | @ARTICLE{xu2023deep, 69 | author={Xu, Hongzuo and Pang, Guansong and Wang, Yijie and Wang, Yongjun}, 70 | journal={IEEE Transactions on Knowledge and Data Engineering}, 71 | title={Deep Isolation Forest for Anomaly Detection}, 72 | year={2023}, 73 | volume={}, 74 | number={}, 75 | pages={1-14}, 76 | doi={10.1109/TKDE.2023.3270293} 77 | } 78 | 79 | 80 | 81 | 82 | ---- 83 | 84 | 85 | .. toctree:: 86 | :maxdepth: 2 87 | :hidden: 88 | :caption: Getting Started 89 | 90 | start.install 91 | start.examples 92 | start.model_save 93 | 94 | .. toctree:: 95 | :maxdepth: 2 96 | :hidden: 97 | :caption: Documentation 98 | 99 | api_reference 100 | api_cc 101 | 102 | .. toctree:: 103 | :maxdepth: 2 104 | :hidden: 105 | :caption: Additional Information 106 | 107 | additional.contributing 108 | additional.license 109 | additional.star_history -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19 2 | scipy>=1.5.1 3 | scikit_learn>=0.20.0 4 | pandas>=1.0.0 5 | torch>=1.10.0,<1.13.1 6 | tqdm>=4.62.3 7 | ray==2.6.1 8 | pyarrow>=11.0.0 9 | einops 10 | sphinx-rtd-theme==1.3.0 11 | sphinxawesome-theme==4.1.0 12 | sphinxcontrib-bibtex==2.5.0 13 | furo==2023.9.10 -------------------------------------------------------------------------------- /docs/start.examples.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ========= 3 | 4 | 5 | Directly Use Detection Models 6 | ------------------------------ 7 | 8 | 9 | DeepOD can be used in a few lines of code. 10 | This API style is the same as `Sklearn <https://scikit-learn.org/stable/>`_ and `PyOD <https://github.com/yzhao062/pyod>`_. 11 | 12 | 13 | **for tabular anomaly detection:** 14 | 15 | .. 
code-block:: python 16 | 17 | 18 | # unsupervised methods 19 | from deepod.models.tabular import DeepSVDD 20 | clf = DeepSVDD() 21 | clf.fit(X_train, y=None) 22 | scores = clf.decision_function(X_test) 23 | 24 | # weakly-supervised methods 25 | from deepod.models.tabular import DevNet 26 | clf = DevNet() 27 | clf.fit(X_train, y=semi_y) # semi_y uses 1 for known anomalies, and 0 for unlabeled data 28 | scores = clf.decision_function(X_test) 29 | 30 | # evaluation of tabular anomaly detection 31 | from deepod.metrics import tabular_metrics 32 | auc, ap, f1 = tabular_metrics(y_test, scores) 33 | 34 | 35 | **for time series anomaly detection:** 36 | 37 | 38 | .. code-block:: python 39 | 40 | 41 | # time series anomaly detection methods 42 | from deepod.models.time_series import TimesNet 43 | clf = TimesNet() 44 | clf.fit(X_train) 45 | scores = clf.decision_function(X_test) 46 | 47 | # evaluation of time series anomaly detection 48 | from deepod.metrics import ts_metrics 49 | from deepod.metrics import point_adjustment # execute point adjustment for time series ad 50 | eval_metrics = ts_metrics(labels, scores) 51 | adj_eval_metrics = ts_metrics(labels, point_adjustment(labels, scores)) 52 | 53 | 54 | 55 | Testbed 56 | -------- 57 | 58 | 59 | 60 | Testbed contains the whole process of testing an anomaly detection model, including data loading, preprocessing, anomaly detection, and evaluation. 61 | 62 | Please refer to ``testbed/`` 63 | 64 | * ``testbed/testbed_unsupervised_ad.py`` is for testing unsupervised tabular anomaly detection models. 65 | 66 | * ``testbed/testbed_unsupervised_tsad.py`` is for testing unsupervised time-series anomaly detection models. 
67 | 68 | 69 | Key arguments: 70 | 71 | * ``--input_dir``: name of the folder that contains datasets (.csv, .npy) 72 | 73 | * ``--dataset``: "FULL" represents testing all the files within the folder, or a list of dataset names using commas to split them (e.g., "10_cover*,20_letter*") 74 | 75 | * ``--model``: anomaly detection model name 76 | 77 | * ``--runs``: how many times the detection model is run; the average performance and its standard deviation are reported 78 | 79 | 80 | Example: 81 | 82 | 1. Download `ADBench <https://github.com/Minqi824/ADBench>`_ datasets. 83 | 2. Modify the ``dataset_root`` variable to point to the directory of the dataset. 84 | 3. ``input_dir`` is the sub-folder name of the ``dataset_root``, e.g., ``Classical`` or ``NLP_by_BERT``. 85 | 4. Run the following commands in bash: 86 | 87 | 88 | .. code-block:: bash 89 | 90 | 91 | cd DeepOD 92 | pip install . 93 | cd testbed 94 | python testbed_unsupervised_ad.py --model DeepIsolationForest --runs 5 --input_dir ADBench 95 | 96 | -------------------------------------------------------------------------------- /docs/start.install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | It is recommended to use **pip** for installation. Please make sure 5 | **the latest version** is installed, as DeepOD is updated frequently: 6 | 7 | .. code-block:: bash 8 | 9 | pip install deepod # normal install 10 | pip install --upgrade deepod # or update if needed 11 | 12 | 13 | Alternatively, you could clone and run setup.py file: 14 | 15 | .. code-block:: bash 16 | 17 | git clone https://github.com/xuhongzuo/deepod.git 18 | cd deepod 19 | pip install . 
20 | 21 | 22 | **Required Dependencies**\ : 23 | 24 | 25 | * Python 3.7+ 26 | * numpy>=1.19 27 | * scipy>=1.5.1 28 | * scikit_learn>=0.20.0 29 | * pandas>=1.0.0 30 | * torch>=1.10.0,<1.13.1 31 | * ray==2.6.1 32 | * pyarrow>=11.0.0 33 | * einops 34 | 35 | -------------------------------------------------------------------------------- /docs/start.model_save.rst: -------------------------------------------------------------------------------- 1 | Model Save & Load 2 | ================== 3 | 4 | The detection model class has ``save_model`` and ``load_model`` functions. 5 | 6 | We take the ``DeepSVDD`` model as an example. 7 | 8 | .. code-block:: python 9 | 10 | from deepod.models import DeepSVDD 11 | 12 | # training an anomaly detection model 13 | model = DeepSVDD() # or any other models in DeepOD 14 | model.fit(X_train) # training 15 | 16 | path = 'save_file.pkl' 17 | model.save_model(path) # save trained model at the assigned path 18 | 19 | # directly load trained model from path 20 | model = DeepSVDD.load_model(path) 21 | model.decision_function(X_test) 22 | # or 23 | model.predict(X_test) 24 | 25 | 26 | 27 | You can also directly use pickle for saving and loading DeepOD models. 28 | 29 | .. code-block:: python 30 | 31 | import pickle 32 | from deepod.models import DeepSVDD 33 | 34 | model = DeepSVDD() 35 | model.fit(X_train) 36 | 37 | with open('save_file.pkl', 'wb') as f: 38 | pickle.dump(model, f) 39 | 40 | with open('save_file.pkl', 'rb') as f: 41 | model = pickle.load(f) 42 | 43 | model.decision_function(X_test) 44 | 45 | 46 | -------------------------------------------------------------------------------- /docs/zreferences.bib: -------------------------------------------------------------------------------- 1 | @inproceedings{liu2008isolation, 2 | title={Isolation forest}, 3 | author={Liu, Fei Tony and Ting, Kai Ming and Zhou, Zhi-Hua}, 4 | booktitle={Data Mining, 2008. ICDM'08. 
Eighth IEEE International Conference on}, 5 | pages={413--422}, 6 | year={2008}, 7 | organization={IEEE} 8 | } 9 | 10 | @article{liu2012isolation, 11 | title={Isolation-based anomaly detection}, 12 | author={Liu, Fei Tony and Ting, Kai Ming and Zhou, Zhi-Hua}, 13 | journal={ACM Transactions on Knowledge Discovery from Data (TKDD)}, 14 | volume={6}, 15 | number={1}, 16 | pages={3}, 17 | year={2012}, 18 | publisher={ACM} 19 | } 20 | 21 | 22 | @article{xu2023dif, 23 | author={Xu, Hongzuo and Pang, Guansong and Wang, Yijie and Wang, Yongjun}, 24 | journal={IEEE Transactions on Knowledge and Data Engineering}, 25 | title={Deep Isolation Forest for Anomaly Detection}, 26 | year={2023}, 27 | volume={}, 28 | number={}, 29 | pages={1-14}, 30 | doi={10.1109/TKDE.2023.3270293} 31 | } 32 | 33 | 34 | @article{ruff2018deepsvdd, 35 | title={Deep One-Class Classification}, 36 | author={Ruff, Lukas and Vandermeulen, Robert and Görnitz, Nico and Deecke, Lucas and Siddiqui, Shoaib and Binder, Alexander and Müller, Emmanuel and Kloft, Marius}, 37 | journal={International conference on machine learning}, 38 | year={2018} 39 | } 40 | 41 | 42 | @inproceedings{pang2019deep, 43 | title={Deep anomaly detection with deviation networks}, 44 | author={Pang, Guansong and Shen, Chunhua and van den Hengel, Anton}, 45 | booktitle={Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery \& data mining}, 46 | pages={353--362}, 47 | year={2019} 48 | } -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: deepod 2 | dependencies: 3 | - numpy 4 | - pandas 5 | - scikit-learn 6 | - scipy 7 | - pytorch 8 | - tqdm 9 | - ray 10 | - pyarrow 11 | 12 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xuhongzuo/DeepOD/bb8c20c5c7e860a7c5f6dde4b4a9981bd0eb739d/examples/__init__.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19 2 | scipy>=1.5.1 3 | scikit_learn>=0.20.0 4 | pandas>=1.0.0 5 | torch>=1.10.0,<1.13.1 6 | tqdm>=4.62.3 7 | ray==2.6.1 8 | pyarrow>=11.0.0 9 | einops 10 | statsmodels 11 | arch -------------------------------------------------------------------------------- /requirements_ci.yml: -------------------------------------------------------------------------------- 1 | pytest==5.2.1 2 | pytest-cov==2.10.0 3 | coveralls==2.1.1 4 | setuptools>=49.6.0 5 | tqdm>=4.62.3 6 | einops 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | import os 3 | 4 | with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'README.rst'), encoding='utf-8') as f: 5 | readme = f.read() 6 | 7 | with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'requirements.txt'), encoding='utf-8') as f: 8 | requirements = f.read().splitlines() 9 | 10 | setuptools.setup( 11 | name="deepod", 12 | version="0.4.1", 13 | author="Hongzuo Xu", 14 | author_email="hongzuoxu@126.com", 15 | description="", 16 | long_description=readme, 17 | long_description_content_type="text/x-rst", 18 | license='MIT License', 19 | url="https://github.com/xuhongzuo/DeepOD", 20 | keywords=['outlier detection', 'anomaly detection', 'deep anomaly detection', 21 | 'deep learning', 'data mining'], 22 | packages=setuptools.find_packages(exclude=['test']), 23 | install_requires=requirements, 24 | classifiers=[ 25 | "Programming Language :: Python :: 3.7", 26 | "Programming Language :: Python :: 3.8", 27 | "Programming Language :: Python :: 3.9", 28 | "Programming 
Language :: Python :: 3.10", 29 | "License :: OSI Approved :: BSD License", 30 | ], 31 | ) 32 | -------------------------------------------------------------------------------- /testbed/configs.yaml: -------------------------------------------------------------------------------- 1 | DIF: 2 | rep_dim: 20 3 | hidden_dims: 32 4 | n_ensemble: 50 5 | n_estimators: 6 6 | 7 | DeepSVDDTS: 8 | network: Transformer 9 | rep_dim: 64 10 | hidden_dims: 512 11 | act: GELU 12 | lr: 0.00001 13 | epochs: 20 14 | batch_size: 128 15 | epoch_steps: -1 16 | 17 | TranAD: 18 | lr: 0.001 19 | epochs: 10 20 | batch_size: 128 21 | epoch_steps: -1 22 | 23 | USAD: 24 | hidden_dims: 100 25 | lr: 1e-3 26 | epochs: 10 27 | batch_size: 128 28 | 29 | COUTA: 30 | neg_batch_ratio: 0.2 31 | alpha: 0.1 32 | rep_dim: 16 33 | hidden_dims: 16 34 | lr: 0.0001 35 | epochs: 20 36 | batch_size: 64 37 | 38 | TcnED: 39 | epochs: 100 40 | lr: 0.00015 41 | dropout: 0.42 42 | kernel_size: 2 43 | rep_dim: 8 44 | hidden_dims: '8,8' 45 | batch_size: 512 46 | 47 | AnomalyTransformer: 48 | lr: 0.0001 49 | epochs: 10 50 | batch_size: 32 51 | k: 3 52 | 53 | TimesNet: 54 | lr: 0.0001 55 | batch_size: 128 56 | epochs: 10 57 | pred_len: 0 58 | e_layers: 2 59 | d_model: 64 60 | d_ff: 64 61 | dropout: 0.1 62 | top_k: 5 63 | num_kernels: 6 64 | 65 | 66 | 67 | RoSAS: 68 | epoch_steps: 16 69 | epochs: 200 70 | batch_size: 32 71 | lr: 0.005 72 | n_emb: 128 73 | alpha: 0.5 74 | margin: 1 75 | beta: 1 76 | 77 | DCdetector: 78 | patch_size: [1,3,5] 79 | batch_size: 128 80 | epochs: 10 81 | lr: 0.0001 82 | n_heads: 1 83 | d_model: 256 84 | e_layers: 3 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /testbed/testbed_unsupervised_ad.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | testbed of unsupervised tabular anomaly detection 4 | @Author: Hongzuo Xu 5 | """ 6 | 7 | import os 8 | import sys 9 | 10 | 
# Make the repository root importable so that ``deepod`` resolves to the local checkout.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import warnings
import argparse
import getpass
import time
import numpy as np
import importlib as imp
from utils import get_data_lst, read_data
from deepod.metrics import tabular_metrics


# Root folder of the tabular dataset collections; ``--input_dir`` is joined onto it.
dataset_root = f'/home/{getpass.getuser()}/dataset/1-tabular/'

parser = argparse.ArgumentParser()
parser.add_argument("--runs", type=int, default=5,
                    help="how many times we repeat the experiments to obtain the average performance")
parser.add_argument("--input_dir", type=str,
                    default='ADBench-classical',
                    help="the path of the data sets")
parser.add_argument("--output_dir", type=str, default='@record/',
                    help="the output file path")
parser.add_argument("--dataset", type=str, default='FULL',
                    help="FULL represents all the csv file in the folder, "
                         "or a list of data set names split by comma")
parser.add_argument("--model", type=str, default='DeepSVDD', help="",)
# ``--auto_hyper`` stays accepted (and is still the default) for backward compatibility;
# ``--no_auto_hyper`` is the only way to actually switch tuning off, because a
# ``store_true`` flag whose default is already True can never become False.
parser.add_argument("--auto_hyper", dest='auto_hyper', default=True, action='store_true',
                    help="tune hyper-parameters with the model's fit_auto_hyper before the runs (default)")
parser.add_argument("--no_auto_hyper", dest='auto_hyper', action='store_false',
                    help="skip hyper-parameter tuning and use the model's default configuration")

parser.add_argument("--normalization", type=str, default='min-max', help="",)
parser.add_argument('--silent_header', action='store_true')
parser.add_argument("--flag", type=str, default='')
args = parser.parse_args()


os.makedirs(args.output_dir, exist_ok=True)
data_lst = get_data_lst(os.path.join(dataset_root, args.input_dir), args.dataset)
print(os.path.join(dataset_root, args.input_dir))
print(data_lst)

# Resolve the detector class by name from the tabular model package.
module = imp.import_module('deepod.models.tabular')
model_class = getattr(module, args.model)

cur_time = time.strftime("%m-%d %H.%M.%S", time.localtime())
result_file = os.path.join(args.output_dir, f'{args.model}.{args.input_dir}.{args.flag}.csv')

if not args.silent_header:
    # Append a header describing this experiment; ``with`` closes the file even on error.
    with open(result_file, 'a') as f:
        print('\n---------------------------------------------------------', file=f)
        print(f'model: {args.model}, collection: {args.input_dir}, '
              f'datasets: {args.dataset}, normalization: {args.normalization}, {args.runs}runs, ', file=f)
        print('---------------------------------------------------------', file=f)
        print('data, auc-roc, std, auc-pr, std, f1, std, time', file=f)


for file in data_lst:
    dataset_name = os.path.splitext(os.path.split(file)[1])[0]

    print(f'\n-------------------------{dataset_name}-----------------------')

    split = '50%-normal'
    print(f'train-test split: {split}, normalization: {args.normalization}')
    x_train, y_train, x_test, y_test = read_data(file=file, split=split,
                                                 normalization=args.normalization,
                                                 seed=42)
    # Skip datasets for which read_data yields no training data.
    if x_train is None:
        continue

    auc_lst, ap_lst, f1_lst = np.zeros(args.runs), np.zeros(args.runs), np.zeros(args.runs)
    t1_lst, t2_lst = [], []
    runs = args.runs

    # Extra keyword arguments for the model constructor; stays empty unless tuning succeeds.
    model_configs = {}
    if args.auto_hyper:
        clf = model_class(random_state=42)

        # check whether the anomaly detection model supports ray tuning
        if hasattr(clf, 'fit_auto_hyper'):
            print(f'\nRunning [1/{args.runs}] of [{args.model}] on Dataset [{dataset_name}] (ray tune)')
            tuned_model_configs = clf.fit_auto_hyper(X=x_train,
                                                     X_test=x_test, y_test=y_test,
                                                     n_ray_samples=1, time_budget_s=None)
            model_configs = tuned_model_configs
            print(f'model parameter configure update to: {model_configs}')
            scores = clf.decision_function(x_test)

            auc, ap, f1 = tabular_metrics(y_test, scores)

            print(f'{dataset_name}, {auc:.4f}, {ap:.4f}, {f1:.4f}, {args.model}')
        else:
            # Fall back to the default configuration instead of aborting the whole
            # dataset loop, which would leave the result file without any rows.
            warnings.warn(f'anomaly detection model {args.model} '
                          f'does not support auto tuning hyper-parameters currently.')

    for i in range(runs):
        start_time = time.time()
        print(f'\nRunning [{i+1}/{args.runs}] of [{args.model}] on Dataset [{dataset_name}]')

        # A different seed per run so the reported std reflects run-to-run variance.
        clf = model_class(**model_configs, random_state=42+i)
        clf.fit(x_train)

        train_time = time.time()
        scores = clf.decision_function(x_test)
        done_time = time.time()

        auc, ap, f1 = tabular_metrics(y_test, scores)
        auc_lst[i], ap_lst[i], f1_lst[i] = auc, ap, f1
        # t1: training time only; t2: training plus inference time.
        t1_lst.append(train_time - start_time)
        t2_lst.append(done_time - start_time)

        print(f'{dataset_name}, {auc_lst[i]:.4f}, {ap_lst[i]:.4f}, {f1_lst[i]:.4f}, '
              f'{t1_lst[i]:.1f}/{t2_lst[i]:.1f}, {args.model}, {str(model_configs)}')

    avg_auc, avg_ap, avg_f1 = np.average(auc_lst), np.average(ap_lst), np.average(f1_lst)
    std_auc, std_ap, std_f1 = np.std(auc_lst), np.std(ap_lst), np.std(f1_lst)
    avg_time1 = np.average(t1_lst)
    avg_time2 = np.average(t2_lst)

    txt = f'{dataset_name}, ' \
          f'{avg_auc:.4f}, {std_auc:.4f}, ' \
          f'{avg_ap:.4f}, {std_ap:.4f}, ' \
          f'{avg_f1:.4f}, {std_f1:.4f}, ' \
          f'{avg_time1:.1f}/{avg_time2:.1f}, {args.model}, {str(model_configs)}'
    # One summary row per dataset, appended to the result file and echoed to stdout.
    with open(result_file, 'a') as f:
        print(txt, file=f)
    print(txt)