├── .DS_Store
├── .cproject
├── .gitignore
├── .gitmodules
├── .project
├── How_to_commit_push_submodules.txt
├── LICENSE
├── README.md
├── _pyisc_modules
├── AnomalyClustering.py
├── AnomalyDetector.py
├── BaseISC.py
├── DataObject.py
├── OutlierClustering.py
├── SklearnClassifier.py
├── SklearnOutlierDetector.py
└── __init__.py
├── bld.bat
├── build.sh
├── conda_build_config.yaml
├── docs
├── pyISC_classification_example.ipynb
├── pyISC_classification_example_2.ipynb
├── pyISC_multivariable_example.ipynb
├── pyISC_simple_anomaly_example.ipynb
├── pyISC_sklearn_anomaly_detection.ipynb
├── pyISC_sklearn_outlier_detection.ipynb
├── pyISC_sklearn_outlier_detection_comparison.ipynb
├── pyISC_sklearn_outlier_detection_realworld_data.ipynb
└── pyISC_tutorial.ipynb
├── meta.yaml
├── pyisc.i
├── setup.py
├── setup2.py
├── src
├── .DS_Store
├── _AnomalyDetector.cc
├── _AnomalyDetector.hh
├── _DataObject.cc
├── _DataObject.hh
├── _Format.cc
├── _Format.hh
├── _JSonExporter.cc
├── _JSonExporter.hh
├── _JSonImporter.cc
├── _JSonImporter.hh
├── mystring.cc
└── mystring.hh
└── unittests
├── __init__.py
├── test_AnomalyDetector.py
├── test_DataObject.py
├── test_JSonExportImport.py
├── test_SklearnOutlierDetection.py
├── test_max_index_problem.py
├── test_p_ConditionalGaussian.py
├── test_p_ConditionalGaussianDependencyMatrix.py
└── test_pickle_export_import.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STREAM3/pyISC/b5615fe5d6b3e474f7afcdf3f3e44b3dded2e889/.DS_Store
--------------------------------------------------------------------------------
/.cproject:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.cpp
2 | numpy.i
3 | build
4 | pyisc.py
5 | visisc.py
6 | .ipynb_checkpoints
7 | *~
8 | MANIFEST
9 | .idea
10 | *.pyc
11 |
12 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "dataformat"]
2 | path = dataformat
3 | url = https://github.com/sics-dna/dataformat
4 | [submodule "ArduinoJson"]
5 | path = ArduinoJson
6 | url = https://github.com/bblanchon/ArduinoJson.git
7 | [submodule "isc2"]
8 | path = isc2
9 | url = https://github.com/sics-dna/isc2
10 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | pyisc
4 |
5 |
6 | dataformat
7 | isc2
8 |
9 |
10 |
11 | org.python.pydev.PyDevBuilder
12 |
13 |
14 |
15 |
16 | org.eclipse.cdt.managedbuilder.core.genmakebuilder
17 | clean,full,incremental,
18 |
19 |
20 |
21 |
22 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder
23 | full,incremental,
24 |
25 |
26 |
27 |
28 |
29 | org.eclipse.cdt.core.cnature
30 | org.eclipse.cdt.core.ccnature
31 | org.eclipse.cdt.managedbuilder.core.managedBuildNature
32 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature
33 | org.python.pydev.pythonNature
34 |
35 |
36 |
--------------------------------------------------------------------------------
/How_to_commit_push_submodules.txt:
--------------------------------------------------------------------------------
1 | In pyISC folder:
2 |
3 | git commit -a -m "Committing submodule changes from superproject"
4 |
5 | git push --recurse-submodules=on-demand
6 |
7 | See https://stackoverflow.com/questions/14233939/git-submodule-commit-push-pull?noredirect=1&lq=1
8 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pyISC
2 |
3 | The Python API to the ISC anomaly detection and classification framework. The framework implements Bayesian statistical methods for anomaly detection and classification. Currently supported statistical models are: Poisson, Gamma and multivariate Gaussian distributions.
4 |
5 | ### Email forum(s)
6 |
7 | Questions regarding the use of the framework: https://groups.google.com/forum/#!forum/pyisc-users
8 |
9 | ## Prerequisite:
10 |
11 | Notice, pyISC/visISC has only been tested using 64 bit Python.
12 |
13 | ### Install Python distribution
14 |
15 | Install Python 2.7
16 |
17 | Anaconda is the recommended Python distribution : https://www.continuum.io/downloads
18 |
19 | Libraries:
20 | - numpy, scipy, scikit-learn (required for running pyisc)
21 | - matplotlib, ipython, jupyter, pandas (only required for running tutorial examples)
22 |
23 | Install with anaconda:
24 |
25 | (If you want to disable ssl verification when installing, you will find the instructions here.)
26 |
27 | `>> conda install numpy pandas scikit-learn ipython jupyter`
28 |
29 |
30 | If you intend to also install visISC, you have to downgrade the numpy installation to version 1.9
31 |
32 | `>> conda install numpy==1.9.3`
33 |
34 | ### Install a c++ compiler if not installed
35 |
36 | Windows:
37 |
38 | `>> conda install mingw libpython==1.0`
39 |
40 | OS X:
41 |
42 | Install the Xcode developer tools from App Store.
43 |
44 | ### Install Swig
45 |
46 | (search for suitable version with `>> anaconda search -t conda swig`)
47 |
48 | Windows:
49 |
50 | `>> conda install --channel https://conda.anaconda.org/salilab swig`
51 |
52 | OS X:
53 |
54 | `>> conda install --channel https://conda.anaconda.org/minrk swig`
55 |
56 |
57 | ## Installation
58 |
59 | For installing from source code, you need a git client
60 |
61 | Then:
62 |
63 | `>> git clone https://github.com/STREAM3/pyisc --recursive`
64 |
65 | `>> cd pyisc`
66 |
67 | `>> python setup.py install`
68 |
69 | ## Run tutorial
70 |
71 | `>> cd docs`
72 |
73 | `>> jupyter notebook pyISC_tutorial.ipynb`
74 |
75 | If not opened automatically, click on `pyISC_tutorial.ipynb` in the web page that was opened in a web browser.
76 |
77 | ### How to Cite
78 |
79 | Emruli, B., Olsson, T., & Holst, A. (2017). pyISC: A Bayesian Anomaly Detection Framework for Python. In Florida Artificial Intelligence Research Society Conference. Retrieved from https://aaai.org/ocs/index.php/FLAIRS/FLAIRS17/paper/view/15527
80 |
81 |
--------------------------------------------------------------------------------
/_pyisc_modules/AnomalyClustering.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------------------------
2 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB
3 | #
4 | # Main author: Tomas Olsson
5 | #
6 | # This code is free software: you can redistribute it and/or modify it
7 | # under the terms of the GNU Lesser General Public License as published
8 | # by the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This code is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU Lesser General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU Lesser General Public License
17 | # along with this code. If not, see .
18 | # ------------------------------------------------------------------------
19 |
20 | from pyisc import AnomalyDetector, SklearnClassifier
21 | from sklearn import utils
22 | import numpy as np
23 |
24 |
class AnomalyClustering(AnomalyDetector):
    """
    Anomaly detector that clusters the data before fitting.

    fit() runs an EM-style re-clustering of the data n_repeat times, keeps
    the labelling with the highest log-likelihood, and fits this detector on
    those labels. Scoring methods then predict cluster labels with a trained
    classifier when no labels are supplied.
    """
    # Hard cap on re-assignment iterations for one clustering attempt;
    # exceeding it restarts the clustering from a fresh random labelling.
    max_num_of_iterations = 1000

    def __init__(self, n_clusters=2, n_repeat=10, *anomaly_detector_params0, **anomaly_detector_params1):
        '''
        :param n_clusters: number of clusters to create
        :param n_repeat: number of independent clustering runs; the run with
            the largest log-likelihood is kept
        :param anomaly_detector_params0: positional args forwarded to AnomalyDetector.__init__
        :param anomaly_detector_params1: keyword args forwarded to AnomalyDetector.__init__
        '''
        self.n_clusters = n_clusters
        self.n_repeat = n_repeat
        # Stored so fit() can build fresh detectors with the same configuration.
        self.ad_parms0 = anomaly_detector_params0
        self.ad_parms1 = anomaly_detector_params1
        self.clf_ = None  # set by fit(); used to predict labels in scoring
        AnomalyDetector.__init__(self,*anomaly_detector_params0, **anomaly_detector_params1)

    def _create_detector(self, *ad_parms0, **ad_parms1):
        # Factory hook; subclasses may override to supply another detector type.
        return AnomalyDetector(*ad_parms0, **ad_parms1)

    def _detector_fit(self, X, y):
        # Fit this instance (as a plain AnomalyDetector) on X with labels y.
        return AnomalyDetector.fit(self, X, y)

    def fit(self,X,verbose=False):
        '''
        Cluster X into n_clusters clusters, n_repeat times, and fit this
        detector on the best labelling found.
        :param X: data accepted by AnomalyDetector.fit (e.g. a pyisc DataObject)
        :param verbose: if True, print clustering progress
        :return: self
        '''
        ss =[]             # log-likelihood of each run
        labels_list = []   # labelling produced by each run
        for i in range(self.n_repeat):
            od = self._create_detector(*self.ad_parms0, **self.ad_parms1)
            labels = self._train_clf(od, X, self.n_clusters,verbose=verbose)

            ss += [od.loglikelihood(X,labels)]

            labels_list += [labels]

            #print ss, labels

        # Refit on the labelling from the run with the highest log-likelihood.
        self._detector_fit(X, np.array(labels_list[np.argmax(ss)]))

        # Classifier used to predict cluster labels when scoring unlabelled data.
        self.clf_ = SklearnClassifier.clf(self)

        return self



    def _train_clf(self, ad, X, k=None, default_labels=None, verbose=False):
        '''
        Iteratively re-cluster X with detector ad until the labelling is stable.

        :param ad: anomaly detector that shall be trained
        :param X: a DataObject
        :param k: the number of clusters
        :param default_labels: the clustering is started with the provided clusters/labels, where k is ignored.
        :param verbose: if True, print progress each iteration
        :return: the converged cluster labels (one label per row of X)
        '''
        cluster_labels = default_labels

        # Oscillation detection: remember the movement counts of the last few
        # iterations; the same count reappearing repeatedly suggests a cycle.
        count_equal_movements = 0
        num_of_last_movements = 5 # number of recent movement counts kept
        last_movements = [-1 for _ in range(num_of_last_movements)]
        num_of_iterations = 0

        while True:
            if cluster_labels is None: # Restart the clustering
                # Random balanced init: row i gets cluster i mod k, then shuffle.
                cluster_labels = np.array(utils.shuffle(np.mod(np.array(list(range(len(X)))), k))) if k > 1 else np.array([0 for _ in range(len(X))])
                last_movements = [-1 for _ in range(num_of_last_movements)]
                num_of_iterations = 0
                if verbose:
                    print("Initialized clusters",np.unique(cluster_labels))

            ad.fit(X, cluster_labels)
            if ad.classes_ == []:
                # Fit produced no classes; fall back to a single cluster.
                ad.fit(X, np.zeros((len(X)),))

            # Re-assign every row to its most probable cluster.
            clf = SklearnClassifier.clf(ad)
            cluster_labels_new = clf.predict(X)

            # Number of rows that changed cluster in this iteration.
            movements = sum((cluster_labels_new != cluster_labels) * 1.0)

            if movements in last_movements:
                count_equal_movements += 1
            else:
                count_equal_movements = 0
            last_movements = last_movements[1:] + [movements]

            # Restart when the movement count keeps repeating (apparent cycle)
            # or the iteration budget is exhausted.
            if count_equal_movements >= 20 or num_of_iterations > self.max_num_of_iterations:
                cluster_labels = None # Restart clustering
                continue

            if verbose:
                print("movements", movements)

            if movements == 0:
                break  # converged: no row changed cluster

            cluster_labels = cluster_labels_new

            num_of_iterations += 1

        return cluster_labels

    def anomaly_score(self, X,y=None):
        # When y is omitted, predict cluster labels with the trained classifier.
        return AnomalyDetector.anomaly_score(self, X, self.clf_.predict(X) if self.clf_ is not None and y is None else y)

    def loglikelihood(self,X,y=None):
        # When y is omitted, predict cluster labels with the trained classifier.
        return AnomalyDetector.loglikelihood(self, X, self.clf_.predict(X) if self.clf_ is not None and y is None else y)
123 |
--------------------------------------------------------------------------------
/_pyisc_modules/AnomalyDetector.py:
--------------------------------------------------------------------------------
1 | """
2 | The Python Wrapper of all ISC anomaly scoring methods.
3 | """
4 | # --------------------------------------------------------------------------
5 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB
6 | #
7 | # Main author: Tomas Olsson
8 | #
9 | # This code is free software: you can redistribute it and/or modify it
10 | # under the terms of the GNU Lesser General Public License as published
11 | # by the Free Software Foundation, either version 3 of the License, or
12 | # (at your option) any later version.
13 | #
14 | # This code is distributed in the hope that it will be useful,
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | # GNU Lesser General Public License for more details.
18 | #
19 | # You should have received a copy of the GNU Lesser General Public License
20 | # along with this code. If not, see .
21 | # --------------------------------------------------------------------------
22 |
23 | from numpy import ndarray, array
24 |
25 | from pyisc import BaseISC, _JSonExporter
26 |
27 | import pyisc
28 |
29 |
class AnomalyDetector(BaseISC):
    """Python wrapper exposing the ISC anomaly scoring methods."""

    def anomaly_score(self,X, y=None, n_jobs=1):
        '''
        Score each row in X,y with an anomaly score.
        :param X: a single array, an array of arrays, or an instance of pyisc DataObject
        :param y: must be an array,list or None, cannot be a column_index as when fitting the data
        :param n_jobs: unused; kept for backward compatibility of the signature
        :return: the anomaly scores computed by the underlying C++ detector
        :raises ValueError: if X/y cannot be converted to a scorable DataObject
        '''

        if isinstance(X, pyisc.DataObject):
            return self._anomaly_detector._CalcAnomaly(X,X.size())
        elif isinstance(X, (ndarray, list)):
            data_object = self._convert_to_data_object_in_scoring(array(X), y)

            if data_object is not None:
                return self.anomaly_score(data_object)

        raise ValueError("Unknown type of data to score X, y", type(X), type(y))


    def anomaly_score_details(self,X,y=None,index=None):
        '''
        Computes the detailed anomaly scores of each element in X, that is, anomaly score for each used statistical component\n
        :param X: is a DataObject or numpy array or list\n
        :param y: is None or an array of classes, must be consistent with how the data was fitted, cannot be a column_index
        :param index: is None or an index into X\n\n
        :return: a list with (a list for each element in X if X is two dimensional, otherwise only a single list):\n
        [\n
        a double value with total deviation, \n
        an int value with predicted class (if class_column was set to true in constructor),\n
        an int value with predicted cluster (if clustering was set to true in constructor), \n
        an array with deviations for each individual component,\n
        an array with the peak, that is, the most probable feature value for each feature column,\n
        an array with the least acceptable value for each feature column,\n
        an array with the largest acceptable value for each feature column\n
        ]
        '''
        if isinstance(X, pyisc._DataObject) and y is None:
            if isinstance(index,int):
                return self._anomaly_score_intfloat(X._get_intfloat(index),X.length(), X)
            else:
                return [self.anomaly_score_details(X,index=i) for i in range(X.size())]
        elif isinstance(X, ndarray):
            data_object = self._convert_to_data_object_in_scoring(X, y)
            if data_object is not None:
                # BUG FIX: index must be passed by keyword; the old positional
                # call bound it to the y parameter, which made the DataObject
                # branch above fail whenever an index was given.
                return self.anomaly_score_details(data_object, index=index)
        elif isinstance(X, list):
            # BUG FIX: previously delegated to anomaly_score(), which computes
            # plain scores, not details, and silently bound index to its
            # n_jobs parameter.
            return self.anomaly_score_details(array(X), y, index)

        # BUG FIX: was `raise ValueError(...) if cond else ""`, a conditional
        # expression that could attempt `raise ""` (a TypeError). Any fall
        # through to here means the input could not be scored.
        raise ValueError("Unknown type of data to score?", type(X))



    def _anomaly_score_intfloat(self, x_intfloat, length, data_object):
        '''
        Compute the detailed anomaly scores for a single row.
        :param x_intfloat: the row as a C intfloat array
        :param length: number of feature columns in the row
        :param data_object: DataObject used to convert C arrays back to numpy
        :return: a list laid out as documented in anomaly_score_details
        '''
        # Output buffers for the C++ detail call.
        deviations = pyisc._double_array(self.num_of_partitions)
        min_vals = pyisc._intfloat_array(length)
        max_vals = pyisc._intfloat_array(length)
        peak = pyisc._intfloat_array(length)
        anom = pyisc._double_array(1)
        cla = pyisc._int_array(1)
        clu = pyisc._int_array(1)

        self._anomaly_detector._CalcAnomalyDetails(x_intfloat,anom, cla, clu, deviations, peak, min_vals, max_vals)

        # Build the result; the optional class/cluster entries depend on the
        # detector configuration (previously four copy-pasted branches).
        result = [pyisc._get_double_value(anom,0)]
        if self.class_column > -1:
            result.append(pyisc._get_int_value(cla,0))
        if self.is_clustering:
            result.append(pyisc._get_int_value(clu,0))
        result += [list(pyisc._to_numpy_array(deviations,self.num_of_partitions)),
                   list(data_object._convert_to_numpyarray(peak, length)),
                   list(data_object._convert_to_numpyarray(min_vals, length)),
                   list(data_object._convert_to_numpyarray(max_vals, length))]

        # Free the C buffers allocated above.
        pyisc._free_array_double(deviations)
        pyisc._free_array_intfloat(min_vals)
        pyisc._free_array_intfloat(max_vals)
        pyisc._free_array_intfloat(peak)
        pyisc._free_array_double(anom)
        pyisc._free_array_int(cla)
        pyisc._free_array_int(clu)


        return result
135 |
--------------------------------------------------------------------------------
/_pyisc_modules/BaseISC.py:
--------------------------------------------------------------------------------
1 | """
2 | The Python Wrapper of all ISC anomaly detector training methods.
3 | """
4 | # --------------------------------------------------------------------------
5 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB
6 | #
7 | # Main author: Tomas Olsson
8 | #
9 | # This code is free software: you can redistribute it and/or modify it
10 | # under the terms of the GNU Lesser General Public License as published
11 | # by the Free Software Foundation, either version 3 of the License, or
12 | # (at your option) any later version.
13 | #
14 | # This code is distributed in the hope that it will be useful,
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | # GNU Lesser General Public License for more details.
18 | #
19 | # You should have received a copy of the GNU Lesser General Public License
20 | # along with this code. If not, see .
21 | # --------------------------------------------------------------------------
22 | from _pyisc import _to_cpp_array
23 | from abc import abstractmethod
24 | import numpy
25 | from numpy import ndarray, array, c_
26 | from pyisc import _to_cpp_array_int, _AnomalyDetector, \
27 | _IscMultiGaussianMicroModel, \
28 | _IscPoissonMicroModel, \
29 | _IscPoissonMicroModelOneside, \
30 | _IscMicroModelVector, _IscGammaMicroModel, \
31 | _IscMarkovGaussMicroModel, \
32 | _IscMarkovGaussMicroModelVector, \
33 | _IscMarkovGaussCombinerMicroModel, \
34 | _IscMarkovGaussMatrixMicroModel
35 | import pyisc
36 |
37 | __author__ = 'tol'
38 |
39 | cr_min = pyisc.IscMin
40 | cr_max = pyisc.IscMax
41 | cr_plus= pyisc.IscPlus
42 |
class P_ProbabilityModel:
    """Abstract base class for the statistical component models used by BaseISC."""
    # Cached SWIG micro-model created by create_micromodel(); a C extension
    # object that cannot be pickled, hence removed in __getstate__.
    _saved_model = None

    @abstractmethod
    def __init__(self):
        pass

    @abstractmethod
    def create_micromodel(self):
        """Create and return the underlying C++ micro model."""
        pass

    @abstractmethod
    def get_column_index(self):
        '''
        :return: list with used column index
        '''
        pass

    def __getstate__(self):
        """Return picklable state with the SWIG model object removed."""
        odict = self.__dict__.copy() # copy the dict since we change it
        # BUG FIX: `del odict['_saved_model']` raised KeyError whenever
        # create_micromodel() had never been called — the attribute then only
        # exists on the class, not in the instance dict. pop() with a default
        # is a safe no-op in that case.
        odict.pop('_saved_model', None) # remove swig object entry
        return odict
64 |
65 |
class P_Gaussian(P_ProbabilityModel):
    """(Multivariate) Gaussian distribution over one or more value columns."""

    def __init__(self, value_column):
        '''
        A multivariate Gaussian distribution using value_column as column
        index into the data object.
        :param value_column: an integer (single variable) or a list of
            integers (multivariate)
        :return:
        '''
        # Normalize to a list of column indices.
        self.column_index = value_column if isinstance(value_column, list) else [value_column]

    def get_column_index(self):
        """Return the list of data-object column indices used by this model."""
        return self.column_index

    def create_micromodel(self):
        """Instantiate the underlying C++ multi-Gaussian micro model."""
        cols = _to_cpp_array_int(self.column_index)
        model = _IscMultiGaussianMicroModel(len(self.column_index), cols)
        pyisc._free_array_int(cols)  # the micro model copies what it needs
        self._saved_model = model
        return model
87 |
88 |
class P_Poisson(P_ProbabilityModel):
    """Two-sided Poisson model over a frequency column and a period column."""

    def __init__(self, frequency_column, period_column):
        '''
        A Poisson distribution using frequency_column as column index into the
        data object for the frequency and period_column for the period where
        the frequency was counted. This probability model checks for both
        unusually high frequency values and unusually small values.
        :param frequency_column:
        :param period_column:
        '''
        self.column_index = [frequency_column,period_column]

    def get_column_index(self):
        """Return [frequency_column, period_column]."""
        return self.column_index

    def create_micromodel(self):
        """Instantiate the underlying C++ Poisson micro model."""
        freq_col, period_col = self.column_index
        self._saved_model = _IscPoissonMicroModel(freq_col, period_col)
        return self._saved_model
107 |
108 |
class P_PoissonOnesided(P_ProbabilityModel):
    """One-sided Poisson model: flags only unusually HIGH frequencies."""

    def __init__(self, frequency_column, period_column):
        '''
        A Poisson distribution using frequency_column as column index into the
        data object for the frequency and period_column for the period where
        the frequency was counted. This probability model only checks for
        unusually high frequency values, not unusually small values.
        :param frequency_column:
        :param period_column:
        :return:
        '''
        self.column_index = [frequency_column,period_column]

    def get_column_index(self):
        """Return [frequency_column, period_column]."""
        return self.column_index

    def create_micromodel(self):
        """Instantiate the underlying C++ one-sided Poisson micro model."""
        freq_col, period_col = self.column_index
        self._saved_model = _IscPoissonMicroModelOneside(freq_col, period_col)
        return self._saved_model
128 |
129 |
class P_Gamma(P_ProbabilityModel):
    """Gamma-distribution approximation built on the Poisson machinery."""

    def __init__(self, frequency_column, period_column):
        '''
        An approximation of the Gamma distribution, by use of the Poisson
        distribution, that uses frequency_column as column index into the data
        object for the frequency and period_column for the period where the
        frequency was counted.
        :param frequency_column:
        :param period_column:
        :return:
        '''
        self.frequency_column = frequency_column
        self.period_column = period_column

    def get_column_index(self):
        """Return [frequency_column, period_column]."""
        return [self.frequency_column, self.period_column]

    def create_micromodel(self):
        """Instantiate the underlying C++ Gamma micro model."""
        model = _IscGammaMicroModel(self.frequency_column, self.period_column)
        self._saved_model = model
        return model
150 |
class P_ConditionalGaussian(P_ProbabilityModel):
    """Conditional multivariate Gaussian: P(prediction columns | conditional columns)."""

    def __init__(self, prediction_column, conditional_column):
        '''
        Implements a conditional multivariate Gaussian distribution.

        :param prediction_column: an integer or an list of integers
        :param conditional_column: an integer or an list of integers
        '''

        self.prediction_column = prediction_column
        self.conditional_column = conditional_column

    def get_column_index(self):
        '''
        :return: list with all used column indices: prediction columns
            followed by conditional columns
        '''
        # BUG FIX: the previous single conditional expression parsed as
        # `a if p else (b + c if p else d)` due to precedence, so when
        # prediction_column was a list the conditional columns were silently
        # dropped; the second test also wrongly inspected prediction_column
        # instead of conditional_column.
        pred = self.prediction_column if isinstance(self.prediction_column, list) else [self.prediction_column]
        cond = self.conditional_column if isinstance(self.conditional_column, list) else [self.conditional_column]
        return pred + cond

    def create_micromodel(self):
        """Instantiate the underlying C++ conditional (Markov) Gaussian micro model."""
        # NOTE(review): len() below assumes list-valued columns even though
        # __init__ also accepts plain ints — confirm intended usage upstream.
        pred_index = _to_cpp_array_int(self.prediction_column)
        cond_index = _to_cpp_array_int(self.conditional_column)
        self._saved_model = _IscMarkovGaussMicroModel(pred_index, len(self.prediction_column),
                                                      cond_index, len(self.conditional_column))

        pyisc._free_array_int(pred_index)
        pyisc._free_array_int(cond_index)

        return self._saved_model
178 |
class P_ConditionalGaussianCombiner(P_ProbabilityModel):
    """Combiner of conditionally independent P_ConditionalGaussian components."""

    def __init__(self, gaussian_components):
        '''
        Combines the contributions from conditionally independent multivariate conditional Gaussian distributions, so that
        a Bayesian belief net or Markov chain can be created. The components must form a directed acyclic graph.

        :param gaussian_components: a single P_ConditionalGaussian or a list of P_ConditionalGaussian.
        '''

        assert isinstance(gaussian_components, P_ConditionalGaussian) or \
               isinstance(gaussian_components, list) and \
               all([isinstance(comp, P_ConditionalGaussian) for comp in gaussian_components])

        # Normalize to a list: the docstring allows a single component, but
        # get_column_index and create_micromodel iterate/len() over this
        # attribute, which would fail for a bare P_ConditionalGaussian.
        if isinstance(gaussian_components, P_ConditionalGaussian):
            gaussian_components = [gaussian_components]

        self.gaussian_components = gaussian_components

    def get_column_index(self):
        """Return the concatenated column indexes of all components."""
        cols = []
        for comp in self.gaussian_components:
            cols += comp.get_column_index()

        return cols

    def create_micromodel(self):
        """Build the combined C++ micro model from the component micro models."""
        num_of_components = len(self.gaussian_components)
        creator = _IscMarkovGaussMicroModelVector()
        for i in range(num_of_components):
            creator.push_back(self.gaussian_components[i].create_micromodel())
        ptr_creator = pyisc._to_pointer(creator)
        self._saved_model = _IscMarkovGaussCombinerMicroModel(ptr_creator, num_of_components)
        pyisc._free_pointer(ptr_creator)
        return self._saved_model
211 |
class P_ConditionalGaussianDependencyMatrix(P_ProbabilityModel):
    """Conditional Gaussian model over a matrix-shaped dependency structure."""

    def __init__(self, value_columns, elements_per_row):
        '''
        Creates a dependency matrix where each element is only dependent on its right neighbour and the element directly
        below in all cases where they are present. Otherwise the elements are only dependent on the element of the two
        neighbours that is present, or no element.

        :param value_columns: the column indexes that are contained in the matrix as a sequence of the elements
        from left to the right and from the first row to the last row.
        :param elements_per_row: the number of column indexes (elements) that constitutes a row in the matrix,
        all rows are equally long.
        '''
        self.value_columns = value_columns
        self.slots_per_row = elements_per_row

    def get_column_index(self):
        """Return the column indexes covered by the matrix."""
        return self.value_columns

    def create_micromodel(self):
        """Instantiate and cache the underlying C++ matrix micro model."""
        cpp_columns = _to_cpp_array_int(self.value_columns)
        num_columns = len(self.value_columns)
        self._saved_model = _IscMarkovGaussMatrixMicroModel(cpp_columns, num_columns, self.slots_per_row)
        # Release the temporary C++ index array once the model is built.
        pyisc._free_array_int(cpp_columns)
        return self._saved_model
237 |
class BaseISC(object):
    """Base class for all pyISC statistical inference classes.

    Wraps the SWIG-generated C++ _AnomalyDetector and maps the Python-side
    probability models onto the C++ micro models.
    """

    # Class-level default; replaced by a per-instance list in __init__.
    component_models = None

    def __init__(self, component_models=P_Gaussian(0), output_combination_rule=cr_max, anomaly_threshold = 0.0):
        '''
        The base class for all pyISC classes for statistical inference

        :param component_models: a statistical model reused for all mixture components, or a list of statistical models.
        Available statistical models are: P_Gaussian, P_Poisson, P_PoissonOneside.
        :param output_combination_rule: an input defining which type of rule to use for combining the anomaly score
        output from each model in component_model. Available combination rules are: cr_max and cr_plus.
        :param anomaly_threshold: the threshold at which a row in the input is considered an anomaly during training,
        might differ from what is used for anomaly decision.
        :return:
        '''

        # Features always start at column 0 of the data object.
        feature_column_start=0


        assert isinstance(anomaly_threshold, float) and anomaly_threshold >= 0
        assert isinstance(feature_column_start, int) and feature_column_start >= 0
        assert isinstance(component_models, P_ProbabilityModel) or \
               isinstance(component_models, list) and len(component_models) > 0 and \
               all([isinstance(m, P_ProbabilityModel) for m in component_models])
        assert output_combination_rule in [cr_min, cr_max, cr_plus]



        self.anomaly_threshold = anomaly_threshold
        self.is_clustering = False #clustering not used in the python wrapper since it does not seem to work in the C++ code.
        self.output_combination_rule = output_combination_rule

        # C++ constructor being mirrored by the local variables below:
        #//AnomalyDetector(int n, int off, int splt, double th, int cl); // Sublasses must know the numbers and types of micromodels

        #/**
        #* n is number of isc mixture components
        # * off is the first column containing features used by the detector
        # * splt is a the column containing a known class
        # * th is a threshold on when to consider a vector of data as anomalous
        # * cl is a variable if zero indicate no clustering else indicates that clustering should be done
        # * cr is variable indicating how the anomaly scores for the different isc mixture components should be combined
        # * cf is a function that creates a isc micro component for each of the n isc mixture component.

        off = feature_column_start

        # no split class (set later by fit when the data carries one)
        self.class_column = None
        splt = -1

        th = anomaly_threshold
        cl = 1 if self.is_clustering else 0

        # Normalize a single model to a one-element list.
        if isinstance(component_models, P_ProbabilityModel):
            n = 1
            component_models = [component_models]
        else:
            n = len(component_models)

        self.component_models = component_models

        # Largest data-object column index referenced by any component model;
        # used by the fit methods to validate the width of incoming data.
        self._max_index = numpy.vstack([numpy.max(_.get_column_index()) for _ in component_models]).flatten().max()


        self.classes_ = None
        self.num_of_partitions = n

        self._create_inner_anomaly_detector(cl, n, off, output_combination_rule, splt, th)

    def _create_inner_anomaly_detector(self, cl, n, off, output_combination_rule, splt, th):
        """Create the wrapped C++ _AnomalyDetector from the component models."""
        # Map argument to C++ argument
        comp_distributions = _IscMicroModelVector()
        for i in range(n):
            comp_distributions.push_back(self.component_models[i].create_micromodel())
        self._anomaly_detector = _AnomalyDetector(off, splt, th, cl, output_combination_rule, comp_distributions);

    def fit(self, X, y=None):
        '''
        Train the anomaly detector using a DataObject or an array of arrays

        :param X: a single array, an array of arrays, or an instance of pyisc DataObject
        :param y: must be an array, list, a column index (integer) or None
        :return:
        '''

        return self._fit(X,y)

    def _fit(self,X,y=None):
        # Internal fit that dispatches on the type of X.

        if isinstance(X, pyisc.DataObject) and y is None:
            assert self._max_index < X.length() # ensure the models do not index outside the data

            assert y is None # Contained in the data object
            self.class_column = X.class_column
            # NOTE(review): class_column may be None here, in which case the
            # comparison below raises TypeError on Python 3 — confirm intended use.
            if self.class_column >= 0:
                self.classes_ = X.classes_

            # Push the (possibly changed) training parameters to the C++ side
            # before training.
            self._anomaly_detector._SetParams(
                0,
                -1 if X.class_column is None else X.class_column,
                self.anomaly_threshold,
                1 if self.is_clustering else 0
            )
            self._anomaly_detector._TrainData(X)
            return self
        if isinstance(X, ndarray):

            class_column = -1
            data_object = None
            assert X.ndim <= 2
            if X.ndim == 2:
                max_class_column = X.shape[1]
            else:
                max_class_column = 1
            if isinstance(y, list) or isinstance(y, ndarray):
                # Class values supplied separately: append them as the last column.
                assert len(X) == len(y)
                class_column = max_class_column
                data_object = pyisc.DataObject(numpy.c_[X, y], class_column=class_column)
            elif y is None or int(y) == y and y > -1 and y <= max_class_column:
                # y is a column index into X (or no class at all).
                self.class_column = y
                data_object = pyisc.DataObject(X,class_column=y)

            if data_object is not None:
                assert self._max_index < data_object.length() # ensure the models do not index outside the data

                return self._fit(data_object)

        raise ValueError("Unknown type of data to fit X, y:", type(X), type(y))

    def fit_incrementally(self, X, y=None):
        '''
        Incrementally train the anomaly detector. Call reset() to restart learning. Requires being trained using the fit
        method before first call.

        :param X: a single array, an array of arrays, or an instance of pyisc DataObject
        :param y: a single array with classes or None, optional, only required if previously trained with classes
        :return: self
        '''


        if isinstance(X, pyisc.DataObject) and y is None and X.class_column == self.class_column:

            assert self._max_index < X.length() # ensure the models do not index outside the data

            self._anomaly_detector._TrainDataIncrementally(X)
            return self
        elif isinstance(X, ndarray) or isinstance(X, list):

            assert self._max_index < len(X[0]) # ensure the models do not index outside the data

            data_object = self._convert_to_data_object_in_scoring(array(X), y)

            if data_object is not None:
                return self.fit_incrementally(data_object)

        raise ValueError("Unknown type of data to fit X, y", type(X), type(y))

    def unfit_incrementally(self, X, y=None):
        """Remove the contribution of X (and y) from the incrementally trained model."""
        if isinstance(X, pyisc.DataObject) and y is None and X.class_column == self.class_column:
            self._anomaly_detector._UntrainDataIncrementally(X)
            return self
        elif isinstance(X, ndarray) or isinstance(X, list):
            data_object = self._convert_to_data_object_in_scoring(array(X), y)

            if data_object is not None:
                return self.unfit_incrementally(data_object)

        raise ValueError("Unknown type of data to fit X, y", type(X), type(y))

    def _convert_to_data_object_in_scoring(self, X, y):
        """Wrap X (and class values y) in a DataObject consistent with how the detector was trained."""
        data_object = None
        if isinstance(y, list) or isinstance(y, ndarray):
            # Class values supplied separately: they must occupy the column
            # right after the features, matching the training layout.
            assert X.ndim == 2 and self.class_column == X.shape[1] or X.ndim == 1 and self.class_column == 1
            data_object = pyisc.DataObject(numpy.c_[X, y], class_column=self.class_column,classes=self.classes_)
        else:
            # y is the class column index (or None when trained without classes).
            assert self.class_column == y
            data_object = pyisc.DataObject(X, class_column=self.class_column,classes=self.classes_ if y is not None else None)
        return data_object

    def reset(self):
        """Forget everything learned so far."""
        self._anomaly_detector._Reset();


    def compute_logp(self, X1):
        """Compute log-probabilities of the rows in X1.

        When trained with a class column (and X1 is not already a _DataObject),
        returns a list with one log-probability array per class in classes_,
        each computed with the class column forced to that class; otherwise
        returns a single array.
        """
        if self.class_column is not None and not isinstance(X1, pyisc._DataObject):
            if X1.ndim == 2 and self.class_column >= 0 and self.class_column < X1.shape[1]:
                # The class column is already part of X1; reuse it.
                data_object = self. \
                    _convert_to_data_object_in_scoring(
                    X1,
                    y=self.class_column
                )
            else:
                # No class column present in X1; append a placeholder column.
                data_object = self. \
                    _convert_to_data_object_in_scoring(
                    X1,
                    y=array([None] * len(X1))
                )
            logps = []
            clss = list(self.classes_)
            for clazz in self.classes_:
                # Force every row to the current class and score the data.
                pyisc._DataObject.set_column_values(data_object, self.class_column, [clss.index(clazz)] * len(data_object))

                logps += [self._anomaly_detector._LogProbabilityOfData(data_object, len(X1))]

            return logps
        else:
            data_object = pyisc.DataObject(X1) if not isinstance(X1, pyisc._DataObject) else X1
            return self._anomaly_detector._LogProbabilityOfData(data_object, len(X1))


    def loglikelihood(self,X,y=None):
        """Return the summed log-probability of X (optionally with class values y)."""
        assert isinstance(X, ndarray) and (self.class_column is None and y is None or len(y) == len(X))

        if y is not None:
            return self._anomaly_detector._LogProbabilityOfData(pyisc.DataObject(c_[X,y], class_column=len(X[0])), len(X)).sum()
        else:
            return self._anomaly_detector._LogProbabilityOfData(pyisc.DataObject(X), len(X)).sum()


    def exportJSon(self):
        '''
        Export the learned model to JSon.
        :return: string with JSon
        '''
        #TODO: add export/import of constructor arguments

        exporter = pyisc._JSonExporter()
        self._anomaly_detector.exportModel(exporter)
        return exporter.getJSonString()

    def importJSon(self, json):
        '''
        Parses and imports a learned model from JSon.

        Observe that the constructor arguments of the anomaly detector must be known and defined before importing.
        That is, the component_models, output_combination_rule, anomaly_threshold, etc. are not exported/imported due to
        limitation of the underlying C++ implementation, but should be provided to the importing detector at construction.

        :param json: string
        :return: True if successful, False otherwise
        '''
        importer = pyisc._JSonImporter()
        success = importer.parseJSon(json)
        if success:
            self._anomaly_detector.importModel(importer)
        return success


    # The getstate/setstate pair lets us pickle the SWIG object by exporting
    # its state to JSON instead of pickling the C++ wrapper directly.
    def __getstate__(self):
        odict = self.__dict__.copy() # copy the dict since we change it
        del odict['_anomaly_detector'] # remove swig object entry
        odict['_anomaly_detector_json'] = self.exportJSon()
        return odict

    def __setstate__(self, dict):
        # Rebuild the C++ detector from scratch, then load the learned state
        # from the JSON snapshot taken in __getstate__.
        _anomaly_detector_json = dict['_anomaly_detector_json']
        del dict['_anomaly_detector_json']
        self.__dict__.update(dict) # update attributes
        self._create_inner_anomaly_detector(False, self.num_of_partitions, 0, self.output_combination_rule, -1, self.anomaly_threshold)
        if not self.importJSon(_anomaly_detector_json):
            raise Exception("Import of JSON did not work properly")
501 |
--------------------------------------------------------------------------------
/_pyisc_modules/DataObject.py:
--------------------------------------------------------------------------------
1 | """
2 | The Python Wrapper of all ISC DataObject methods.
3 | """
4 | # --------------------------------------------------------------------------
5 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB
6 | #
7 | # Main author: Tomas Olsson
8 | #
9 | # This code is free software: you can redistribute it and/or modify it
10 | # under the terms of the GNU Lesser General Public License as published
11 | # by the Free Software Foundation, either version 3 of the License, or
12 | # (at your option) any later version.
13 | #
14 | # This code is distributed in the hope that it will be useful,
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | # GNU Lesser General Public License for more details.
18 | #
19 | # You should have received a copy of the GNU Lesser General Public License
20 | # along with this code. If not, see .
21 | # --------------------------------------------------------------------------
22 |
23 |
24 | from numpy import ndarray, array
25 | from numpy.ma.extras import unique
26 |
27 | import pyisc
28 | from pyisc import Format
29 |
30 | __author__ = 'tol'
31 |
class DataObject(pyisc._DataObject):

    '''
    The classes_ used to generate indexes into the class_column
    '''
    classes_ = None
    '''
    The column index that contains the class parameter.
    '''
    class_column = None

    def __init__(self, X, format=None, class_column=None, classes='auto'):
        '''
        The DataObject class represents the data analysed using a AnomalyDetector.

        X can be an Format instance or an numpy array. In the previous case, we assume
        it is used to describe the content that is added to the object using add2Darray
        or add1Darray methods. In the other case, we automatically generate a format instance,
        unless the format argument is provided. If the class_column is specified, we use it
        to generate a column in the auto-generated format where the elements are index into
        the classes_ list. If the classes_ list is set to 'auto', the elements in X of the
        class_column are used to auto-create a classes_ list.

        :param X: a Format instance or a numpy array
        :param format: None or a pyisc Format instance
        :param class_column: None or an integer
        :param classes: 'auto' or a list of elements in X[class_column]
        :return:
        '''
        self.class_column = class_column
        if isinstance(X, pyisc.Format):
            # Only a format was given; data is expected to be added afterwards.
            self._format = X
            pyisc._DataObject.__init__(self,X)
            return
        elif isinstance(X, ndarray):
            if format is None:
                # Auto-generate a format for the array's columns.
                format = Format()
                num_cols = len(X.T)
                if class_column is not None:
                    assert class_column >= 0 and class_column < num_cols
                    # Continuous columns everywhere except the symbolic class column.
                    for col in range(num_cols):
                        if col != class_column:
                            format.addColumn("Column %i"%col, Format.Continuous)
                        else:
                            format.addColumn("Column %i"%col, Format.Symbol)
                    # Replace the raw class values with indexes into classes_.
                    A = X.T.copy()
                    if classes == 'auto':
                        self.classes_ = list(sorted(unique(A[class_column])))
                    else:
                        self.classes_ = classes
                    class_col = format.get_nth_column(class_column)
                    for c in self.classes_:
                        class_col.add("Class %i"%c if isinstance(c, int) else "Class %s"%c if isinstance(c, str) and len(c) == 1 else str(c))
                    # Values not listed in classes_ are encoded as -1 (unknown).
                    A[class_column] = [self.classes_.index(v) if v in self.classes_ else -1 for v in A[class_column]]
                    X = A.T
            self._format = format
            if X.ndim == 1: # This fixes a problem of converting it to c++ data object
                X = array([X.copy()]).T

            # NOTE(review): when class_column is None and no format is given,
            # the auto-generated Format above has no columns — confirm the C++
            # _DataObject constructor handles that case.
            pyisc._DataObject.__init__(self,format,X.astype(float))
            return
        elif isinstance(format, pyisc.Format):
            self._format = format
            pyisc._DataObject.__init__(self,format,X)
            return
        pyisc._DataObject.__init__(self,X)

    def as_1d_array(self):
        """Return the data as a flat (row-major) numpy array, with class indexes mapped back to class labels."""
        array1D = self._as1DArray(self.size()*self.length()).astype(object)

        if self.class_column is not None:
            # Translate stored class indexes back into the original labels (-1 -> None).
            array1D[list(range(self.class_column,len(array1D),self.length()))] = \
                [self.classes_[int(c)] if int(c) != -1 else None for c in array1D[list(range(self.class_column,len(array1D),self.length()))] ]

        return array1D

    def as_2d_array(self):
        """Return the data as a (rows x columns) numpy array."""
        array1D = self.as_1d_array()
        return array1D.reshape((self.size(),self.length()))

    def set_column_values(self, column_index, values):
        '''
        Sets all values in a column, if the column is the class column, then the values must be one of the ones provided in the constructor.
        :param column_index:
        :param values:
        :return:
        '''
        if column_index == self.class_column:
            # Class labels are stored as indexes into classes_.
            values = [self.classes_.index(c) for c in values]
        pyisc._DataObject.set_column_values(self, column_index, array(values).astype(float))


    def __getitem__(self,index):
        """Return row `index` (supports negative indexing) or None when out of range."""
        if index <= -1:
            index = self.size()+index
        if index < self.size():
            return self._getRow(index, self.length())
        else:
            return None

    def __len__(self):
        """Number of rows in the data object."""
        return self.size()
135 |
--------------------------------------------------------------------------------
/_pyisc_modules/OutlierClustering.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------------------------
2 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB
3 | #
4 | # Main author: Tomas Olsson
5 | #
6 | # This code is free software: you can redistribute it and/or modify it
7 | # under the terms of the GNU Lesser General Public License as published
8 | # by the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This code is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU Lesser General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU Lesser General Public License
17 | # along with this code. If not, see .
18 | # ------------------------------------------------------------------------
19 |
20 | from pyisc import SklearnOutlierDetector
21 | from .AnomalyClustering import AnomalyClustering
22 |
class OutlierClustering(AnomalyClustering,SklearnOutlierDetector):
    """Outlier detection combined with anomaly-based clustering.

    Fitting is delegated to AnomalyClustering; scoring and outlier
    classification are delegated to SklearnOutlierDetector. The fitted
    clustering model clf_ (presumably set during fit — see AnomalyClustering)
    supplies class labels whenever none are provided by the caller.
    """

    # Iteration cap, presumably consumed by the clustering loop — confirm
    # against AnomalyClustering.
    max_num_of_iterations = 1000

    def __init__(self, n_clusters=2, n_repeat=10, *anomaly_detector_params0, **anomaly_detector_params1):
        """Create the detector; extra arguments are forwarded to SklearnOutlierDetector."""
        self.n_clusters = n_clusters
        self.n_repeat = n_repeat
        self.ad_parms0 = anomaly_detector_params0
        self.ad_parms1 = anomaly_detector_params1
        self.clf_ = None
        SklearnOutlierDetector.__init__(self, *anomaly_detector_params0, **anomaly_detector_params1)

    def _create_detector(self, *ad_parms0, **ad_parms1):
        """Factory used by the clustering algorithm to build fresh detectors."""
        return SklearnOutlierDetector(*ad_parms0, **ad_parms1)

    def _detector_fit(self, X, y):
        """Fit this instance as a plain SklearnOutlierDetector."""
        return SklearnOutlierDetector.fit(self, X, y)

    def _labels_for(self, X, y):
        """Return y, or cluster labels predicted by clf_ when y is None and clf_ exists."""
        if self.clf_ is not None and y is None:
            return self.clf_.predict(X)
        return y

    def fit(self,X,verbose=False):
        """Fit via the clustering algorithm."""
        return AnomalyClustering.fit(self,X,verbose=verbose)

    def predict(self, X):
        """Classify rows of X as inliers (1) or outliers (-1) using predicted cluster labels."""
        cluster_labels = self.clf_.predict(X)
        return SklearnOutlierDetector.predict(self, X, cluster_labels)

    def anomaly_score(self, X, y=None):
        """Anomaly scores for X; cluster labels are predicted when y is None."""
        return SklearnOutlierDetector.anomaly_score(self, X, self._labels_for(X, y))

    def loglikelihood(self,X,y=None):
        """Log-likelihood of X; cluster labels are predicted when y is None."""
        return AnomalyClustering.loglikelihood(self, X, self._labels_for(X, y))
51 |
--------------------------------------------------------------------------------
/_pyisc_modules/SklearnClassifier.py:
--------------------------------------------------------------------------------
1 | """
2 | The Python Wrapper of all ISC classification methods that is compatible with scikit-learn
3 | classifiers (http://scikit-learn.org)
4 | """
5 | # --------------------------------------------------------------------------
6 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB
7 | #
8 | # Main author: Tomas Olsson
9 | #
10 | # This code is free software: you can redistribute it and/or modify it
11 | # under the terms of the GNU Lesser General Public License as published
12 | # by the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # This code is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU Lesser General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU Lesser General Public License
21 | # along with this code. If not, see .
22 | # --------------------------------------------------------------------------
23 | from _pyisc import _AnomalyDetector__ClassifyData
24 |
25 | from numpy import array, ndarray
26 | from numpy.ma.core import exp
27 | from scipy.misc import logsumexp
28 |
29 | from sklearn.base import ClassifierMixin, BaseEstimator
30 | from pyisc import P_Gaussian, BaseISC, cr_max
31 | import pyisc
32 |
33 |
class SklearnClassifier(BaseISC, BaseEstimator, ClassifierMixin):
    # Anomaly-score cutoff below which instances are considered classifiable.
    classification_threshold = None

    def __init__(self, component_models=P_Gaussian(0),
                 classification_threshold=1e12,
                 output_combination_rule=cr_max,
                 training_anomaly_threshold = 0.0):

        '''
        A scikit-learn compatible classifier backed by a pyISC anomaly detector.

        :param classification_threshold: (optional) a threshold for specifying that instances with anomaly scores below
        the threshold should be classified. If not specified, the anomaly threshold is set to very large.
        :return:
        '''
        self.classification_threshold = classification_threshold
        super(SklearnClassifier, self).__init__(component_models,output_combination_rule,training_anomaly_threshold)

    @staticmethod
    def clf(anomaly_detector,classification_threshold=1e12):
        '''
        Converts a pyisc AnomalyDetector into a SklearnClassifier
        :param anomaly_detector:
        :param classification_threshold:
        :return:
        '''
        assert isinstance(anomaly_detector, pyisc.AnomalyDetector)
        # Share the underlying C++ detector and copy the learned state over.
        classifier = SklearnClassifier()
        classifier._anomaly_detector = anomaly_detector._anomaly_detector
        classifier.class_column = anomaly_detector.class_column
        classifier.anomaly_threshold = anomaly_detector.anomaly_threshold
        classifier.classes_ = anomaly_detector.classes_
        classifier.is_clustering = anomaly_detector.is_clustering
        classifier.num_of_partitions = anomaly_detector.num_of_partitions
        classifier.classification_threshold = classification_threshold

        return classifier

    def predict(self, X):
        '''
        This method classifies each instance in X with a class, if the anomaly detector was trained with classes.

        :param X: a numpy array or a pyisc DataObject
        :return: an array with a classification for each instance in X, an anomalous instance below given classification threshold is classified as None.
        '''
        # Pick the class with the highest normalized log-probability per instance.
        probs = self.predict_log_proba(X)
        return array(self.classes_)[probs.argmax(1)]



    def predict_log_proba(self,X):
        """Return per-instance log-probabilities normalized over the known classes."""
        assert self.class_column > -1

        X1 = None
        if isinstance(X, pyisc.DataObject):
            assert X.class_column == self.class_column
            X1 = X.as_2d_array()
        elif isinstance(X, ndarray):
            X1 = X.copy()


        if X1 is not None:
            # compute_logp returns one log-probability array per class; the
            # transpose yields one row of per-class values per instance.
            logps = self.compute_logp(X1)

            LogPs = [x-logsumexp(x) for x in array(logps).T] #normalized

            return array(LogPs)
        else:
            raise ValueError("Unknown type of data to score:", type(X))


    def predict_proba(self,X):
        """Return per-instance class probabilities (rows re-normalized to sum to 1)."""
        Ps = exp(self.predict_log_proba(X))

        return array([p/s for p,s in zip(Ps,Ps.sum(1))])
129 |
--------------------------------------------------------------------------------
/_pyisc_modules/SklearnOutlierDetector.py:
--------------------------------------------------------------------------------
1 | """
2 | The Python Wrapper of all ISC classification methods that is compatible with scikit-learn
3 | classifiers (http://scikit-learn.org)
4 | """
5 | # --------------------------------------------------------------------------
6 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB
7 | #
8 | # Main author: Tomas Olsson
9 | #
10 | # This code is free software: you can redistribute it and/or modify it
11 | # under the terms of the GNU Lesser General Public License as published
12 | # by the Free Software Foundation, either version 3 of the License, or
13 | # (at your option) any later version.
14 | #
15 | # This code is distributed in the hope that it will be useful,
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | # GNU Lesser General Public License for more details.
19 | #
20 | # You should have received a copy of the GNU Lesser General Public License
21 | # along with this code. If not, see .
22 | # --------------------------------------------------------------------------
23 |
24 | import pyisc
25 | from numpy import percentile, abs, c_, array
26 |
27 | from pyisc import DataObject
28 |
29 |
class SklearnOutlierDetector(pyisc.AnomalyDetector):
    # Decision threshold on the inverted anomaly score; computed by fit().
    threshold_ = None

    def __init__(self,contamination=0.01, *anomaly_detector_params0, **anomaly_detector_params1):
        '''
        This class can be used for classifying anomalies when the contamination fraction is known.
        It is implemented to be used together with the methods listed at
        http://scikit-learn.org/stable/auto_examples/covariance/plot_outlier_detection.html

        :param contamination: fraction of outliers in the data set
        :param anomaly_detector_params0: the same parameters as in the pyisc.AnomalyDetector
        :param anomaly_detector_params1: the same parameters as in the pyisc.AnomalyDetector
        '''
        self.contamination = contamination
        # NOTE(review): super(pyisc.AnomalyDetector, self) starts the MRO lookup
        # *after* AnomalyDetector, so AnomalyDetector.__init__ itself is skipped
        # — confirm this is intentional (vs. super(SklearnOutlierDetector, self)).
        super(pyisc.AnomalyDetector,self).__init__(*anomaly_detector_params0, **anomaly_detector_params1)

    def fit(self, X, y=None):
        '''
        Fit the detector, then iteratively re-estimate the decision threshold
        from the contamination fraction, re-fitting on the current inlier
        subset until the threshold stabilizes (at most 100 rounds).

        :param X: an numpy array
        :param y: an numpy array or None
        :return: self
        '''
        old_threshold = None
        threshold = None
        self.threshold_ = 0.0

        self._fit(X,y)

        count = 0
        while count < 100 and (old_threshold is None or abs(threshold - old_threshold) > 0.01):
            old_threshold = threshold
            ss = self.decision_function(X,y)
            # The contamination-quantile of the scores becomes the new threshold.
            threshold = percentile(ss, 100 * self.contamination)

            # Re-fit using only the points currently scored above the threshold.
            self._fit(X[ss > threshold],y[ss > threshold] if y is not None else None)

            count += 1

        self.threshold_ = threshold

        return self

    def decision_function(self,X,y=None):
        '''
        Returns a decision score per row, from smallest (most anomalous) to high (least anomalous);
        values above 0 are considered inliers by predict().
        :param X: an numpy array
        :param y: an numpy array or None
        :return: numpy array
        '''
        # Invert the anomaly score (the small epsilon avoids division by zero)
        # and shift by the learned threshold so that 0 is the decision boundary.
        ss = (1.0/(self.anomaly_score(X,y)+1e-10) - self.threshold_)

        return ss

    def predict(self, X,y=None):
        '''
        Returns an numpy array with 1 if a row is not anomalous and -1 if anomalous
        :param X: an numpy array
        :param y: an numpy array or None (default)
        :return: numpy array
        '''
        return 2 * (self.decision_function(X,y) > 0) - 1
--------------------------------------------------------------------------------
/_pyisc_modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STREAM3/pyISC/b5615fe5d6b3e474f7afcdf3f3e44b3dded2e889/_pyisc_modules/__init__.py
--------------------------------------------------------------------------------
/bld.bat:
--------------------------------------------------------------------------------
rem Conda build entry point (Windows): install the package with the build-environment Python.
"%PYTHON%" setup.py install --single-version-externally-managed --record=record.txt
rem Propagate any installation failure to the conda build process.
if errorlevel 1 exit 1
3 |
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | $PYTHON setup.py install --single-version-externally-managed --record=record.txt # Python command to install the script.
2 |
--------------------------------------------------------------------------------
/conda_build_config.yaml:
--------------------------------------------------------------------------------
1 | python:
2 | - 2.7
3 | - 3.6
--------------------------------------------------------------------------------
/docs/pyISC_multivariable_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# pyISC Example: MultivariableAnomaly Detection\n",
8 | "In this example, we extend the simple example with one Poisson distributed variable to the multivariate case with three variables, two Poisson distributed variables and one Gaussian distributed variable."
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import pyisc;\n",
18 | "import numpy as np\n",
19 | "from scipy.stats import poisson, norm\n",
20 | "%matplotlib inline\n",
21 | "from pylab import plot"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "metadata": {},
27 | "source": [
28 | "## Create Data\n",
 29 | "Create a data set with 3 columns from different probability distributions:"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "po_normal = poisson(10)\n",
39 | "po_anomaly = poisson(25)\n",
40 | "\n",
41 | "po_normal2 = poisson(2)\n",
42 | "po_anomaly2 = poisson(3)\n",
43 | "\n",
44 | "gs_normal = norm(1, 12)\n",
45 | "gs_anomaly = norm(2,30)\n",
46 | "\n",
47 | "normal_len = 10000\n",
48 | "anomaly_len = 15\n",
49 | "\n",
50 | "data = np.column_stack(\n",
51 | " [\n",
52 | " [1] * (normal_len+anomaly_len),\n",
53 | " list(po_normal.rvs(normal_len))+list(po_anomaly.rvs(anomaly_len)),\n",
54 | " list(po_normal2.rvs(normal_len))+list(po_anomaly2.rvs(anomaly_len)),\n",
55 | " list(gs_normal.rvs(normal_len))+list(gs_anomaly.rvs(anomaly_len)),\n",
56 | " ]\n",
57 | ")"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "## Used Anomaly Detector\n",
 65 | "Create an anomaly detector using as first argument the used statistical models. Then we use \n",
 66 | "- a onesided Poisson distribution for modelling the first frequency column (column 1) (as in the first example),\n",
 67 | "- a twosided Poisson distribution for the second frequency column (column 2),\n",
 68 | "- and a Gaussian (Normal) distribution for the last column (column 3).\n",
69 | "\n",
70 | "Given that we now have more than one variable, it is necessary to also add a method to combine the output from the statistical models, which in this case is the maximum anomaly score of each component model:"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 3,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "anomaly_detector = pyisc.AnomalyDetector(\n",
80 | " component_models=[\n",
81 | " pyisc.P_PoissonOnesided(1,0), # columns 1 and 0\n",
82 | " pyisc.P_Poisson(2,0), # columns 2 and 0\n",
83 | " pyisc.P_Gaussian(3) # column 3\n",
84 | " ],\n",
85 | " output_combination_rule=pyisc.cr_max\n",
86 | ")"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "Train the anomaly detector:"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 4,
99 | "metadata": {},
100 | "outputs": [],
101 | "source": [
102 | "anomaly_detector.fit(data);"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 5,
108 | "metadata": {},
109 | "outputs": [],
110 | "source": []
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {},
115 | "source": [
116 | "Compute the anomaly scores for each data point:"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "metadata": {},
123 | "outputs": [],
124 | "source": [
125 | "scores = anomaly_detector.anomaly_score(data)"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "## Anomaly Scores\n",
133 | "Now we can print some example of normal frequencies vs. anomaly scores for the 15 first normal data points: "
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {},
140 | "outputs": [],
141 | "source": [
142 | "from pandas import DataFrame\n",
143 | "df= DataFrame(data[:15], columns=['#Days', 'Freq1','Freq2','Measure'])\n",
144 | "df['Anomaly Score'] = scores[:15]\n",
145 | "print df.to_string()"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "The anomalous frequencies vs. anomaly scores for the 15 anomalous data points:"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "metadata": {
159 | "scrolled": true
160 | },
161 | "outputs": [],
162 | "source": [
163 | "df= DataFrame(data[-15:], columns=['#Days', 'Freq1','Freq2','Measure'])\n",
164 | "df['Anomaly Score'] = scores[-15:]\n",
165 | "print df.to_string()"
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "metadata": {},
171 | "source": [
172 | "As can be seen above, the anomalous data also have higher anomaly scores than the normal frequencies as it should be.\n",
173 | "This becomes even more visible if we plot the anomaly scores (y-axis) against each data point (x-axis):"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": null,
179 | "metadata": {},
180 | "outputs": [],
181 | "source": [
182 | "plot(scores, '.');"
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "We can also look at the details of each column in terms of their individual anomaly scores:"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {},
196 | "outputs": [],
197 | "source": [
198 | "score_details = anomaly_detector.anomaly_score_details(data)"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "df= DataFrame(data[-15:], columns=['#Days', 'Freq1','Freq2','Measure'])\n",
208 | "df['Anomaly:Freq1'] = [detail[1][0] for detail in score_details[-15:]] # Anomaly Score of Freq1\n",
209 | "df['Anomaly:Freq2'] = [detail[1][1] for detail in score_details[-15:]] # Anomaly Score of Freq2\n",
210 | "df['Anomaly:Measure'] = [detail[1][2] for detail in score_details[-15:]] # Anomaly Score of Measure\n",
211 | "df['Anomaly Score'] = [detail[0] for detail in score_details[-15:]] # Combined Anomaly Score\n",
212 | "df"
213 | ]
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "Above, the last column corresponds to the same anomaly score as before, where we can see that it corresponds to the maximum of the individual anomaly scores to the left, thus, it is the result of the combination rule specified to the anomaly detector."
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": null,
225 | "metadata": {},
226 | "outputs": [],
227 | "source": []
228 | }
229 | ],
230 | "metadata": {
231 | "kernelspec": {
232 | "display_name": "Python 2",
233 | "language": "python",
234 | "name": "python2"
235 | },
236 | "language_info": {
237 | "codemirror_mode": {
238 | "name": "ipython",
239 | "version": 2
240 | },
241 | "file_extension": ".py",
242 | "mimetype": "text/x-python",
243 | "name": "python",
244 | "nbconvert_exporter": "python",
245 | "pygments_lexer": "ipython2",
246 | "version": "2.7.14"
247 | },
248 | "widgets": {
249 | "state": {},
250 | "version": "1.1.1"
251 | }
252 | },
253 | "nbformat": 4,
254 | "nbformat_minor": 1
255 | }
256 |
--------------------------------------------------------------------------------
/docs/pyISC_simple_anomaly_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# pyISC Example: Simple Anomaly Detection with Frequency Data\n",
8 | "This is a simple example on how to use the pyISC anomaly detector for computing the anomaly scores of Poisson distributed data."
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 9,
14 | "metadata": {
15 | "collapsed": false
16 | },
17 | "outputs": [],
18 | "source": [
19 | "import pyisc;\n",
20 | "import numpy as np\n",
21 | "from scipy.stats import poisson\n",
22 | "%matplotlib inline\n",
23 | "from pylab import hist, plot, figure"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "## Data Creation\n",
31 | "Create two arrays with normal and anomalous frequency data respectively."
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 10,
37 | "metadata": {
38 | "collapsed": false
39 | },
40 | "outputs": [],
41 | "source": [
42 | "po_normal = poisson(10)\n",
43 | "po_anomaly = poisson(25)\n",
44 | "\n",
45 | "freq_normal = po_normal.rvs(10000)\n",
46 | "freq_anomaly = po_anomaly.rvs(15)"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {},
52 | "source": [
 53 | "Create a 2D array with two columns that combines the random frequency and a time period equal to 1."
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 11,
59 | "metadata": {
60 | "collapsed": false
61 | },
62 | "outputs": [
63 | {
64 | "data": {
65 | "text/plain": [
66 | "array([[ 12., 1.],\n",
67 | " [ 7., 1.],\n",
68 | " [ 16., 1.],\n",
69 | " [ 9., 1.],\n",
70 | " [ 17., 1.]])"
71 | ]
72 | },
73 | "execution_count": 11,
74 | "metadata": {},
75 | "output_type": "execute_result"
76 | }
77 | ],
78 | "source": [
79 | "data = np.column_stack([\n",
80 | " list(freq_normal)+list(freq_anomaly), \n",
81 | " [1.0]*(len(freq_normal)+len(freq_anomaly))\n",
82 | " ])\n",
83 | "data[:5]"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {},
89 | "source": [
90 | "If we plot the histogram from the frequency data, we can see that the distribution tail is thicker at the right side because of the anomalous data points:"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 12,
96 | "metadata": {
97 | "collapsed": false,
98 | "scrolled": true
99 | },
100 | "outputs": [
101 | {
102 | "data": {
103 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAEACAYAAABVtcpZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAE8NJREFUeJzt3X+s3fV93/Hni3g0SUsZSYWvYjfYFEFN1DRlm5cpTD1r\nVH50kkFbRZ12DYQomgptom2KZrM/fP/ZGlfakkgTqbpSYqpkzMnU4UQUCIKjKduCGQmFxA5YqmyM\nW992E2OLGk328t4f52tzcHx97z3n+p5z7uf5kI74ns/5nu/3fT/43tf3+/n+SlUhSWrTJZMuQJI0\nOYaAJDXMEJCkhhkCktQwQ0CSGmYISFLDlgyBJA8kWUjywnk++2dJfpDkHUNtu5McSXI4yU1D7Tck\neSHJy0k+s3o/giRpVMvZE3gQuPncxiSbgV8Ejg21bQPuALYBtwL3J0n38eeAj1bVtcC1SX5omZKk\ntbVkCFTV14HXzvPRp4FPntN2G/BwVZ2uqqPAEWB7kjngsqp6tpvvIeD2kauWJK2KkY4JJNkBHK+q\nF8/5aBNwfOj9ia5tE/DqUPurXZskaYI2rPQLSd4G3MdgKEiSNMNWHALATwFbgD/pxvs3A99Msp3B\nlv+7h+bd3LWdAH7yPO3nlcQbGknSCKoqS8/1huUOB6V7UVXfrqq5qrq6qrYyGNr5uar6C+AA8CtJ\nLk2yFbgGOFhVJ4HXk2zvguPDwCNL/CAz+9qzZ8/Ea2ixduuf/Mv6J/saxXJOEf0i8F8ZnNHzSpKP\nnPv3mjcC4hCwHzgEPArcU29Udi/wAPAycKSqHhupYknSqllyOKiqfnWJz68+5/1vA799nvmeA35m\npQVKki4erxi+CHq93qRLGNks1w7WP2nWP3sy6jjSxZSkprEuSZpmSaiLdGBYkrQOGQKS1DBDQJIa\nZghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEJCkhhkCktQwQ0CSGmYISFLDDAFJapghIEkNMwQaMDe3\nhSQkYW5uy6TLkTRFfJ5AAwaPdT7Tnxn5WaSSppvPE5AkrYghIEkNMwQkqWGGgCQ1zBCQpIYtGQJJ\nHkiykOSFobbfSXI4yfNJ/mOSHx/6bHeSI93nNw2135DkhSQvJ/nM6v8okqSVWs6ewIPAzee0PQG8\np6reBxwBdgMkuR64A9gG3Arcn8H5iQCfAz5aVdcC1yY5d5mSpDW2ZAhU1deB185pe7KqftC9/Qaw\nuZveATxcVaer6iiDgNieZA64rKqe7eZ7CLh9FeqXJI1hNY4J3A082k1vAo4PfXaia9sEvDrU/mrX\nJkmaoA3jfDnJvwBOVdW/X6V6zpqfnz873ev16PV6q70KSZpp/X6ffr8/1jKWdduIJFcBX6mq9w61\n3QV8DPiFqvq/XdsuoKpqb/f+MWAPcAx4uqq2de07gZ+vqt9YZH3eNmIVLfe2EXNzW1hYOAbAxo1X\ncfLk0TWpT9LquJi3jUj3OrOiW4BPAjvOBEDnALAzyaVJtgLXAAer6iTwepLt3YHiDwOPrKRQXXyD\nACigzoaBpPVtyeGgJF8EesA7k7zCYMv+PuBS4GvdyT/fqKp7qupQkv3AIeAUcM/QJv29wOeBtwKP\nVtVjq/yzSJJWyLuINmC5w0HebVSabd5FVJK0IoaAJDXMEJCkhhkCktQwQ0CSGmYISFLDDAFJapgh\nIEkNMwRm2NzcFpKcfc3NbZl0SZJmjFcMz7A3X+ELi13l6xXDUhu8YlhrYngPxL0Paba5JzDDJrUn\n4B6DNJ3cE5AkrYghIEkNMwQkqWGGgCQ1zBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJDTMEJKlh\nS4ZAkgeSLCR5YajtiiRPJHkpyeNJLh/6bHeSI0kOJ7lpqP2GJC8keTnJZ1b/R5EkrdRy9gQeBG4+\np20X8GRVXQc8BewGSHI9cAewDbgVuD
+Du40BfA74aFVdC1yb5NxlSpLW2JIhUFVfB147p/k2YF83\nvQ+4vZveATxcVaer6ihwBNieZA64rKqe7eZ7aOg7kqQJGfWYwJVVtQBQVSeBK7v2TcDxoflOdG2b\ngFeH2l/t2iRJE7RhlZaz6jeUn5+fPzvd6/Xo9XqrvQpJmmn9fp9+vz/WMpb1UJkkVwFfqar3du8P\nA72qWuiGep6uqm1JdgFVVXu7+R4D9gDHzszTte8Efr6qfmOR9flQmWXwoTKShl3Mh8qke51xALir\nm74TeGSofWeSS5NsBa4BDnZDRq8n2d4dKP7w0HckSROy5HBQki8CPeCdSV5hsGX/KeBLSe5msJV/\nB0BVHUqyHzgEnALuGdqkvxf4PPBW4NGqemx1fxRJ0kr5jOEZ5nCQpGE+Y1iStCKGgCQ1zBCQpIYZ\nApLUMENAF8Xc3BaSnH3NzW2ZdEmSzsOzg2bYNJ8dtNzaJK0ezw6SJK2IISBJDTMEptTwmLrj6ZIu\nFo8JTKnVHHf3mIDUBo8JSJJWxBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJDTMEJKlhhoAkNcwQ\nkKSGGQKS1DBDQJIaZghIUsPGCoEk/yTJt5O8kOQLSS5NckWSJ5K8lOTxJJcPzb87yZEkh5PcNH75\nkqRxjBwCSd4F/BZwQ1W9F9gAfAjYBTxZVdcBTwG7u/mvB+4AtgG3AvdncL9hSdKEjDsc9BbgR5Ns\nAN4GnABuA/Z1n+8Dbu+mdwAPV9XpqjoKHAG2j7l+SdIYRg6Bqvoz4F8DrzD44/96VT0JbKyqhW6e\nk8CV3Vc2AceHFnGia5MkTciGUb+Y5K8z2Oq/Cngd+FKSX+PNj5PiPO+XZX5+/ux0r9ej1+uNVKck\nrVf9fp9+vz/WMkZ+vGSSXwZurqqPde9/HXg/8AtAr6oWkswBT1fVtiS7gKqqvd38jwF7quqZ8yzb\nx0v6eElJK7TWj5d8BXh/krd2B3g/CBwCDgB3dfPcCTzSTR8AdnZnEG0FrgEOjrF+SdKYRh4OqqqD\nSb4MfAs41f3394DLgP1J7gaOMTgjiKo6lGQ/g6A4BdzT/Oa+JE3YyMNBF5PDQQ4HSVq5tR4OkiTN\nOENAkhpmCEhSwwwBSWqYISBJDTMEJKlhhoAkNcwQ0ETNzW0hCUmYm9sy6XKk5nix2JRq5WKx5a5T\n0tK8WEyStCKGgCQ1zBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJDTMEJKlhhoAkNcwQkKSGGQJr\nzBumSZom3kBujU3zTdqmuTZJS/MGcpKkFRkrBJJcnuRLSQ4n+U6Sv53kiiRPJHkpyeNJLh+af3eS\nI938N41fviRpHOPuCXwWeLSqtgE/C3wX2AU8WVXXAU8BuwGSXA/cAWwDbgXuz2AsQJI0ISOHQJIf\nB/5uVT0IUFWnq+p14DZgXzfbPuD2bnoH8HA331HgCLB91PVLksY3zp7AVuB/JHkwyTeT/F6StwMb\nq2oBoKpOAld2828Cjg99/0TXJkmakA1jfvcG4N6q+u9JPs1gKOjc0ztGOt1jfn7+7HSv16PX641W\npSStU/1+n36/P9YyRj5FNMlG4L9V1dXd+xsZhMBPAb2qWkgyBzxdVduS7AKqqvZ28z8G7KmqZ86z\nbE8R9RRRSSu0pqeIdkM+x5Nc2zV9EPgOcAC4q2u7E3ikmz4A7ExyaZKtwDXAwVHXL0ka3zjDQQAf\nB76Q5K8Bfwp8BHgLsD/J3cAxBmcEUVWHkuwHDgGngHvW7ea+JM0IrxheY9M85DLNtUlamlcMS5JW\nxBDQTPDGe9LF4XDQGpvmIZf1UJvUMoeDJEkrYghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEJCkhhkC\nktQwQ0CSGmYISFLDDAFJapghIEkNMwQkqWGGgCQ1zBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJ\nDR
s7BJJckuSbSQ50769I8kSSl5I8nuTyoXl3JzmS5HCSm8ZdtyRpPKuxJ/AJ4NDQ+13Ak1V1HfAU\nsBsgyfXAHcA24Fbg/iRZhfVLkkY0Vggk2Qz8EvD7Q823Afu66X3A7d30DuDhqjpdVUeBI8D2cdYv\nSRrPuHsCnwY+CdRQ28aqWgCoqpPAlV37JuD40HwnujZJ0oRsGPWLSf4+sFBVzyfpXWDWusBni5qf\nnz873ev16PUutApJak+/36ff74+1jFSN9DeaJP8K+EfAaeBtwGXAHwF/E+hV1UKSOeDpqtqWZBdQ\nVbW3+/5jwJ6qeuY8y65R65p2g8MgZ362sNjPuZz53jzPcucbb53TXpvUsiRU1YqOtY48HFRV91XV\nu6vqamAn8FRV/TrwFeCubrY7gUe66QPAziSXJtkKXAMcHHX9kqTxjTwcdAGfAvYnuRs4xuCMIKrq\nUJL9DM4kOgXcs2439yVpRow8HHQxORw03UMu01yb1LI1HQ7Sm83NbSEJSZib2zLpciRpWdwTWCUt\nbW1Pc21Sy9wTUPPcI5NWxj2BVdLS1vas1yatV+4JSJJWxBCQpIYZApLUMENAkhpmCEhSwwwBSWqY\nISBJDTMEJKlhhoAkNcwQkKSGGQKS1DBDQJIaZghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEFBzhh9B\n6WMo1TofL7lKfITj+qtNmjVr+njJJJuTPJXkO0leTPLxrv2KJE8keSnJ40kuH/rO7iRHkhxOctOo\n65YkrY5xhoNOA/+0qt4D/B3g3iQ/DewCnqyq64CngN0ASa4H7gC2AbcC92ewSSZJmpCRQ6CqTlbV\n893094DDwGbgNmBfN9s+4PZuegfwcFWdrqqjwBFg+6jrlySNb1UODCfZArwP+AawsaoWYBAUwJXd\nbJuA40NfO9G1SZImZMO4C0jyY8CXgU9U1feSnHuEbaQjbvPz82ene70evV5v1BIlaV3q9/v0+/2x\nljHW2UFJNgBfBf64qj7btR0GelW1kGQOeLqqtiXZBVRV7e3mewzYU1XPnGe5nh3UyBk401ybNGvW\n9Oygzh8Ah84EQOcAcFc3fSfwyFD7ziSXJtkKXAMcHHP9kqQxjDwclOQDwK8BLyb5FoNNq/uAvcD+\nJHcDxxicEURVHUqyHzgEnALumbnNfUlaZ7xYbJVM87CGtY1WmzRrJjEcJEmaYYaAJDXMEFiCNxuT\ntJ55TGAJ62Fs29pGq02aNR4TkCStiCEgSQ0zBKRFDB8P8liQ1qux7x0krVcLC8c4c+xgYcG7nmt9\nck9AkhpmCEhSwwwBSWqYISBJDWs6BDz7Q1Lrmj47yLM/JLWu6T0BSWqdISCNyWFFzbKmh4Ok1eCw\nomaZewKS1DBDQJIaZghIUsMMAUlqmCEgSQ1b8xBIckuS7yZ5Ock/X+v1S2vF51NrFqxpCCS5BPi3\nwM3Ae4APJfnp1V6P522Poz/pAtaNN04dHbwG73/Y8L/Xd7xjbi1LXHX9fn/SJYxl1usfxVrvCWwH\njlTVsao6BTwM3LbaKxn+5VvsF0+L6U+6gOYM/3t97bWFSZczlln/Izrr9Y9irS8W2wQcH3r/KoNg\nWJGq4qtf/Srf//73AXjXu97FjTfeuDoVSlJDZvKK4eeee44dO3a8qe3UqVNs2DCTP460LHNzW87u\n2W7ceBUnTx5dlWWt9vJ+93c/P9ayVttq9tt6lKpau5Ul7wfmq+qW7v0uoKpq7znzrV1RkrSOVNWK\n7l2y1iHwFuAl4IPAnwMHgQ9V1eE1K0KSdNaajp9U1f9L8pvAEwwOSj9gAEjS5KzpnoAkabpM1RXD\ns34hWZKjSf4kybeSHJx0PUtJ8kCShSQvDLVdkeSJJC8leTzJ5ZOs8UIWqX9PkleTfLN73TLJGheT\nZHOSp5J8J8mLST7etc9E/5+n/t/q2mel/38kyTPd7+qLSfZ07bPS
/4vVv+L+n5o9ge5CspcZHC/4\nM+BZYGdVfXeiha1Akj8F/kZVvTbpWpYjyY3A94CHquq9Xdte4H9W1e90QXxFVe2aZJ2LWaT+PcD/\nqap/M9HilpBkDpirqueT/BjwHINrZj7CDPT/Ber/FWag/wGSvL2q/qo7VvlfgI8D/5AZ6H9YtP5b\nWWH/T9OewJpcSHaRhenq0wuqqq8D5wbWbcC+bnofcPuaFrUCi9QPg/8PU62qTlbV893094DDwGZm\npP8XqX9T9/HU9z9AVf1VN/kjDI6PFjPS/7Bo/bDC/p+mP1jnu5Bs0yLzTqsCvpbk2SQfm3QxI7qy\nqhZg8IsOXDnhekbxm0meT/L707o7PyzJFuB9wDeAjbPW/0P1P9M1zUT/J7kkybeAk8DXqupZZqj/\nF6kfVtj/0xQC68EHquoG4JeAe7vhilk3HeOFy3c/cHVVvY/BL8dUD0t0QylfBj7RbVGf299T3f/n\nqX9m+r+qflBVP8dgD2x7kvcwQ/1/nvqvZ4T+n6YQOAG8e+j95q5tZlTVn3f//UvgjxjhlhhTYCHJ\nRjg77vsXE65nRarqL+uNA13/Dvhbk6znQpJsYPAH9A+r6pGueWb6/3z1z1L/n1FV/5vBTbNuYYb6\n/4zh+kfp/2kKgWeBa5JcleRSYCdwYMI1LVuSt3dbRST5UeAm4NuTrWpZwpvHEA8Ad3XTdwKPnPuF\nKfOm+rtf3DP+AdP9/+APgENV9dmhtlnq/x+qf1b6P8lPnBkqSfI24BcZHNeYif5fpP7vjtL/U3N2\nEAxOEQU+yxsXkn1qwiUtW5KtDLb+i8FBmi9Me/1Jvgj0gHcCC8Ae4D8BXwJ+EjgG3FFV/2tSNV7I\nIvX/PQbj0z8AjgL/+MwY7zRJ8gHgPwMv8sb9pu9jcBX9fqa8/y9Q/68yG/3/MwwO/F7Svf5DVf3L\nJO9gNvp/sfofYoX9P1UhIElaW9M0HCRJWmOGgCQ1zBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJ\nDfv/d11AEyefjAAAAAAASUVORK5CYII=\n",
104 | "text/plain": [
105 | ""
106 | ]
107 | },
108 | "metadata": {},
109 | "output_type": "display_data"
110 | }
111 | ],
112 | "source": [
113 | "hist(data.T[0],100);"
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {},
119 | "source": [
120 | "## Used Anomaly Detector\n",
121 | "Create an anomaly detector using as first argument the P_PoissonOnesided statistical model, that is, we use a Poisson distribution for modelling data, but we only care about anomalously large frequencies. The second argument is an array containing the column index used as input to the statistical model where 0 is the column index of frequencies and 1 is the column index of the period in the data object:"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 13,
127 | "metadata": {
128 | "collapsed": false
129 | },
130 | "outputs": [],
131 | "source": [
132 | "anomaly_detector = pyisc.AnomalyDetector(\n",
133 | " pyisc.P_PoissonOnesided(frequency_column=0,period_column=1)\n",
134 | ")"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "metadata": {},
140 | "source": [
141 | "Train the anomaly detector:"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 14,
147 | "metadata": {
148 | "collapsed": false
149 | },
150 | "outputs": [
151 | {
152 | "name": "stdout",
153 | "output_type": "stream",
154 | "text": [
155 | "1000 loops, best of 3: 1.54 ms per loop\n"
156 | ]
157 | }
158 | ],
159 | "source": [
160 | "%timeit anomaly_detector.fit(data);"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "Compute the anomaly scores for each data point:"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 17,
173 | "metadata": {
174 | "collapsed": false
175 | },
176 | "outputs": [],
177 | "source": [
178 | "scores = anomaly_detector.anomaly_score(data)"
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {},
184 | "source": [
185 | "## Anomaly Scores\n",
186 | "Now we can print some example of normal frequencies vs. anomaly scores:"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 18,
192 | "metadata": {
193 | "collapsed": false
194 | },
195 | "outputs": [
196 | {
197 | "name": "stdout",
198 | "output_type": "stream",
199 | "text": [
200 | "(12, 1.1745941638946533)\n",
201 | "(7, 0.0)\n",
202 | "(16, 2.9855577945709229)\n",
203 | "(9, 0.0)\n",
204 | "(17, 3.570124626159668)\n",
205 | "(11, 0.28747570514678955)\n",
206 | "(10, 0.0)\n",
207 | "(9, 0.0)\n",
208 | "(15, 2.4521820545196533)\n",
209 | "(11, 0.28747570514678955)\n",
210 | "(18, 4.2041616439819336)\n",
211 | "(11, 0.28747570514678955)\n",
212 | "(8, 0.0)\n",
213 | "(9, 0.0)\n",
214 | "(13, 1.5453963279724121)\n"
215 | ]
216 | }
217 | ],
218 | "source": [
219 | "for s in zip(freq_normal[:15], scores[:15]):\n",
220 | " print s"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {},
226 | "source": [
227 | "The anomalous frequencies vs. anomaly scores:"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 19,
233 | "metadata": {
234 | "collapsed": false,
235 | "scrolled": true
236 | },
237 | "outputs": [
238 | {
239 | "name": "stdout",
240 | "output_type": "stream",
241 | "text": [
242 | "(32, 17.405155181884766)\n",
243 | "(25, 9.8875513076782227)\n",
244 | "(23, 8.0570564270019531)\n",
245 | "(25, 9.8875513076782227)\n",
246 | "(16, 2.9855577945709229)\n",
247 | "(34, 19.836088180541992)\n",
248 | "(17, 3.570124626159668)\n",
249 | "(23, 8.0570564270019531)\n",
250 | "(25, 9.8875513076782227)\n",
251 | "(27, 11.866734504699707)\n",
252 | "(29, 13.985079765319824)\n",
253 | "(35, 21.094324111938477)\n",
254 | "(28, 12.909066200256348)\n",
255 | "(29, 13.985079765319824)\n",
256 | "(29, 13.985079765319824)\n"
257 | ]
258 | }
259 | ],
260 | "source": [
261 | "for s in zip(freq_anomaly, scores[-15:]):\n",
262 | " print s"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "metadata": {},
268 | "source": [
269 | "As can be seen above, the anomalous frequencies also have higher anomaly scores than the normal frequencies as it should be. This becomes even more visible if we plot the frequency (x-axis) against anomaly scores (y-axis): "
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 20,
275 | "metadata": {
276 | "collapsed": false
277 | },
278 | "outputs": [
279 | {
280 | "data": {
281 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEACAYAAACj0I2EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEB9JREFUeJzt3W+MHPV9x/HPh9hNSnCQS2usxBRDKtf1GeQGHFWitjag\nxi6qBEolkphICQ8iHpSA+sguT+4ipUkhEhJSxYMEYzmVrSZBavij/DGNu0qIDBiME/vO5qpQu6XG\nF0ShXSuKVJdvH8wcPh+3e7uzuzczv32/pNXMzu2fr2Z9H//uO/PbcUQIAFB/l5RdAABgMAh0AEgE\ngQ4AiSDQASARBDoAJIJAB4BELBrottfYPmh70vYx21/Kt4/bfs32kfy2ffjlAgDa8WLnodteLWl1\nRBy1fZmklyTdJunTkloR8dDwywQALGbZYg+IiLOSzubr52yfkPSR/MceYm0AgB701EO3vVbSJknP\n55vusX3U9qO2Lx9wbQCAHnQd6Hm75XFJ90XEOUmPSLo2IjYpG8HTegGAEi3aQ5ck28skPS3pBxHx\n8AI/v1rSUxFx/QI/48tiAKCAiOiprd3tCP0xSVNzwzw/WDrrU5KOdyiqtrfx8fHSaxjV+utcO/WX\nf6t7/UUselDU9k2S7pR0zPbLkkLS/ZJ22N4k6R1JpyTdXagCAMBAdHOWy88kvW+BH/1w8OUAAIpi\npugiGo1G2SX0pc7117l2ifrLVvf6i+jqoGhfb2DHsN8DAFJjWzGkg6IAgIoj0AEgEQQ6ACSCQAeA\nRBDoAJAIAh0AEkGgA0AiCHQASASBDgCJINABIBEEOgAkgkAHgEQQ6ACQCAIdABJBoANAIgh0AEgE\ngQ4AiSDQASARBDoAJIJAB4BEEOgAkAgCHQASQaADQCIIdABIBIEOAIkg0AEgEQQ6AFRMq1XseQQ6\nAFRIqyVt2VLsuQQ6AFTI8ePS5GSx5xLoAFAhGzdKY2PFnuuIGGw189/AjmG/BwCkpNWSPvQhKyLc\ny/MIdACoILv3QKflAgCJINABIBGLBrrtNbYP2p60fcz2vfn2lbYP2H7F9o9sXz78cgEA7SzaQ7e9\nWtLqiDhq+zJJL0m6TdJdkt6MiAdt75S0MiJ2LfB8eugA0KOh9NAj4mxEHM3Xz0k6IWmNslDfmz9s\nr6TbeysXADBIPfXQba+VtEnSc5KujIgZKQt9SasGXRwAoHvLun1g3m55XNJ9EXHO9vw+Stu+ysTE\nxLvrjUZDjUajtyoBIAGtVjYTdONGacWKi3/WbDbVbDb7ev2uzkO3vUzS05J+EBEP59tOSGpExEze\nZ/+XiPijBZ5LDx3AyJv9jpbJyWwm6E9/+t5Qn2uY56E/JmlqNsxzT0r6Qr7+eUlP9PLGADBKZr+j\n5fx5aWqq+Pe1dNLNWS43SfqJpGPK2ioh6X5JL0j6jqSrJJ2WdEdEvL3A8xmhAxh5syP0qSlpw4bh\njNCZ+g8AS6TVutBy6RTmEoEOAMngu1wAYIQR6ACQCAIdABJBoANAIgh0AEgEgQ4AiSDQASARBDoA\nJIJAB4BEEOgAkAgCHQASQaADQCIIdABIBIEOAIkg0AEgEQQ6AHTQakmHDmXLqiPQAaCN2cvGbd2a\nLase6gQ6ALSxFBd2HiQCHQDa2Lgxu/7n8uXZhZ3HxsquqDOuKQoAHfRyYedB4iLRAJAILhINACOM\nQAeARBDoAJAIAh0AEkGgA0AiCHQASASBDgCJINABIBEEOgAkgkAHgEQQ6ACQCAIdABJBoANAIhYN\ndNu7bc/Y/sWcbeO2X7N9JL9tH26ZAIDFdDNC3yNp2wLbH4qIj+W3Hw64LgBAjxYN9Ih4VtJbC/yo\np+/pBQAMVz899HtsH7X9qO3LB1YRAKCQooH+iKRrI2KTpLOSHhpcSQCAIpYVeVJEvDHn7jclPdXp\n8RMTE++uNxoNNRqNIm8LAMlqNptqNp
t9vUZX1xS1vVbSUxFxXX5/dUSczdf/WtLmiNjR5rlcUxQA\nelTkmqKLjtBt75fUkHSF7X+XNC7pE7Y3SXpH0ilJd/dcLQBgoLoaoff1BozQAaBnRUbozBQFgEQQ\n6ACQCAIdABJBoAOojelpaefObIn34qAogFqYnpbWr5ciJFs6eVJat67sqoaHg6IAkrV7dxbmUrbc\ns6fceqqIETqAWmCEvjhG6ABqYd26LMR37Uo/zItihA4AFcQIHQBGGIEOAIkg0AEgEQQ6ACSCQAdQ\nCa2WdOhQtkQxBDqA0rVa0pYt0tat2ZJQL4ZAB1C648elyUnp/HlpaipbR+8IdACl27hRGhuTli+X\nNmzI1tE7JhYBqIRWKxuZj41JK1aUXU35ikwsItABoIKYKQoAI4xAB4BEEOgAkAgCHcBQMWFo6RDo\nAIaGCUNLi0AHMDRMGFpaBDqAoWHC0NLiPHQAQ8WEoWKYWAQAiWBiEQCMMAIdABJBoANAIgh0AIUx\naahaCHQAhTBpqHoIdACFMGmoegh0AIUwaah6OA8dQGFMGhqeoUwssr1b0l9ImomI6/NtKyV9W9LV\nkk5JuiMi/rvN8wl0AOjRsCYW7ZG0bd62XZL+OSL+UNJBSX/Ty5sCAAZv0UCPiGclvTVv822S9ubr\neyXdPuC6AAA9KnpQdFVEzEhSRJyVtGpwJQEAihjUWS40yYGETE9LO3dmS9THsoLPm7F9ZUTM2F4t\n6VedHjwxMfHueqPRUKPRKPi2AIZtelpav16KkL7+denkSWndurKrSl+z2VSz2ezrNbo6bdH2WklP\nRcR1+f0HJP1XRDxge6eklRGxq81zOcsFqJGdO6UHH7xwf9cu6WtfK6+eUTWs0xb3S2pIukLSjKRx\nSd+T9F1JV0k6rey0xbfbPJ9AB2pk7gjdZoReFi5wAWAgpqelPXuku+4izMtCoANAIrhiEQCMMAId\nABJBoANAIgh0AEgEgQ4AiSDQASARBDoAJIJAB4BEEOgAkAgCHQASQaADiThzRvrGN7IlRlPR70MH\nUCFnzkgf/aj0m99IH/iA9MtfSh/+cNlVYakxQgcS8PTTWZhL2fL73y+3HpSDb1sEEsAIPT18fS4w\nws6cyUbmt95KmKeAQAeARPB96AAwwgh0AEgEgQ5UXKslHTqULYFOCHSgwlotacsWaevWbEmooxMC\nHaiw48elyUnp/HlpaipbB9oh0IEK27hRGhuTli+XNmzI1oF2OG0RqLhWKxuZj41JK1aUXQ2WCueh\nA0AiOA8dAEYYgQ6UaP9+ae3abAn0i6/PBUqyf790553Z+uxyx47y6kH90UMHSrJ2rXT69IX711wj\nvfpqaeWgYuihAzXy1a9efP8rXymnDqSDQAdKsmOHtG9fNjLft492C/pHywUAKoiWCwCMMAIdGIJb\nbpHsbAksFQIdGLBbbpEOHszWDx4k1LF06KEDA+YFup78CqBXS95Dt33K9s9tv2z7hX5eC0jFzTd3\nvg8MS18jdNuvSrohIt7q8BhG6Bg5s22Xm2+WfvzjsqtBHS35ty3a/jdJN0bEmx0eQ6ADQI/KOG0x\nJD1j+7DtL/b5WkAtbN6c9ck3by67EuBi/X45100R8brt31MW7Cci4tn5D5qYmHh3vdFoqNFo9Pm2\nQDk2b5ZefDFbf/HF7P7hw+XWhDQ0m001m82+XmNgZ7nYHpfUioiH5m2n5YJkcAYLlsqStlxsX2r7\nsnz9g5I+Kel40dcD6uDGGzvfB8rUTw/9SknP2n5Z0nOSnoqIA4MpC6imw4cvhPiNN9JuQbUwsQgA\nKqhIy4UrFgG5uf1xxiCoI77LBdB7D3YudPATqDoCHQASQaADQCIIdEDv7ZnTQ0cdcVAUyBHiqDsC\nHSOBM1gwCmi5IHmcwYJRQaADQCIIdABIBIGO5HEGC0YFB0VRa90e7CTEMQoYoaO2ONgJXIxAB4BE\nEO
gAkAgCHbXFwU7gYhwURSVxsBPoHSN0VA4HO4FiCHQASASBDgCJINCx5OwLt4VwsBMohoOiWFIL\n9ccXCmxCHOgdI3QASASBDgCJINAxMIv1xiX648Aw0UPHQHTbG5cIcWBYGKEDQCIYoaMrXGQZqD5G\n6FhUN1Px6Y0D5WOEjoEhxIFyMUIHgEQQ6FgU7RSgHmi5oCuEOFB9jNABIBEEOgAkoq9At73d9knb\n07Z3DqooLJ1upusDqIfCgW77Ekl/L2mbpDFJn7W9flCFVUWz2Sy7hL50qr/ql3pLed/XAfXXTz8j\n9I9L+teIOB0R/yvpHyXdNpiyqqPu/yjqXH+da5eov2x1r7+IfgL9I5L+Y8791/JtPTtyRPrc57Jl\nO9u2ZSPIbds6v1Y3LYRu2wy29OUvD+61BlnXoOoHkI7SD4oeOSLdcIO0b1+2XCjUt22TDhzI1g8c\naB/q3bQQum0z1Pm1un0c55cDaXEU/C22/SeSJiJie35/l6SIiAfmPY6YAIACIqKnv6/7CfT3SXpF\n0i2SXpf0gqTPRsSJQi8IAOhL4ZmiEfF/tu+RdEBZ62Y3YQ4A5Sk8QgcAVMvQDorWfdKR7VO2f277\nZdsvlF3PYmzvtj1j+xdztq20fcD2K7Z/ZPvyMmvspE3947Zfs30kv20vs8ZObK+xfdD2pO1jtu/N\nt1f+M1ig9i/l22ux/22/3/bz+e/qMdvj+fbK73upY/097/+hjNDzSUfTyvrrZyQdlvSZiDg58Dcb\nEtuvSrohIt4qu5Zu2P5TSeckfSsirs+3PSDpzYh4MP9PdWVE7Cqzznba1D8uqRURD5VaXBdsr5a0\nOiKO2r5M0kvK5mXcpYp/Bh1q/7Tqs/8vjYhf58f2fibpXkl/qYrv+1lt6v9z9bj/hzVCT2HSkVWB\n0zq7FRHPSpr/n89tkvbm63sl3b6kRfWgTf1S9jlUXkScjYij+fo5SSckrVENPoM2tc/OKanL/v91\nvvp+ZccGQzXY97Pa1C/1uP+HFVgDm3RUopD0jO3Dtr9YdjEFrYqIGSn7pZW0quR6irjH9lHbj1b1\nT+b5bK+VtEnSc5KurNNnMKf25/NNtdj/ti+x/bKks5KeiYjDqtG+b1O/1OP+r80ItAQ3RcTHJN0q\n6a/ylkDd1e0I+COSro2ITcr+odfhT//LJD0u6b58tDt/n1f2M1ig9trs/4h4JyL+WNlfRR+3PaYa\n7fsF6t+gAvt/WIH+n5J+f879Nfm22oiI1/PlG5L+SVkbqW5mbF8pvdsn/VXJ9fQkIt6ICwd5vilp\nc5n1LMb2MmWB+A8R8US+uRafwUK1123/S1JE/I+kpqTtqsm+n2tu/UX2/7AC/bCkP7B9te3fkvQZ\nSU8O6b0Gzval+WhFtj8o6ZOSjpdbVVesi3tuT0r6Qr7+eUlPzH9CxVxUf/5LOOtTqv5n8JikqYh4\neM62unwG76m9Lvvf9u/OtiNs/7akP1N2HKAW+75N/SeL7P+hnYeen2LzsC5MOvq7obzRENi+Rtmo\nPJQdoNhX9fpt75fUkHSFpBlJ45K+J+m7kq6SdFrSHRHxdlk1dtKm/k8o6+e+I+mUpLtne6JVY/sm\nST+RdEzZv5uQdL+yGdTfUYU/gw6171AN9r/t65Qd9Lwkv307Iv7W9u+o4vte6lj/t9Tj/mdiEQAk\ngoOiAJAIAh0AEkGgA0AiCHQASASBDgCJINABIBEEOgAkgkAHgET8P6LeLy4RSm+kAAAAAElFTkSu\nQmCC\n",
282 | "text/plain": [
283 | ""
284 | ]
285 | },
286 | "metadata": {},
287 | "output_type": "display_data"
288 | }
289 | ],
290 | "source": [
291 | "plot(data.T[0], scores, '.');"
292 | ]
293 | },
294 | {
295 | "cell_type": "markdown",
296 | "metadata": {},
297 | "source": [
298 | "So, depending on at what level we would consider a frequency an anomaly, we can set a threshold to decide if a frequency is anomalous."
299 | ]
300 | },
301 | {
302 | "cell_type": "markdown",
303 | "metadata": {},
304 | "source": [
305 | "We can also \"confuse\" the anomaly detector by adding more normal training data closer to the anomalous data:"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": 21,
311 | "metadata": {
312 | "collapsed": false
313 | },
314 | "outputs": [],
315 | "source": [
316 | "data2 = np.column_stack([\n",
317 | " poisson(15).rvs(15), \n",
318 | " [1.0]*15\n",
319 | " ])\n",
320 | "anomaly_detector.fit_incrementally(data2);"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 22,
326 | "metadata": {
327 | "collapsed": false
328 | },
329 | "outputs": [],
330 | "source": [
331 | "scores_ = anomaly_detector.anomaly_score(data)"
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": 23,
337 | "metadata": {
338 | "collapsed": false
339 | },
340 | "outputs": [
341 | {
342 | "data": {
343 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEACAYAAACj0I2EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFfFJREFUeJzt3X1sHPWdx/HPN4SH4ymhPUiupcFpUI7iFExJ6ElOYKm5\nFkokqh7QNq1UIrUiPBREJZRQqnNyqhsgEhXSxUVq0xROja4k0rXFKS004DaxaBNSQsgD+KB5OC6N\ni7iaGqGUQr73x+zaXnvX3h3vemZ++35J1u6OZ2a/jO0Pv3xnfjvm7gIAZN+UpAsAANQGgQ4AgSDQ\nASAQBDoABIJAB4BAEOgAEIhxA93MzjWzp81sr5m9aGZfyy9vN7PXzOz3+a+r618uAKAcG+86dDOb\nKWmmu+8ys9Ml7ZR0naTPSRpw9wfrXyYAYDxTx1vB3Y9KOpp//paZ7Zf0wfy3rY61AQCqUFUP3cya\nJLVI+l1+0e1mtsvMvm9m02pcGwCgChUHer7dsknSne7+lqROSR929xZFI3haLwCQoHF76JJkZlMl\ndUl6wt0fKvH98yQ97u4XlfgeHxYDADG4e1Vt7UpH6D+QtG94mOdPlhZ8VtKeMYrK7Fd7e3viNTRq\n/VmunfqT/8p6/XGMe1LUzFolfVHSi2b2vCSX9A1JS8ysRdJxSQcl3RyrAgBATVRylUuPpBNKfOsX\ntS8HABAXM0XHkcvlki5hQrJcf5Zrl6g/aVmvP46KTopO6A3MvN7vAQChMTN5nU6KAgBSjkAHgEAQ\n6AAQCAIdAAJBoANAIAh0AAgEgQ4AgSDQASAQBDoABIJAB4BAEOgAEAgCHQACQaADQCAIdAAIBIEO\nAIEg0AEgEAQ6AASCQAeAQBDoABAIAh0AAkGgA0AgCHQACASBDgCBINABIBAEOgAEgkAHgEAQ6ACQ\nMgMD8bYj0AEgRQYGpEWL4m1LoANAiuzZI+3dG29bAh0AUmTePKm5Od625u61rWbkG5h5vd8DAEIy\nMCCdeabJ3a2a7Qh0AEghs+oDnZYLAASCQAeAQIwb6GZ2rpk9bWZ7zexFM7sjv/wsM3vSzF42s1+a\n2bT6lwsAKGfcHrqZzZQ00913mdnpknZKuk7SUklvuPsDZrZc0lnuvqLE9vTQAaBKdemhu/tRd9+V\nf/6WpP2SzlUU6o/kV3tE0meqKxcAUEtV9dDNrElSi6TfSprh7n1SFPqSzql1cQCAyk2tdMV8u2WT\npDvd/S0zG9lHKdtXWbly5eDzXC6nXC5XXZUAEICBgWgm6Lx50hlnFH+vu7tb3d3dE9p/Rdehm9lU\nSV2SnnD3h/LL9kvKuXtfvs/+jLt/pMS29NABNLzCZ7Ts3RvNBN26dXSoD1fP69B/IGlfIczzfibp\npvzzL0v6aTVvDACNpPAZLe++K+3bF//zWsZSyVUurZJ+I+lFRW0Vl/QNSdslPSbpQ5IOSbrR3ftL\nbM8IHUDDK4zQ9+2TLrywPiN0pv4DwCQZGBhquYwV5hKBDgDB4LNcAKCBEegAEAgCHQACQaADQCAI\ndAAIBIEOAIEg0AEgEAQ6AASCQAeAQBDoABAIAh0AAkGgA0AgCHQACASBDgCBINABIBAEOgCMYWBA\nevbZ6DHtCHQAKKNw27jLL48e0x7qBDoAlDEZN3auJQIdAMqYNy+6/+eJJ0Y3dm5uTrqisXFPUQAY\nQzU3dq4lbhINAIHgJtEA0MAIdAAIBIEOAIEg0AEgEAQ6AASCQAeAQBDoABAIAh0AAkGgA0AgCHQA\nCASBDgCBINABIBAEOgAEYtxAN7N1ZtZnZruHLWs3s9fM7Pf5r6vrWyYAYDyVjNDXS/pUieUPuvvH\n8l+/qHFdAIAqjRvo7r5N0p9LfKuqz+kFANTXRHrot5vZLjP7vplNq1lFAIBY4gZ6p6QPu3uLpKOS\nHqxdSQCAOKbG2cjdXx/28nuSHh9r/Z
UrVw4+z+VyyuVycd4WAILV3d2t7u7uCe2jonuKmlmTpMfd\n/aP51zPd/Wj++V2SFrj7kjLbck9RAKhSnHuKjjtCN7MNknKS3m9mhyW1S7rSzFokHZd0UNLNVVcL\nAKipikboE3oDRugAULU4I3RmigJAIAh0AAgEgQ4AgSDQAWRGb6+0fHn0iNE4KQogE3p7pQsukNwl\nM+mll6S5c5Ouqn44KQogWOvWRWEuRY/r1ydbTxoxQgeQCYzQx8cIHUAmzJ0bhfiKFeGHeVyM0AEg\nhRihA0ADI9ABIBAEOgAEgkAHgEAQ6ABSYWBAevbZ6BHxEOgAEjcwIC1aJF1+efRIqMdDoANI3J49\n0t690rvvSvv2Rc9RPQIdQOLmzZOam6UTT5QuvDB6juoxsQhAKgwMRCPz5mbpjDOSriZ5cSYWEegA\nkELMFAWABkagA0AgCHQACASBDqCumDA0eQh0AHXDhKHJRaADqBsmDE0uAh1A3TBhaHJxHTqAumLC\nUDxMLAKAQDCxCAAaGIEOAIEg0AEgEAQ6gNiYNJQuBDqAWJg0lD4EOoBYmDRUe5t7N6v/WH/s7Ql0\nALEwaaj2Wme16t4t98YOda5DBxAbk4Zqr/9Yv+7dcq86F3fWfmKRma2TtFhSn7tflF92lqQfSzpP\n0kFJN7r7m2W2J9ABoAoH+w9q9lmz6zKxaL2kT41YtkLSr9z9HyU9Lemeat4UAFBa/7F+relZE2vb\ncQPd3bdJ+vOIxddJeiT//BFJn4n17gCAQYV2S0dbR6zt454UPcfd+yTJ3Y9KOifmfgAAeT2He9TR\n1qHpp0yPtX2trnKhSQ4EpLdXWr48esTElbocsf9Yvzb3bi5adu3ca2OHuSRNjbldn5nNcPc+M5sp\n6U9jrbxy5crB57lcTrlcLubbAqi33l7pggskd2nNGumll6S5c5OuKtsKlyMWRt+lWivd3d3q7u6e\n0PtUdNmimTVJetzdP5p/fb+k/3P3+81suaSz3H1FmW25ygXIkOXLpQceGHq9YoW0enVy9YSiEOJ3\nt96tNT1rxm2t1OXz0M1sg6ScpPdL6pPULuknkjZK+pCkQ4ouWyx5JTyBDmTL8BG6GSP0WjrYf1Cz\nH5qtA3ceUNP0pjHX5QYXAGqit1dav15aupQwr5VUjNAnikAH0OiG98xH9tDLhTqBDgAptLl3s1pn\ntRaFd/+xfvUc7tG1c68tuQ2BDgCB4J6iANDACHQACASBDgCBINABIBAEOgAEgkAHgBIq/UCtNCHQ\nAaCEkff3LEwGap3VmnBl5XEdOgCUUe10/VpiYhHQwI4ckbq6pMWLpQ98IOlqwlHNB2rVEhOLgAZ1\n5Ig0Z450883R45EjSVcUhsL9PQ/ceUBretaM6qmnDYEOBKCrSzp2LHp+7Jj0858nW08Ihn+AVtP0\nJnW0dRT11NOIlgsQgMII/dgx6ZRTpFdfpe0yUXE+UKuW6KEDDezIkWhk/ulPE+YhINABIBCcFAWA\nBkagA0AgCHQg5QYGpGefjR6BsRDoQIoNDEiLFkmXXx49EuoYC4EOpNiePdLevdK770r79kXPgXII\ndCDF5s2TmpulE0+ULrwweg6Uw2WLQMoNDEQj8+Zm6Ywzkq4Gk4Xr0AEgEFyHDgANjEAHErRhg9TU\nFD0CE0WgAwnZsEH64helQ4eiR0K9vCWblmh33+6iZbv7dmvJpiUJVZRO9NCBhDQ1RWFeMHu29Ic/\nJFZOqu3u260rf3ilnrnpGV0046JRr0NEDx3IkG9/u/j1t76VTB1ZcNGMi/TMTc/oyh9eqY17NwYf\n5nExQgcStGGD9M1vRmG+hO7BuDbu3agbN92ox65/TDc035B0OXXFZYsAglVoszy8+GEt61oW/Aid\nlguAIA3vmd/QfMNg+2XkidJGR6ADddDWJplFjxjb5t7No+7T2X+sX5t7Nw++vm/rfUUj8kJP/b6t\n90
1qrWlHywWosbY26emnh15/4hPSli3J1ZN2w2/GPP2U6aNeNyp66EAKWIk/Qf4ExlYI8btb79aa\nnjUNH+ZSAoFuZgclvSnpuKS/uftlJdYh0NFQGKHHc7D/oGY/NFsH7jygpulNSZeTuCROih6XlHP3\nS0qFOdCItmyJQlwizCvVf6xfa3rW6MCdB7SmZ82onjoqM9FAtxrsAwjOli1Rm6WRw7ySk52FZYWe\nedP0JnW0dejeLfcS6jFMNIxd0lNmtsPMvlqLgoC0W7Ag6pMvWJB0JenWOqu1KJgLwd06q7VovZ7D\nPUU98+mnTFdHW4d6DvdMes1ZN9Ee+j+4+x/N7GxJT0m63d23jVjH29vbB1/ncjnlcrnY7wkkacEC\n6bnnhl7Pny/t2JFcPWnHyc7KdXd3q7u7e/D1qlWrkrvKxczaJQ24+4MjlnNSFMHgCpbqcbIznkk9\nKWpmp5rZ6fnnp0n6pKQ9cfcHZMH8+WO/RjFOdk6uifTQZ0jaZmbPS/qtpMfd/cnalAWk044dQyHe\nyO2WSk54crJz8jGxCEDVKpndubl3s1pntRb1zPuP9avncI+unXttUqVnBjNFgQkY3h/nV3Z8nPCs\nLwIdiImTnfFwwrN++PhcAJOGE57pQ6ADGFTpzZg54ZlOtFyAPHrold+MmROe9UcPHcCENdqt3tKK\nQAfKYPRdnUa6GXNacVIUKGHkFSylrmjBkN19u7Wsa5keu/4xLetaxn07M4RABzCImzFnG4EOYBA3\nY842euhoCPTQkTVxeuhT61UMMBkqDWpCHI2Algsyi5OdkZXPrNSh/kNFyw71H9LKZ1YmUxASQ6AD\nGbf0kqVavGHxYKgf6j+kxRsWa+klSxOuDJONHjoyiw/UGlII8e8u/q5u6bpFXUu6dN7085IuCxPA\nxCI0HE52Dtl2eJsWrV+krUu3auGshUmXgwliYhGCYTb0NRb3oa/QVHJXoIJD/Yd0S9ct2rp0q27p\numVUTx2NgUBH6nCyM9I6q7XoEwwLn3DYOqu1aL1Cu6VrSZcWzlqoriVdRT11NA4CHUip6adMH/xY\n2oP9B0fd4q1g/fPri3rm500/T11LurT++fVJlI0E0UNH6nCysxh3BWpM9NCRCeP1x0eGd4hhPm31\nNHVu7yxa1rm9U9NWTytaxl2BUA0CHZOq0v54yCc7JWl122rd9sRtg6Heub1Ttz1xm1a3rR5ch7sC\noVq0XDCpaKcMKYT49R+5Xpv2b9Laa9bq1stuHfw+dwVqbFyHjtQj0Ivd8NgN2rR/k67/yPXaeOPG\npMtBitBDR6IquXY89P64rTK1dLYULWvpbJGtGn1QOrd3Dob5pv2bRvXUgWoR6KiJaq4dD7k/fvHZ\nF+uF118YDPWWzha98PoLuvjsi4vWK7Rb1l6zVhtv3Ki116wt6qkDcdByQU3QShlSCPGT7CS94+/o\n4rMv1q5bdxWtM231NK1uW13UM+/c3ql7ttyjN+95c7JLRgrRQ0fdjPeZKY0Q6LbKNOfMOXrlrlcG\nl53/nfP16l9elbcX/8ee/G8n6x1/RyfZSfrrv/51sktFAOihoy4qaaeE3huXpDlnztGrf3lV53/n\nfElDYT7nzDlF67V0tgyG+Tv+zqieOlAvjNAxrkYYfVeqEOJTNEXHdXzUiH14z3zXrbtGvQYqRcsF\ndRF6oBeuQBneNim1rOCEVSfouI5riqbovfb3Ru1rZHgXQr3UvoByCHTUTcifOz78kkJv91Gvhxtv\nhA7UCj101E1WLzW0VTbqGvCRy0qNzEcul4p75u+1vzeqpw4kjUBHQygEdakJPtLo8C7VHimEeWFE\n/spdrwyGOpAGtFyQSdX0vUuFeJx1gMk06S0XM7vazF4ys14zWz6RfSEZld7qLa3GG3lL44++K2m/\nAFkQO9DNbIqkf5f0KUnNkr5gZhfUqrC06O7uTrqECRmr/rTf6m2s
2ivte4/8fqnXI7et1cg85N+d\nLMh6/XFMZIR+maT/dvdD7v43Sf8p6bpqdnDF+iu07fC2omXbDm/TFeuvGHxtq0zzH55ftM78h+eP\ne6Kr1LJK1hm5rPBLUYt91bKuWtSvu0YE213J119J7QWV9L0rPflZal8TDfasBwr1Z89EAv2Dkv5n\n2OvX8ssq1tHWoasevUrrntqmL31JWvfUNl316FXqaOsYXOfSGZdqZ99OnXbHfJlJp90xXzv7durS\nGZeW3Kd93aIWwtfLDzcrWaew3qpVtdtXLeuqSf1naijU77Lodbl9VdDaqGSdWu6r0pG3VPvRN5BG\niV7lsnDWQq39+K/0lV9fpR+9vFZf+fVVWvvxX2nhrIWD6zy37Dmd+salevt9O6Xlp+jt9+3UqW9c\nqueWPVe0L2936U1FoXRPPpzeLP4DrmSdUestXFW7fdWyrlrUbyP2ZWOPesuNdittf1S7r1Xdq8ru\nq9K+d71G30Aaxb7Kxcz+SdJKd786/3qFJHf3+0esx18OAMQwaTNFzewESS9LapP0R0nbJX3B3ffH\n2iEAYEKmxt3Q3d8zs9slPamodbOOMAeA5NR9YhEAYHLU7aRo1icdmdlBM3vBzJ43s+1J1zMeM1tn\nZn1mtnvYsrPM7Ekze9nMfmlm05KscSxl6m83s9fM7Pf5r6uTrHEsZnaumT1tZnvN7EUzuyO/PPU/\ngxK1fy2/PBPH38xONrPf5f9WXzSz9vzy1B97acz6qz7+dRmh5ycd9Srqrx+RtEPS5939pZq/WZ2Y\n2R8kXeruf066lkqY2UJJb0l61N0vyi+7X9Ib7v5A/n+qZ7n7iiTrLKdM/e2SBtz9wUSLq4CZzZQ0\n0913mdnpknYqmpexVCn/GYxR++eUneN/qru/nT+31yPpDkn/opQf+4Iy9V+jKo9/vUboE550lAKm\nDH14mbtvkzTyfz7XSXok//wRSZ+Z1KKqUKZ+Kfo5pJ67H3X3Xfnnb0naL+lcZeBnUKb2wpySrBz/\nt/NPT1Z0btCVgWNfUKZ+qcrjX6/AmvCkoxRwSU+Z2Q4z+2rSxcR0jrv3SdEfraRzEq4njtvNbJeZ\nfT+t/2QeycyaJLVI+q2kGVn6GQyr/Xf5RZk4/mY2xcyel3RU0lPuvkMZOvZl6peqPP6ZGYEmoNXd\nPybp05Juy7cEsi5rZ8A7JX3Y3VsU/aJn4Z/+p0vaJOnO/Gh35DFP7c+gRO2ZOf7uftzdL1H0r6LL\nzKxZGTr2Jeq/UDGOf70C/X8lzRr2+tz8ssxw9z/mH1+X9F+K2khZ02dmM6TBPumfEq6nKu7++rDP\nXv6epAVJ1jMeM5uqKBD/w91/ml+ciZ9Bqdqzdvwlyd3/Iqlb0tXKyLEfbnj9cY5/vQJ9h6Tzzew8\nMztJ0ucl/axO71VzZnZqfrQiMztN0icl7Um2qoqYintuP5N0U/75lyX9dOQGKVNUf/6PsOCzSv/P\n4AeS9rn7Q8OWZeVnMKr2rBx/M/v7QjvCzP5O0j8rOg+QiWNfpv6X4hz/ul2Hnr/E5iENTTq6ry5v\nVAdmNlvRqNwVnaD4UdrrN7MNknKS3i+pT1K7pJ9I2ijpQ5IOSbrR3fuTqnEsZeq/UlE/97ikg5Ju\nLvRE08bMWiX9RtKLin5vXNI3FM2gfkwp/hmMUfsSZeD4m9lHFZ30nJL/+rG7d5jZ+5TyYy+NWf+j\nqvL4M7EIAALBSVEACASBDgCBINABIBAEOgAEgkAHgEAQ6AAQCAIdAAJBoANAIP4fvctOtUsuGfQA\nAAAASUVORK5CYII=\n",
344 | "text/plain": [
345 | ""
346 | ]
347 | },
348 | "metadata": {},
349 | "output_type": "display_data"
350 | }
351 | ],
352 | "source": [
353 | "figure(1);plot(data.T[0], scores, 'b.');plot(data.T[0], scores_, 'gx');"
354 | ]
355 | },
356 | {
357 | "cell_type": "markdown",
358 | "metadata": {},
359 | "source": [
360 | "Above, if we compare with previous plot, we can see that the updated anomaly scores end at below 12 (green crosses) while in previous plot, the anomaly scores end at below 20 (blue dots). Thus, the anomalous data got less anomalous given the new observed data set (data_object_2)."
361 | ]
362 | }
363 | ],
364 | "metadata": {
365 | "kernelspec": {
366 | "display_name": "Python 2",
367 | "language": "python",
368 | "name": "python2"
369 | },
370 | "language_info": {
371 | "codemirror_mode": {
372 | "name": "ipython",
373 | "version": 2
374 | },
375 | "file_extension": ".py",
376 | "mimetype": "text/x-python",
377 | "name": "python",
378 | "nbconvert_exporter": "python",
379 | "pygments_lexer": "ipython2",
380 | "version": "2.7.11"
381 | },
382 | "widgets": {
383 | "state": {},
384 | "version": "1.1.1"
385 | }
386 | },
387 | "nbformat": 4,
388 | "nbformat_minor": 0
389 | }
390 |
--------------------------------------------------------------------------------
/docs/pyISC_sklearn_anomaly_detection.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Anomaly Detection Overview"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "An anomaly detector for computing anomaly scores is constructed by providing a set of component distributions that defines the models used by the anomaly detector. Then, in order to train the anomaly detector, the $fit$ method can be called with some training data, and then compute the anomaly scores with the $anomaly\\_score$ method.\n",
15 | "Below, we show how to create and train a bivariate Gaussian distribution and how to compute anomaly scores."
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": 1,
21 | "metadata": {},
22 | "outputs": [
23 | {
24 | "data": {
25 | "text/plain": [
26 | "array([ 0.09595115, 1.07745075, 0.0999642 , 0.05291047, 0.67480946,\n",
27 | " 0.77318013])"
28 | ]
29 | },
30 | "execution_count": 1,
31 | "metadata": {},
32 | "output_type": "execute_result"
33 | }
34 | ],
35 | "source": [
36 | "import numpy as np\n",
37 | "import pyisc\n",
38 | "\n",
39 | "# Get some data:\n",
40 | "X = np.array([[20, 4], [1200, 130], [12, 8], [27, 8], [-9, 13], [2, -6]])\n",
41 | "\n",
42 | "# Create an anomaly detector where the numbers are column indices of the data:\n",
43 | "anomaly_detector = pyisc.AnomalyDetector(\n",
44 | " pyisc.P_Gaussian([0,1])\n",
45 | ")\n",
46 | "\n",
47 | "# The anomaly detector is trained\n",
48 | "anomaly_detector.fit(X) \n",
49 | "\n",
50 | "# Then, we can compute the anomaly scores for the data:\n",
51 | "anomaly_detector.anomaly_score(X)\n",
52 | "\n",
53 | "# The result is anomaly scores (with two decimal precision):\n",
54 | "#array([ 0.10, 1.08, 0.10, 0.05, 0.67, 0.77])"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "By comparing the number pairs in the list, the second element easily stands out as the \"most anomalous\". \n",
 62 | "Similarly, we can create an anomaly detector with the Gamma or Poisson distributions where the numbers are the column indices into the input data:\n"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 11,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "pyisc.P_Gamma(frequency_column=0,period_column=1)\n",
72 | "\n",
73 | "pyisc.P_Poisson(frequency_column=0,period_column=1);"
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "In case we have more than one known class of data points, it is also possible to train the detector to make a separate model for each class. \n",
81 | "In this case, if $y$ is an array with two or more class labels, the anomaly detector can still be similarly trained and likewise compute the anomaly scores:"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 28,
87 | "metadata": {},
88 | "outputs": [
89 | {
90 | "data": {
91 | "text/plain": [
92 | "array([ 0.09595115, 1.07745081, 0.0999642 , 0.05291047, 0.67480948,\n",
93 | " 0.77318014])"
94 | ]
95 | },
96 | "execution_count": 28,
97 | "metadata": {},
98 | "output_type": "execute_result"
99 | }
100 | ],
101 | "source": [
102 | "#Create classes (only one class)\n",
103 | "y = np.zeros(len(X))\n",
104 | "\n",
105 | "#Fit classes\n",
106 | "anomaly_detector.fit(X,y)\n",
107 | "\n",
108 | "anomaly_detector.anomaly_score(X,y)"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {
115 | "collapsed": true
116 | },
117 | "outputs": [],
118 | "source": []
119 | }
120 | ],
121 | "metadata": {
122 | "kernelspec": {
123 | "display_name": "Python 2",
124 | "language": "python",
125 | "name": "python2"
126 | },
127 | "language_info": {
128 | "codemirror_mode": {
129 | "name": "ipython",
130 | "version": 2
131 | },
132 | "file_extension": ".py",
133 | "mimetype": "text/x-python",
134 | "name": "python",
135 | "nbconvert_exporter": "python",
136 | "pygments_lexer": "ipython2",
137 | "version": "2.7.13"
138 | },
139 | "widgets": {
140 | "state": {},
141 | "version": "1.1.1"
142 | }
143 | },
144 | "nbformat": 4,
145 | "nbformat_minor": 1
146 | }
147 |
--------------------------------------------------------------------------------
/docs/pyISC_sklearn_outlier_detection.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Outlier Detection Overview"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "In a similar fashion as when we create an anomaly detector, we can create an outlier detector. \n",
15 | "The outlier detector differs from the anomaly detector since a fraction of outliers (contamination) is known beforehand and the output is a prediction of whether a data point is an outlier or not.\n",
16 | "Consequently, the outlier detector can dynamically select a threshold for deciding when a data point is an outlier or inlier from the training data. \n",
17 | "Below, we use the same data set as in previous section but we now know that there is one anomalous data point - an outlier - and five inliers in the data set."
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 4,
23 | "metadata": {
24 | "collapsed": false
25 | },
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/plain": [
30 | "array([ 1, -1, 1, 1, 1, 1])"
31 | ]
32 | },
33 | "execution_count": 4,
34 | "metadata": {},
35 | "output_type": "execute_result"
36 | }
37 | ],
38 | "source": [
39 | "import numpy as np\n",
40 | "import pyisc\n",
41 | "\n",
42 | "# Data with an outlier in element 1:\n",
43 | "X = [[20, 4], [1200, 130], [12, 8], [27, 8], [-9, 13], [2, -6]] \n",
44 | "\n",
45 | "# Create an outlier detector with the known fraction of outliers: 1 of 6:\n",
46 | "outlier_detector = pyisc.SklearnOutlierDetector(\n",
47 | " contamination=1.0/len(X),\n",
48 | " component_models=pyisc.P_Gaussian([0,1])\n",
49 | ")\n",
50 | "\n",
51 | "# The outlier detector is trained\n",
52 | "outlier_detector.fit(np.array(X)) \n",
53 | "\n",
54 | "# Then, the data is classified into being outliers or not:\n",
55 | "outlier_detector.predict(np.array(X))\n",
56 | "\n",
57 | "# The result is classification of outliers (-1) and inliers (1):\n",
 58 | "#array([ 1, -1, 1, 1, 1, 1])"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "Thus, we are able to detect the second element as an outlier. The outlier detector follows the API used in scikit-learn for outlier detection with known contamination (see http://scikit-learn.org/stable/modules/outlier_detection.html) "
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {
72 | "collapsed": true
73 | },
74 | "outputs": [],
75 | "source": []
76 | }
77 | ],
78 | "metadata": {
79 | "kernelspec": {
80 | "display_name": "Python 2",
81 | "language": "python",
82 | "name": "python2"
83 | },
84 | "language_info": {
85 | "codemirror_mode": {
86 | "name": "ipython",
87 | "version": 2
88 | },
89 | "file_extension": ".py",
90 | "mimetype": "text/x-python",
91 | "name": "python",
92 | "nbconvert_exporter": "python",
93 | "pygments_lexer": "ipython2",
94 | "version": "2.7.11"
95 | },
96 | "widgets": {
97 | "state": {},
98 | "version": "1.1.1"
99 | }
100 | },
101 | "nbformat": 4,
102 | "nbformat_minor": 0
103 | }
104 |
--------------------------------------------------------------------------------
/docs/pyISC_tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# The pyISC Interactive Tutorial"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Anomaly Detection\n",
15 | "(with unknown fraction of outliers)"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "Anomaly Detection Overview"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "Simple Anomaly Detection with Frequency Data"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "metadata": {},
35 | "source": [
36 | "Multivariate Anomaly Detection"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "Anomaly Detection with Classes"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "## Outlier Detection \n",
51 | "(with known fraction of outliers)"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "metadata": {},
57 | "source": [
58 | "Outlier Detection Overview"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "Comparison of outlier detectors"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "Outlier detection using real-world data"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "## Classification"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "Using pyISC as Classifier"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "metadata": {
93 | "collapsed": true
94 | },
95 | "outputs": [],
96 | "source": []
97 | }
98 | ],
99 | "metadata": {
100 | "kernelspec": {
101 | "display_name": "Python 2",
102 | "language": "python",
103 | "name": "python2"
104 | },
105 | "language_info": {
106 | "codemirror_mode": {
107 | "name": "ipython",
108 | "version": 2
109 | },
110 | "file_extension": ".py",
111 | "mimetype": "text/x-python",
112 | "name": "python",
113 | "nbconvert_exporter": "python",
114 | "pygments_lexer": "ipython2",
115 | "version": "2.7.13"
116 | },
117 | "widgets": {
118 | "state": {},
119 | "version": "1.1.1"
120 | }
121 | },
122 | "nbformat": 4,
123 | "nbformat_minor": 1
124 | }
125 |
--------------------------------------------------------------------------------
/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 | name: pyisc
3 | version: "develop"
4 |
5 | source:
6 | path: .
7 |
8 | build:
9 | detect_binary_files_with_prefix: True
10 |
11 |
12 | requirements:
13 | build:
14 | - swig
15 | - python {{python}}
16 | - numpy
17 | - requests
18 | - setuptools
19 | - mingw [win]
20 | - libpython 1.0 [win]
21 | - mkl 2017.0.3 [win]
22 |
23 | run:
24 | - libcxx [not win]
25 | - python
26 | - numpy
27 | - scipy
28 | - scikit-learn
29 | - swig
30 | - libpython 1.0 [win]
31 | - mkl 2017.0.3 [win]
32 |
33 |
34 | test:
35 | imports:
36 | - pyisc
37 |
38 | about:
39 | home: https://www.sics.se
40 | license: LGPLv3
41 | license_file: LICENSE
42 |
--------------------------------------------------------------------------------
/pyisc.i:
--------------------------------------------------------------------------------
1 |
2 | %module pyisc
3 |
4 | %{
5 | #define SWIG_FILE_WITH_INIT
6 |
7 | /* Includes the header in the wrapper code */
8 | #include "dataformat/format.hh"
9 | #include "dataformat/formatbinary.hh"
10 | #include "dataformat/formatcont.hh"
11 | #include "dataformat/formatdiscr.hh"
12 | #include "dataformat/formatsymbol.hh"
13 | #include "dataformat/dynindvector.hh"
14 | #include "dataformat/data.hh"
15 | #include "isc2/isc_exportimport.hh"
16 | #include "isc2/isc_micromodel.hh"
17 | #include "isc2/hmatrix.hh"
18 | #include "isc2/isc_micromodel_gaussian.hh"
19 | #include "isc2/isc_component.hh"
20 | #include "isc2/isc_mixture.hh"
21 | #include "isc2/gamma.hh"
22 | #include "isc2/isc_micromodel_multigaussian.hh"
23 | #include "isc2/hgf.hh"
24 | #include "isc2/isc_micromodel_poissongamma.hh"
25 | #include "isc2/isc_micromodel_markovgaussian.hh"
26 | //#include "isc2/isc_micromodel_multidirichlet.hh"
27 | #include "isc2/anomalydetector.hh"
28 | #include "src/_Format.hh"
29 | #include "src/_DataObject.hh"
30 | #include "src/_AnomalyDetector.hh"
31 | #include "src/_JSonExporter.hh"
32 | #include "src/_JSonImporter.hh"
33 |
34 | %}
35 | %include
36 | %include "numpy.i"
37 | %include
38 | %include
39 | %init %{
40 | import_array();
41 | %}
42 |
43 |
44 |
45 |
46 |
47 | %inline %{
48 | /* Create any sort of [size] array */
49 |
50 | int *_int_array(int size) {
51 | return (int *) new int[size];
52 | }
53 |
54 | int **_int_pointer_array(int size) {
55 | return (int **) new int*[size];
56 | }
57 |
58 | void _free_array_int_pointer(int** array, int length) {
59 | delete [] array;
60 | }
61 |
62 | void _set_int_array(int** array2D, int index, int*array1D) {
63 | array2D[index] = array1D;
64 | }
65 |
66 | void _set_array_value(int *array1, int index, int val) {
67 | array1[index] = val;
68 | }
69 |
70 | int _get_array_value(int *array1, int index) {
71 | return array1[index];
72 | }
73 |
74 | double* _double_array(int size) {
75 | return (double*) new double[size];
76 | }
77 |
78 | int _get_int_value(int *array1, int index) {
79 | return array1[index];
80 | }
81 |
82 |
83 | intfloat* _intfloat_array(int size) {
84 | return (intfloat*) new intfloat[size];
85 | }
86 |
87 | void _free_array_intfloat(intfloat* array) {
88 | delete [] array;
89 | }
90 |
91 | float _get_intfloat_value(intfloat *array1, int index) {
92 | return (float) array1[index];
93 | }
94 |
95 |
96 | double _get_double_value(double* array1, int index) {
97 |
98 | return array1[index];
99 | }
100 |
101 | double _set_double_value(double* array1, int index, double value) {
102 | array1[index] = value;
103 | }
104 |
105 | double* _to_cpp_array(double* IN_ARRAY1, int DIM1) {
106 | double* out_array = new double[DIM1];
107 | for(int i=0; i < DIM1; i++) {
108 | out_array[i] = IN_ARRAY1[i];
109 | }
110 |
111 | return out_array;
112 | }
113 |
114 | void _free_array_double(double* array) {
115 | delete [] array;
116 | }
117 |
118 | int* _to_cpp_array_int(int* IN_ARRAY1, int DIM1) {
119 | int* out_array = new int[DIM1];
120 | for(int i=0; i < DIM1; i++) {
121 | out_array[i] = IN_ARRAY1[i];
122 | }
123 |
124 | return out_array;
125 | }
126 |
127 | void _free_array_int(int* array) {
128 | delete [] array;
129 | }
130 |
131 | void _to_numpy_array_double(double* inarray, double* ARGOUT_ARRAY1, int DIM1) {
132 | for(int i=0; i < DIM1; i++) {
133 | ARGOUT_ARRAY1[i] = inarray[i];
134 | }
135 | }
136 |
137 | void _to_numpy_array_int(int* inarray, int* ARGOUT_ARRAY1, int DIM1) {
138 | for(int i=0; i < DIM1; i++) {
139 | ARGOUT_ARRAY1[i] = inarray[i];
140 | }
141 | }
142 |
143 |
144 | char* _get_string_value(char** strings, int i) {
145 | return strings[i];
146 | }
147 |
148 |
149 | IscMarkovGaussMicroModel** _to_pointer(std::vector vec) {
150 | IscMarkovGaussMicroModel** new_vec = new IscMarkovGaussMicroModel*[vec.size()];
151 | for(int i=0; i < vec.size(); i++) {
152 | new_vec[i] = vec[i];
153 | }
154 | return new_vec;
155 | }
156 |
157 | void _free_pointer(IscMarkovGaussMicroModel** new_vec) {
158 | delete [] new_vec;
159 | }
160 |
161 |
162 | %}
163 |
164 | %apply (double* IN_ARRAY1, int DIM1) {(double* in_array1D, int num_of_columns)}
165 | %apply (double* IN_ARRAY2, int DIM1, int DIM2) {(double* in_array2D, int num_of_rows, int num_of_columns)}
166 | %apply (double* ARGOUT_ARRAY1, int DIM1) {(double* deviations, int deviations_length)}
167 | %apply (int* ARGOUT_ARRAY1, int DIM1) {(int* class_ids, int class_ids_length)}
168 | %apply (int* ARGOUT_ARRAY1, int DIM1) {(int* cluster_ids, int cluster_ids_length)}
169 | %apply (double* ARGOUT_ARRAY1, int DIM1) {(double* out_1DArray, int num_of_elements)}
170 | %apply (double* ARGOUT_ARRAY1, int DIM1) {(double* logp, int size)}
171 |
172 | /* Parse the header file to generate wrappers */
173 |
174 | enum IscCombinationRule {IscMin, IscMax, IscPlus};
175 |
176 |
177 | %ignore IscCombinationRule;
178 | %ignore IscMin;
179 | %ignore IscMax;
180 | %ignore IscPlus;
181 |
182 | %rename ("_%s", regexmatch$name="^Isc") "";
183 |
184 | %include "isc2/isc_exportimport.hh"
185 | %include "src/_Format.hh"
186 | %include "src/_DataObject.hh"
187 | %include "src/_AnomalyDetector.hh"
188 | %include "src/_JSonExporter.hh"
189 | %include "src/_JSonImporter.hh"
190 |
191 | %include "isc2/isc_component.hh"
192 | %include "isc2/isc_micromodel.hh"
193 | %include "isc2/isc_micromodel_multigaussian.hh"
194 | %include "isc2/isc_micromodel_poissongamma.hh"
195 | %include "isc2/isc_micromodel_markovgaussian.hh"
196 |
197 |
198 | #%include "isc2/isc_micromodel_multidirichlet.hh"
199 | %template(_IscMicroModelVector) std::vector<IscMicroModel*>;
200 | %template(_IscMarkovGaussMicroModelVector) std::vector<IscMarkovGaussMicroModel*>;
201 |
202 |
203 | %pythoncode %{
204 | from _pyisc_modules.BaseISC import *
205 | from _pyisc_modules.AnomalyDetector import *
206 | from _pyisc_modules.DataObject import *
207 | from _pyisc_modules.SklearnClassifier import *
208 | from _pyisc_modules.SklearnOutlierDetector import *
209 | from _pyisc_modules.AnomalyClustering import *
210 | from _pyisc_modules.OutlierClustering import *
211 | from numpy import array, dtype, double
212 |
213 |
def _to_numpy_array(inarray, n, type=double):
    """Copy n elements from a C array pointer into a numpy array.

    `type` selects the element kind: numpy double (default) or int.
    For any other type, prints a message and implicitly returns None.
    """
    if type == double:
        return _to_numpy_array_double(inarray,n);
    elif type == int:
        return _to_numpy_array_int(inarray,n);
    print ("Unknown type ", type)
220 |
221 | %}
222 |
223 |
224 |
225 | %extend pyisc::_DataObject {
226 | intfloat* _DataObject::__getitem__(int i) {
227 | return _get_intfloat(i);
228 | }
229 | }
230 |
231 |
232 |
233 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from setuptools import setup,Extension
4 |
5 | from numpy.distutils.misc_util import get_numpy_include_dirs
6 | from distutils.sysconfig import get_python_lib;
7 |
8 |
9 | '''
10 | In order to create a source distribution run setup build_ext sdist, otherwise the pyisc.py will not be generated from
11 | pyisc.i, which is not distributed in the source distribution, only the generated sources are distributed.
12 | '''
13 |
14 | ######## import numpy.i ##########
15 | # Import numpy.i from current version.
16 | # See http://stackoverflow.com/questions/21855775/numpy-i-is-missing-what-is-the-recommended-way-to-install-it
17 |
18 |
np_file_name = 'numpy.i'

# numpy.i is not shipped with numpy wheels; fetch the version matching the
# installed numpy from the numpy GitHub repository (see the comment above).
if not os.path.exists(np_file_name):
    import re
    import requests
    import numpy

    # FIX: the regex group names had been lost, leaving the invalid pattern
    # "(?P[0-9]+)"; groupdict() and the ['MAJOR']/['MINOR'] lookups below
    # require named groups, so they are restored here.
    np_version = re.compile(r'(?P<MAJOR>[0-9]+)\.'
                            r'(?P<MINOR>[0-9]+)') \
        .search(numpy.__version__)
    np_version_string = np_version.group()
    np_version_info = {key: int(value)
                       for key, value in list(np_version.groupdict().items())}

    # numpy keeps numpy.i under tools/swig on its maintenance branches
    # (doc/swig for releases before 1.9).
    np_file_url = 'https://raw.githubusercontent.com/numpy/numpy/maintenance/' + \
                  np_version_string + '.x/tools/swig/' + np_file_name
    if(np_version_info['MAJOR'] == 1 and np_version_info['MINOR'] < 9):
        np_file_url = np_file_url.replace('tools', 'doc')

    # Stream the download to disk in chunks.
    chunk_size = 8196
    with open(np_file_name, 'wb') as file:
        for chunk in requests.get(np_file_url,
                                  stream=True).iter_content(chunk_size):
            file.write(chunk)
44 |
45 | ###### END numpy.i import #######
46 |
# Extra compiler flags, filled in per-platform further below.
extra_flags = []

# Root directory of the bundled C++ sources (submodules in the repo root).
disc_dir = "."

# Source directories for the C++ code and the python wrapper modules.
arduinojson_dir = os.path.join("ArduinoJson","src")
dataframe_src_dir = os.path.join(disc_dir,'dataformat')
isc_src_dir = os.path.join(disc_dir, 'isc2')
pyisc_src_dir = "src"
pyisc_module_dir = "_pyisc_modules"
# NOTE(review): despite the name these look like compiler flags rather than
# library names, and they are never passed to Extension() in this file —
# verify whether isclibraries is still used.
isclibraries = ["-Wall", "-O"]

numpyincdir = get_numpy_include_dirs()

# Pure-python modules to install: everything under _pyisc_modules plus the
# SWIG-generated top-level pyisc module.
py_modules = [
    os.path.join(pyisc_module_dir, src) for src in
    ["__init__",
     "BaseISC",
     "AnomalyDetector",
     "DataObject",
     "SklearnOutlierDetector",
     "SklearnClassifier",
     "AnomalyClustering",
     "OutlierClustering",
     ]
]\
+["pyisc"]


# Installation target: the active site-packages directory.
pylib = get_python_lib()
76 |
# Must be updated if file structure has changed.
# `python setup.py uninstall` removes every artifact a previous install
# may have left in site-packages, then exits without running setup().
if "uninstall" in sys.argv:

    from glob import glob
    # FIX: the original used `os.path.os.path.join`, which only worked by
    # accident (os.path re-exports os); use os.path.join directly.
    files = [os.path.join(pylib, mod)+".py" for mod in py_modules] + \
            [os.path.join(pylib, mod)+".pyc" for mod in py_modules] + \
            [os.path.join(pylib,pyisc_module_dir)] + \
            [os.path.join(pylib, "pyisc-1.0-py2.7.egg-info")] + \
            glob(os.path.join(pylib, "_pyisc.*"))

    for file in files:
        if os.path.exists(file):
            if os.path.isdir(file):
                os.removedirs(file)
            else:
                os.remove(file)
                print("removing "+file)

    sys.exit()
97 |
# Add extra flags as needed; look in file our-g++.
if sys.platform == 'darwin':
    isclibraries += ["z"]
    extra_flags = ["-DPLATFORM_MAC"]
elif sys.platform == "win32":
    extra_flags = ["-DPLATFORM_MSW"]
else:  # Default, works for Linux
    isclibraries += ["z"]
    # FIX: "-DUSE_WCHAR -DPLATFORM_GTK" was a single list element, which the
    # compiler would receive as one malformed argument; split into two flags.
    extra_flags = ["-Wmissing-declarations", "-DUSE_WCHAR", "-DPLATFORM_GTK"]

#extra_flags += ['-std=c++11']
110 |
# C++ source lists. The .o names are historical; they are mapped to the
# corresponding .cc files before being handed to Extension().
dataframe_sources = [os.path.join(dataframe_src_dir, src)
                     for src in "readtokens.o table.o format.o formatdispatch.o formatbinary.o " \
                                "formatdiscr.o formatcont.o formatsymbol.o formattime.o formatunknown.o " \
                                "data.o datafile.o datadispatch.o".replace(".o", ".cc").split()]

# Core ISC anomaly-detection sources.
isc_sources = [os.path.join(isc_src_dir, src)
               for src in "anomalydetector.o isc_mixture.o isc_component.o isc_micromodel_poissongamma.o " \
                          "isc_micromodel_gaussian.o isc_micromodel_multigaussian.o " \
                          "isc_micromodel_markovgaussian.o " \
                          "hmatrix.o gamma.o hgf.o"
                          .replace(".o", ".cc").split()]

# SWIG bridge sources and their matching headers.
pyisc_sources = [os.path.join(pyisc_src_dir, src) for src in ["_Format.cc", "_DataObject.cc", "_AnomalyDetector.cc", "_JSonExporter.cc", "_JSonImporter.cc", "mystring.cc"]]
pyisc_headers = [s.replace(".cc", ".hh") for s in pyisc_sources]
125 |
# Only run when creating the distribution, not when installing it on someone
# else's computer. Removes dependency on Swig: pyisc.i is present only in the
# repository, so the SWIG step is skipped for source-distribution installs.
if os.path.exists('pyisc.i'):
    setup(name="pyisc",
          author="Tomas Olsson",
          author_email="tomas.olsson@ri.se",
          url="http://www.sics.se",
          version="1.0",
          ext_modules=[
              Extension("_pyisc",
                        language='c++',
                        sources=["pyisc.i"]+dataframe_sources+isc_sources+pyisc_sources,
                        include_dirs=[disc_dir, isc_src_dir, dataframe_src_dir, pyisc_src_dir, arduinojson_dir]+numpyincdir,
                        extra_compile_args=extra_flags,
                        swig_opts=['-c++','-I'+str(disc_dir)])
          ],
          license="LGPLv3",
          classifiers=[
              'License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)'
          ]
          )

# The following overlapping setup is only run in order to include pyisc.py
# when all *.py files are copied to the same folder.
setup(name="pyisc",
      author="Tomas Olsson",
      author_email="tomas.olsson@ri.se",
      url="http://www.sics.se",
      version="1.0",
      ext_modules=[
          Extension("_pyisc",
                    language='c++',
                    sources=["pyisc.i"]+dataframe_sources+isc_sources+pyisc_sources,
                    include_dirs=[disc_dir, isc_src_dir,dataframe_src_dir,pyisc_src_dir, arduinojson_dir]+numpyincdir,
                    extra_compile_args=extra_flags,
                    swig_opts=['-c++', '-I'+str(disc_dir)])
      ],
      py_modules=py_modules,
      license="LGPLv3+",
      classifiers=[
          'License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)'
      ]
      )
167 |
168 |
169 |
--------------------------------------------------------------------------------
/setup2.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | from distutils.sysconfig import get_python_lib;
4 | pylib = get_python_lib()
5 |
# Copy everything in the current directory except the setup scripts
# themselves into site-packages, replacing any previously copied trees.
for entry in os.listdir('.'):
    if entry.startswith('setup'):
        continue
    destination = os.path.join(pylib, entry)
    print("copy", entry, "to", destination)
    if not os.path.isdir(entry):
        shutil.copy(entry, pylib)
    else:
        if os.path.exists(destination):
            shutil.rmtree(destination)
        shutil.copytree(entry, destination)
16 |
--------------------------------------------------------------------------------
/src/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STREAM3/pyISC/b5615fe5d6b3e474f7afcdf3f3e44b3dded2e889/src/.DS_Store
--------------------------------------------------------------------------------
/src/_AnomalyDetector.cc:
--------------------------------------------------------------------------------
1 | /*
2 | --------------------------------------------------------------------------
3 | Copyright (C) 2014, 2015, 2016 SICS Swedish ICT AB
4 |
5 | Main author: Tomas Olsson
6 |
7 | This code is free software: you can redistribute it and/or modify it
8 | under the terms of the GNU Lesser General Public License as published
9 | by the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | This code is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with this code. If not, see .
19 | --------------------------------------------------------------------------
20 | */
21 |
22 | #include "_AnomalyDetector.hh"
23 | #include <stdio.h>
24 | #ifdef WIN32
25 | #define _USE_MATH_DEFINES
26 | #include <math.h>
27 | #endif
28 |
29 |
30 | /**
31 | * This is a function used to create a micro model for a given mixture component
32 | *
33 | * co is the creating object, that is, the inner anomaly detector.
34 | */
::IscMicroModel *inner_create_micro_model(const void* co, int mixtureCompIndex)
{
	// `co` is the creating object handed back by the inner ::AnomalyDetector
	// (registered in the _AnomalyDetector constructor); delegate to it.
	return ((pyisc::_AnomalyDetector*)co)->_CreateMixtureComponet(mixtureCompIndex);
}
39 |
40 | namespace pyisc {
41 |
// Create a fresh micro model for the given mixture component by cloning
// the stored prototype via its create() method.
::IscMicroModel *_AnomalyDetector::_CreateMixtureComponet(int mixtureComponentIndex) {
	return this->component_distribution_creators[mixtureComponentIndex]->create();
}
45 |
46 |
// Builds the wrapped ISC anomaly detector with one micro-model creator per
// mixture component.  The base class gets inner_create_micro_model as its
// creation callback, which routes back into _CreateMixtureComponet above.
// NOTE(review): the std::vector template arguments were lost in text
// extraction; presumably the element type is ::IscMicroModel* — confirm
// against version control before editing.
_AnomalyDetector::_AnomalyDetector(
		int off, int splt, double th,
		int cl, ::IscCombinationRule cr,
		std::vector component_distribution_creators) :
		::AnomalyDetector(component_distribution_creators.size(),off,splt,th,cl,cr, inner_create_micro_model) {

	// Keep private clones of the creators; they are freed in the destructor.
	for(int i=0; i < component_distribution_creators.size(); i++) {
		this->component_distribution_creators.push_back(component_distribution_creators[i]->create());
	}
	if(DEBUG)
		printf("_AnomalyDetector created\n");
}
59 |
60 |
61 | void _AnomalyDetector::importModel(IscAbstractModelImporter *importer) {
62 | if(DEBUG)
63 | printf("_AnomalyDetector calling importer\n");
64 |
65 | IscAbstractModelImporter *innerImporter = importer->getModelImporter("AnomalyDetector");
66 |
67 | if(DEBUG)
68 | printf("_AnomalyDetector importer cannot reach this far \n");
69 |
70 | ::AnomalyDetector::importModel(innerImporter);
71 |
72 | delete innerImporter;
73 |
74 | if(DEBUG)
75 | printf("_AnomalyDetector imported\n");
76 | }
77 |
78 |
79 | void _AnomalyDetector::exportModel(IscAbstractModelExporter *exporter) {
80 | IscAbstractModelExporter *innerExporter = exporter->createModelExporter("AnomalyDetector");
81 | ::AnomalyDetector::exportModel(innerExporter);
82 | delete innerExporter;
83 |
84 | if(DEBUG)
85 | printf("_AnomalyDetector exported\n");
86 |
87 | }
88 |
89 | _AnomalyDetector::~_AnomalyDetector() {
90 | if(DEBUG)
91 | printf("_AnomalyDetector deletion started\n");
92 |
93 | for(int i=0; i < this->component_distribution_creators.size(); i++) {
94 | delete this->component_distribution_creators[i];
95 | }
96 |
97 | if(DEBUG)
98 | printf("_AnomalyDetector deleted\n");
99 | }
100 |
101 |
// Thin delegate: update offset, split column, anomaly threshold and
// clustering flag on the underlying ISC detector.
void _AnomalyDetector::_SetParams(int off, int splt, double th, int cl) {
	::AnomalyDetector::SetParams(off,splt,th,cl);
}
105 |
// Thin delegate: reset the underlying ISC detector's learned state.
void _AnomalyDetector::_Reset() {
	::AnomalyDetector::Reset();
}
109 |
110 | void _AnomalyDetector::_TrainOne(Format* format, double* in_array1D, int num_of_columns) {
111 | intfloat* vec = new intfloat[num_of_columns];
112 | for (int j = 0; j < num_of_columns; j++) {
113 | if (format->get_isc_format()->nth(j)->type() == FORMATSPEC_DISCR) {
114 | vec[j].i = (int) in_array1D[j];
115 | } else if (format->get_isc_format()->nth(j)->type()
116 | == FORMATSPEC_CONT) {
117 | vec[j].f = (float) in_array1D[j];
118 | }
119 | }
120 | ::AnomalyDetector::TrainOne(vec);
121 |
122 | delete [] vec;
123 | }
124 |
125 | void _AnomalyDetector::_UntrainOne(Format* format, double* in_array1D, int num_of_columns) {
126 | intfloat* vec = new intfloat[num_of_columns];
127 | for (int j = 0; j < num_of_columns; j++) {
128 | if (format->get_isc_format()->nth(j)->type() == FORMATSPEC_DISCR) {
129 | vec[j].i = (int) in_array1D[j];
130 | } else if (format->get_isc_format()->nth(j)->type()
131 | == FORMATSPEC_CONT) {
132 | vec[j].f = (float) in_array1D[j];
133 | }
134 | }
135 | ::AnomalyDetector::UntrainOne(vec);
136 |
137 | delete [] vec;
138 | }
139 |
140 | void _AnomalyDetector::_TrainDataIncrementally(pyisc::_DataObject* d) {
141 | for(int i=0; i < d->size(); i++) {
142 | ::AnomalyDetector::TrainOne((*d->get_isc_data_object())[i]);
143 | }
144 |
145 | }
146 |
147 | void _AnomalyDetector::_UntrainDataIncrementally(pyisc::_DataObject* d) {
148 | for(int i=0; i < d->size(); i++) {
149 | ::AnomalyDetector::UntrainOne((*d->get_isc_data_object())[i]);
150 | }
151 |
152 | }
153 |
// Thin delegate: batch-train on the whole wrapped data object.
void _AnomalyDetector::_TrainData(_DataObject* d) {
	::AnomalyDetector::TrainData(d->get_isc_data_object());
}
157 |
158 | void _AnomalyDetector::_CalcAnomaly(class _DataObject* d, double* deviations, int deviantions_length) {
159 | if( deviantions_length != d->size()) {
160 | printf("Wrong deviations lengths");
161 | }
162 | ::AnomalyDetector::CalcAnomaly(d->get_isc_data_object(), deviations);
163 | }
164 |
165 | void _AnomalyDetector::_ClassifyData(class _DataObject* d, int* class_ids, int class_ids_length,
166 | int* cluster_ids, int cluster_ids_length) {
167 | if( class_ids_length != d->size() && cluster_ids_length != d->size()) {
168 | printf("Wrong number of classes or clusters");
169 | }
170 |
171 | ::AnomalyDetector::ClassifyData(d->get_isc_data_object(), class_ids, cluster_ids);
172 |
173 | }
174 |
// Thin delegate: compute detailed anomaly information for one row.
// anom/cla/clu are passed as pointers from SWIG and dereferenced into the
// by-reference out-parameters of the base-class call; they must be non-null.
int _AnomalyDetector::_CalcAnomalyDetails(union intfloat* vec,
		double* anom, int* cla, int* clu, double* devs, union intfloat* peak,
		union intfloat* min, union intfloat* max, double* expect, double* var) {
	return ::AnomalyDetector::CalcAnomalyDetails(vec, *anom, *cla, *clu, devs, peak, min, max, expect, var);
}
180 |
// Fills logp[0..n) with the per-row log-probability under the trained model.
// NOTE(review): this body was visibly corrupted by text extraction — the loop
// header and the row-fetch statements were fused into the single line
// "for (i=0; ilogp(vec+offset, id);" (an "offset" variable declaration was
// also lost).  The code is kept byte-identical to the corrupted source here;
// restore the original from version control before modifying.
// NOTE(review): min_logp is computed but never returned or stored — presumably
// leftover from an earlier normalization step; confirm before removing.
void _AnomalyDetector::_LogProbabilityOfData(class _DataObject* data, double* logp, int size) {
	::DataObject *d = data->get_isc_data_object();
	int i, id = -1;
	intfloat* vec;
	int n = d->size();
	double min_logp=HUGE_VALF;
	for (i=0; ilogp(vec+offset, id);
		if(logp[i] < min_logp) {
			min_logp = logp[i];
		}
	}
}
197 |
198 | /*
199 | int AnomalyDetector::CalcAnomalyDetailsSingle(union intfloat* vec,
200 | int mmind, int cla, int clu, double* devs, union intfloat* peak,
201 | union intfloat* min, union intfloat* max, double* expect, double* var) {
202 | }*/
203 |
204 |
205 |
206 |
207 |
208 | void _AnomalyDetector::_CalcAnomalyDetailPerformanceTest(pyisc::_DataObject* d) {
209 | ::DataObject* data = d->get_isc_data_object();
210 | double* expect2 = new double[d->length()];
211 | double dum3;
212 | int dum1, dum2;
213 |
214 | double *devs = new double[::AnomalyDetector::len];
215 |
216 | for(int i=0; i < d->size(); i++) {
217 | ::AnomalyDetector::CalcAnomalyDetails((*data)[i], dum3, dum1, dum2, devs,
218 | 0,0,0,expect2,0);
219 | }
220 |
221 | delete [] devs;
222 | delete [] expect2;
223 |
224 | }
225 |
226 |
227 |
228 | } /* namespace pyisc */
229 |
230 |
231 |
--------------------------------------------------------------------------------
/src/_AnomalyDetector.hh:
--------------------------------------------------------------------------------
1 | /*
2 | --------------------------------------------------------------------------
3 | Copyright (C) 2014, 2015, 2016 SICS Swedish ICT AB
4 |
5 | Main author: Tomas Olsson
6 |
7 | This code is free software: you can redistribute it and/or modify it
8 | under the terms of the GNU Lesser General Public License as published
9 | by the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | This code is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with this code. If not, see .
19 | --------------------------------------------------------------------------
20 | */
21 |
22 | #ifndef ANOMALYDETECTOR2_HH_
23 | #define ANOMALYDETECTOR2_HH_
24 |
25 | #include
26 | #include
27 | #include
28 | #include
29 | #include
30 | #include "_DataObject.hh"
31 | #include
32 | #include
33 | #include "isc_exportimport.hh"
34 |
35 |
36 | namespace pyisc {
37 |
38 |
// SWIG-facing wrapper around the core ::AnomalyDetector.  It owns one
// micro-model creator per mixture component (cloned in the constructor, freed
// in the destructor) and forwards all _-prefixed operations to the base
// class, converting between numpy-style double arrays and ISC intfloat rows.
// NOTE(review): the base class is inherited privately (class-default access);
// get_isc_anomaly_detector() still compiles because the conversion happens
// inside the class — confirm this is intentional.
class _AnomalyDetector : ::AnomalyDetector {
public:

	// NOTE(review): std::vector template arguments were lost in extraction;
	// presumably the element type is ::IscMicroModel* — confirm against VCS.
	_AnomalyDetector(
			int off,
			int splt,
			double th,
			int cl,
			::IscCombinationRule cr,
			std::vector vector);
	/**
	 * n is number of isc mixture models
	 * off is the first column containing features used by the detector
	 * splt is a the column containing a known class
	 * th is a threshold on when to consider a vector of data as anomalous
	 * cl is a variable if zero indicate no clustering else indicates that clustering should be done
	 * cr is variable indicating how the anomaly scores for the different isc mixture components should be combined
	 * cf is a function that creates a isc micro component for each of the n isc mixture component.
	 *
	 *
	 * An isc micro model uses or more columns as input.
	 *
	 * Pattern of input data vector: (ignored columns(header), distribution components, #distribution input values per component)
	 *
	 */
//	_AnomalyDetector(int n, int off, int splt, double th, int cl,
//			::IscCombinationRule cr, ::IscCreateFunc cf); // Or a creation function for the appropriate micromodels can be used
	virtual ~_AnomalyDetector();

	// Serialization: read/write the detector state via the abstract
	// importer/exporter interface (see _JSonImporter/_JSonExporter).
	virtual void importModel(IscAbstractModelImporter *importer);
	virtual void exportModel(IscAbstractModelExporter *exporter);
	virtual void _SetParams(int off, int splt, double th, int cl);
	virtual void _Reset();
	// Single-row training: in_array1D is converted per the format's columns.
	virtual void _TrainOne(Format* format, double* in_array1D, int num_of_columns);
	virtual void _UntrainOne(Format* format, double* in_array1D, int num_of_columns);
	virtual void _TrainData(_DataObject* d);
	virtual void _TrainDataIncrementally(_DataObject* d);
	virtual void _UntrainDataIncrementally(_DataObject* d);

	// Scoring: *_length parameters exist so SWIG can validate numpy buffers.
	virtual void _CalcAnomaly(class _DataObject* d, double* deviations, int deviations_length);
	virtual void _ClassifyData(class _DataObject* d, int* class_ids, int class_ids_length, int* cluster_ids, int cluster_ids_length);

	virtual int _CalcAnomalyDetails(union intfloat* vec, double* anom, int* cla,
			int* clu, double* devs = 0, union intfloat* peak = 0,
			union intfloat* min = 0, union intfloat* max = 0,
			double* expect = 0, double* var = 0);
	/*virtual int CalcAnomalyDetailsSingle(union intfloat* vec, int mmind,
			int cla, int clu, double* devs = 0, union intfloat* peak = 0,
			union intfloat* min = 0, union intfloat* max = 0,
			double* expect = 0, double* var = 0);*/

	// Callback target used by inner_create_micro_model (see the .cc file).
	virtual ::IscMicroModel *_CreateMixtureComponet(int mixtureComponentIndex);

	virtual ::AnomalyDetector* get_isc_anomaly_detector() {return this;};

	virtual void _CalcAnomalyDetailPerformanceTest(pyisc::_DataObject* obj);

	virtual void _LogProbabilityOfData(class _DataObject* d, double* logp, int size);

private:
	// Creator objects cloned in the constructor; owned by this instance.
	// NOTE(review): template arguments lost in extraction (see constructor).
	std::vector component_distribution_creators;
};
101 |
102 |
103 | } /* namespace pyisc */
104 |
105 | #endif /* ANOMALYDETECTOR2_HH_ */
106 |
--------------------------------------------------------------------------------
/src/_DataObject.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * DataObject.cc
3 | *
4 | * Created on: Mar 6, 2015
5 | * Author: tol
6 | */
7 |
8 | #include "_DataObject.hh"
9 | #include
10 |
11 |
12 | namespace pyisc {
13 |
14 | //double* _to_cpp_array(double* in_array1D, int num_of_columns) {
15 | // return in_array1D;
16 | //}
17 |
18 | void _DataObject::init(pyisc::Format* format) {
19 | is_data_obj_created = 1;
20 | is_data_format_created = 0;
21 |
22 | isc_data_obj = new ::DataObject(format->get_isc_format());
23 | data_format = format;
24 |
25 | if(DEBUG)
26 | printf("Create _DataObject\n");
27 | }
28 |
// Create an empty data object for the given column format.
_DataObject::_DataObject(pyisc::Format *format) {
	init(format);
}
32 |
// Create a data object pre-populated from a row-major 2D double array.
_DataObject::_DataObject(pyisc::Format *format, double* in_array2D, int num_of_rows,
		int num_of_columns) {
	init(format);
	add2DArray(in_array2D, num_of_rows, num_of_columns);
}
38 |
// Load format and data from files; both the ISC data object and the wrapping
// Format are allocated here, so both ownership flags are set for cleanup.
_DataObject::_DataObject(const char* formatfile, const char* datafile) {
	isc_data_obj = new ::DataObject(formatfile,datafile);
	data_format = new Format(isc_data_obj->format());
	is_data_obj_created = 1;
	is_data_format_created = 1;
}
// Wrap an existing ISC data object.  The data object stays owned by the
// caller (flag 0); only the Format wrapper created here is owned (flag 1).
_DataObject::_DataObject(::DataObject* data_object) {
	isc_data_obj = data_object;
	data_format = new Format(isc_data_obj->format());
	is_data_obj_created = 0;
	is_data_format_created = 1;
}
51 |
52 | _DataObject::~_DataObject() {
53 | if(DEBUG) {
54 | printf("Delete object");
55 | }
56 | if (is_data_obj_created && isc_data_obj) {
57 | delete isc_data_obj;
58 | }
59 | if( is_data_format_created && data_format) {
60 | delete data_format;
61 | }
62 | }
63 |
64 | void _DataObject::add2DArray(double* in_array2D, int num_of_rows, int num_of_columns) {
65 | intfloat* vec;
66 | for (int i = 0; i < num_of_rows; i++) {
67 | vec = isc_data_obj->newentry();
68 | _convert_to_intfloat((in_array2D+i*num_of_columns), num_of_columns, vec);
69 | }
70 | }
71 |
72 |
73 | void _DataObject::_convert_to_intfloat(double* in_array1D, int num_of_columns, intfloat* vec) {
74 | for (int j = 0; j < num_of_columns; j++) {
75 | switch(data_format->get_isc_format()->nth(j)->type()) {
76 | case FORMATSPEC_DISCR:
77 | case FORMATSPEC_SYMBOL:
78 | case FORMATSPEC_BINARY:
79 | case FORMATSPEC_UNKNOWN:
80 | case FormatSpecDatetimeType:
81 | vec[j].i = (int) in_array1D[j];
82 | break;
83 | case FORMATSPEC_CONT:
84 | vec[j].f = (float) in_array1D[j];
85 | break;
86 | default:
87 | printf("An unhandled isc format %i for value %f\n",data_format->get_isc_format()->nth(j)->type(), in_array1D[j]);
88 | }
89 | }
90 | }
91 |
// Append a single row: delegates to add2DArray with one row.
void _DataObject::add1DArray(double* in_array1D, int num_of_columns) {
	add2DArray(in_array1D, 1, num_of_columns);
}
95 |
96 |
// Number of rows in the underlying ISC data object.
int _DataObject::size() {
	return isc_data_obj->size();
}
100 |
101 |
// Number of columns in the underlying ISC data object.
int _DataObject::length() {
	return isc_data_obj->length();
}
105 |
// The pyisc Format describing this object's columns (not a copy).
Format* _DataObject::getFormat() {
	return data_format;
}
109 |
110 | void pyisc::_DataObject::_as1DArray(double* out_1DArray, int num_of_elements) {
111 | int num_of_rows = isc_data_obj->size();
112 | int num_of_columns = isc_data_obj->length();
113 | if(num_of_elements != num_of_rows*num_of_columns) {
114 | printf("Wrong number of elements");
115 | }
116 |
117 | intfloat* vec;
118 | for (int i = 0; i < num_of_rows; i++) {
119 | vec = (*isc_data_obj)[i];
120 | _convert_to_numpyarray(vec, (out_1DArray+num_of_columns*i), num_of_columns);
121 | }
122 | }
123 |
124 | void pyisc::_DataObject::_convert_to_numpyarray(intfloat* vec, double* out_1DArray, int num_of_elements) {
125 | for (int j = 0; j < num_of_elements; j++) {
126 | switch(data_format->get_isc_format()->nth(j)->type()) {
127 | case FORMATSPEC_DISCR:
128 | case FORMATSPEC_SYMBOL:
129 | case FORMATSPEC_BINARY:
130 | case FORMATSPEC_UNKNOWN:
131 | case FormatSpecDatetimeType:
132 | out_1DArray[j] = (double) vec[j].i;
133 | break;
134 | case FORMATSPEC_CONT:
135 | out_1DArray[j] = (double) vec[j].f;
136 | break;
137 | default:
138 | printf("An unhandled isc format %i for value %i or %f\n",data_format->get_isc_format()->nth(j)->type(), vec[j].i, vec[j].f);
139 | }
140 | }
141 | }
142 |
// Direct access to the wrapped ISC data object (still owned per the flags).
::DataObject* _DataObject::get_isc_data_object() {
	return isc_data_obj;
}
146 |
147 | void _DataObject::set_column_values(int column_index, double* in_array1D, int num_of_columns) {
148 | if(isc_data_obj->size() != num_of_columns) {
149 | printf("Array is not of same size as column array");
150 | return;
151 | }
152 | for(int index=0; index < isc_data_obj->size(); index++) {
153 | switch(data_format->get_isc_format()->nth(column_index)->type()) {
154 | case FORMATSPEC_DISCR:
155 | case FORMATSPEC_SYMBOL:
156 | case FORMATSPEC_BINARY:
157 | case FORMATSPEC_UNKNOWN:
158 | case FormatSpecDatetimeType:
159 | (*isc_data_obj)[index][column_index].i = (int) in_array1D[index];
160 | break;
161 | case FORMATSPEC_CONT:
162 | (*isc_data_obj)[index][column_index].f = (float) in_array1D[index];
163 | break;
164 | default:
165 | printf("An unhandled isc format %i \n",data_format->get_isc_format()->nth(column_index)->type());
166 | }
167 | }
168 | }
169 |
170 | } /* namespace pyisc */
171 |
172 | void pyisc::_DataObject::_getRow(int row_index, double* out_1DArray,
173 | int num_of_elements) {
174 | int num_of_columns = length();
175 | if(num_of_elements != num_of_columns) {
176 | printf("Wrong number of elements specified");
177 | }
178 | intfloat* vec = (*isc_data_obj)[row_index];
179 | _convert_to_numpyarray(vec, out_1DArray, num_of_columns);
180 | }
181 |
// Raw pointer to the intfloat row at the given index (no bounds checking).
intfloat* pyisc::_DataObject::_get_intfloat(int index) {
	return (*isc_data_obj)[index];
}
185 |
186 |
187 |
--------------------------------------------------------------------------------
/src/_DataObject.hh:
--------------------------------------------------------------------------------
1 | /*
2 | --------------------------------------------------------------------------
3 | Copyright (C) 2014, 2015, 2016 SICS Swedish ICT AB
4 |
5 | Main author: Tomas Olsson
6 |
7 | This code is free software: you can redistribute it and/or modify it
8 | under the terms of the GNU Lesser General Public License as published
9 | by the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | This code is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with this code. If not, see .
19 | --------------------------------------------------------------------------
20 | */
21 |
22 | #ifndef DATAOBJECT_HH_
23 | #define DATAOBJECT_HH_
24 |
25 | #include
26 | #include
27 | #include "_Format.hh"
28 |
29 | namespace pyisc {
30 |
31 | //extern double* _to_cpp_array(double* in_array1D, int num_of_columns);
// SWIG-facing wrapper around the core ::DataObject.  Converts between numpy
// double buffers and ISC intfloat rows using the column types of the attached
// Format.  The is_*_created flags record which wrapped objects this instance
// allocated (and must free) versus merely borrowed.
class _DataObject {
	int is_data_obj_created = 0;     // 1 => destructor deletes isc_data_obj
	int is_data_format_created = 0;  // 1 => destructor deletes data_format

protected:
	pyisc::Format* data_format;
	::DataObject *isc_data_obj;

public:
	/**
	 * Create an empty DataObject with a Format that specifies the data types of the columns in a row
	 */
	_DataObject(pyisc::Format *f);
	/**
	 * Create an DataObject for the double array with a Format that specifies the data types of the columns in a row
	 */
	_DataObject(pyisc::Format *format, double* in_array2D, int num_of_rows, int num_of_columns);
	virtual ~_DataObject();

	/**
	 * Read isc original data object from file
	 */
	_DataObject(const char* formatfile, const char* datafile = 0);

	/**
	 * Convert isc original data object to pyisc
	 */
	_DataObject(::DataObject* data_object0);
	/**
	 * Add a 1D numpy array as a row to the data object
	 */
	virtual void add1DArray(double* in_array1D, int num_of_columns);


	/**
	 * Add a 2D numpy array to the data object
	 */
	virtual void add2DArray(double* in_array2D, int num_of_rows, int num_of_columns);

	/**
	 * Returns number of rows.
	 */
	virtual int size();

	/**
	 * Returns number of columns.
	 */
	virtual int length();

	virtual Format* getFormat();

	/**
	 * Returns a 1D array representation of the data rows*cols.
	 */
	virtual void _as1DArray(double* out_1DArray, int num_of_elements);

	/**
	 * Returns a single array at the given row.
	 */
	virtual void _getRow(int row_index, double* out_1DArray, int num_of_elements);

	virtual ::DataObject* get_isc_data_object();

	/**
	 * Takes an numpy array from swig and convert it to a provided intfloat pointer
	 */
	virtual void _convert_to_intfloat(double* in_array1D, int num_of_columns, intfloat* vec);
	/**
	 * Takes an intfloat pointer from swig and convert it to a provided numpy array.
	 */
	virtual void _convert_to_numpyarray(intfloat* vec, double* ARGOUT_ARRAY1, int DIM1);

	// Raw (unchecked) access to a row's intfloat storage.
	virtual intfloat* _get_intfloat(int index);

	/**
	 * Takes a numpy array and sets it values as the given column values.
	 */
	virtual void set_column_values(int column_index, double* in_array1D, int num_of_columns);

protected:
	// Shared constructor body used by the Format-based constructors.
	void init(pyisc::Format* format);

};
115 |
116 | }
117 |
118 |
119 |
120 | #endif /* DATAOBJECT_HH_ */
121 |
--------------------------------------------------------------------------------
/src/_Format.cc:
--------------------------------------------------------------------------------
1 | /*
2 | --------------------------------------------------------------------------
3 | Copyright (C) 2014, 2015, 2016 SICS Swedish ICT AB
4 |
5 | Main author: Tomas Olsson
6 |
7 | This code is free software: you can redistribute it and/or modify it
8 | under the terms of the GNU Lesser General Public License as published
9 | by the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | This code is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with this code. If not, see .
19 | --------------------------------------------------------------------------
20 | */
21 | #include "_Format.hh"
22 | #include
23 | #include
24 | #include
25 | #include
26 |
27 |
28 | namespace pyisc {
29 |
// Create an empty format; the underlying ISC format is allocated here, so it
// is flagged for deletion in the destructor.
Format::Format() {
	isc_format = new ::Format();
	is_format_created = 1;
	if(DEBUG)
		printf("Create isc format\n");

}
37 |
// Wrap an existing ISC format.  is_format_created keeps its default of 0
// (see the header), so the wrapped format is NOT deleted by the destructor.
Format::Format(::Format *isc_format0) {
	isc_format = isc_format0;

}
42 |
43 | Format::~Format() {
44 | if(is_format_created && isc_format) {
45 | if(DEBUG)
46 | printf("Delete isc format\n");
47 | delete isc_format;
48 | isc_format=0;
49 | }
50 | }
51 |
52 | void Format::addColumn(const char* name, ColumnType type) {
53 | switch(type) {
54 | case Continuous:
55 | isc_format->add(new ::FormatSpecCont(name));
56 | break;
57 | case Discrete:
58 | isc_format->add(new ::FormatSpecDiscr(name));
59 | break;
60 | case TIME:
61 | isc_format->add(new ::FormatSpecDatetime(name));
62 | break;
63 | case Symbol:
64 | isc_format->add(new ::FormatSpecSymbol(name));
65 | break;
66 | default:
67 | printf("Unknown column type %i", type);
68 | };
69 | }
70 |
// Append a copy of the given column spec; the caller keeps its original.
void Format::add(FormatSpec* format_spec) {
	isc_format->add(format_spec->_isc_format->copy());
}
74 |
75 |
76 | } /* namespace pyisc */
77 |
// Direct access to the wrapped ISC format (ownership unchanged).
::Format* pyisc::Format::get_isc_format() {
	return isc_format;
}
81 |
82 | void pyisc::Format::printColumnNames() {
83 | printf("Column names:\n");
84 | for(int j=0; j < size(); j++) {
85 | printf(" %s Type %i\n",isc_format->nth(j)->name, isc_format->nth(j)->type());
86 | }
87 | }
88 |
// Number of columns in the format (the ISC API calls this "length").
int pyisc::Format::size() {
	return isc_format->length();
}
92 |
93 |
--------------------------------------------------------------------------------
/src/_Format.hh:
--------------------------------------------------------------------------------
1 | /*
2 | --------------------------------------------------------------------------
3 | Copyright (C) 2014, 2015, 2016 SICS Swedish ICT AB
4 |
5 | Main author: Tomas Olsson
6 |
7 | This code is free software: you can redistribute it and/or modify it
8 | under the terms of the GNU Lesser General Public License as published
9 | by the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | This code is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU Lesser General Public License for more details.
16 |
17 | You should have received a copy of the GNU Lesser General Public License
18 | along with this code. If not, see .
19 | --------------------------------------------------------------------------
20 | */
21 |
22 |
23 | #ifndef FORMAT_HH_
24 | #define FORMAT_HH_
25 |
26 | #include
27 | #include
28 |
29 | #ifndef DEBUG
30 | #define DEBUG 0
31 | #endif
32 |
33 | namespace pyisc {
// Thin non-owning wrapper around a single ISC column specification,
// exposing its name, value representation and symbol table to Python.
class FormatSpec{

public:
	// Wraps (does not copy or own) the given ISC spec.
	FormatSpec(::FormatSpec *isc_format_spec) {_isc_format = isc_format_spec;};
	const char* get_name() {return _isc_format->name;};
	// Human-readable representation of a discrete or continuous value.
	const char* represent(int v) { return _isc_format->represent(intfloat(v)); };
	const char* represent(float v) { return _isc_format->represent(intfloat(v)); };
	int getnum() { return _isc_format->getnum(); };
	// Register a symbol string with the underlying spec.
	void add(const char* str) {_isc_format->add(str);};
	::FormatSpec *_isc_format;
};
45 |
// Wrapper around ::Format describing the column layout of a data object.
// is_format_created records whether this instance allocated the wrapped
// ISC format (and must delete it) or merely borrowed it.
class Format {
	int is_format_created = 0;  // 1 => destructor deletes isc_format

protected:
	::Format* isc_format;

public:
	// Column kinds accepted by addColumn.
	enum ColumnType {
		Discrete,
		Continuous,
		Symbol,
		TIME
	};

	Format();
	/**
	 * Convert isc orginial format to pyisc format
	 */
	Format(::Format*);
	virtual ~Format();

	/**
	 * Add a new column to the format with name and type.
	 */
	virtual void addColumn(const char *name, ColumnType type);

	/**
	 * TODO memory leak!
	 */
	// NOTE(review): each call allocates a new FormatSpec wrapper that the
	// caller must delete — see the TODO above.
	virtual FormatSpec* get_nth_column(int n) {return new FormatSpec(isc_format->nth(n));};
	virtual FormatSpec* nth(int n) {return get_nth_column(n);};
	virtual void add(FormatSpec*);
	virtual int size();

	virtual void printColumnNames();

	virtual ::Format* get_isc_format();
};
84 | }
85 |
86 |
87 |
88 | #endif /* FORMAT_HH_ */
89 |
--------------------------------------------------------------------------------
/src/_JSonExporter.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * _JSonExporter.cc
3 | *
4 | * Created on: Feb 9, 2018
5 | * Author: tol
6 | */
7 |
8 | #include "_JSonExporter.hh"
9 |
10 |
11 | namespace pyisc {
12 |
13 |
// Flag the exported document as unsupported by this model type.
void _JSonExporter::notImplemented(){
	root[std::string("EXPORT_NOT_IMPLEMENTED")] = true;
}
17 |
// Store a string parameter under parameter_name in the current JSON object.
void _JSonExporter::addParameter(const char* parameter_name, const char* value){
	root[std::string(parameter_name)] = std::string(value);
}
21 |
// Store an int parameter under parameter_name in the current JSON object.
void _JSonExporter::addParameter(const char* parameter_name, int value){
	root[std::string(parameter_name)] = value;
}
// Store a float parameter under parameter_name in the current JSON object.
void _JSonExporter::addParameter(const char* parameter_name, float value){
	root[std::string(parameter_name)] = value;
}
// Store a double parameter under parameter_name in the current JSON object.
void _JSonExporter::addParameter(const char* parameter_name, double value){
	root[std::string(parameter_name)] = value;
}
31 | void _JSonExporter::addParameter(const char* parameter_name, int *values, int length){
32 | ArduinoJson::JsonArray& array = root.createNestedArray(std::string(parameter_name));
33 | for(int i=0; i < length;i++) {
34 | array.add(values[i]);
35 | }
36 | }
37 | void _JSonExporter::addParameter(const char* parameter_name, float *values, int length){
38 | ArduinoJson::JsonArray& array = root.createNestedArray(std::string(parameter_name));
39 | for(int i=0; i < length;i++) {
40 | array.add(values[i]);
41 | }
42 | }
43 | void _JSonExporter::addParameter(const char* parameter_name, double *values, int length){
44 | ArduinoJson::JsonArray& array = root.createNestedArray(std::string(parameter_name));
45 | for(int i=0; i < length;i++) {
46 | array.add(values[i]);
47 | }
48 | }
49 |
// Create a sub-exporter writing into a nested JSON object keyed by name.
// The returned exporter is heap-allocated; the caller is expected to delete it.
IscAbstractModelExporter* _JSonExporter::createModelExporter(const char * parameter_name) {
	return ( IscAbstractModelExporter*) new _JSonExporter(root.createNestedObject(std::string(parameter_name)));
}
// Same as above, but the nested object is keyed by the stringified id.
IscAbstractModelExporter* _JSonExporter::createModelExporter(int parameter_id){
	return ( IscAbstractModelExporter*) new _JSonExporter(root.createNestedObject(to_string(parameter_id)));
}
56 |
// Serialize everything exported so far as pretty-printed JSON.
std::string _JSonExporter::getJSonString() {
	std::string str;
	root.prettyPrintTo(str);
	return str;
}
62 | }
63 |
--------------------------------------------------------------------------------
/src/_JSonExporter.hh:
--------------------------------------------------------------------------------
1 | /*
2 | * _JSonExporter.hh
3 | *
4 | * Created on: Feb 9, 2018
5 | * Author: tol
6 | */
7 |
8 | #ifndef JSONEXPORTER_HH_
9 | #define JSONEXPORTER_HH_
10 |
11 | #include "isc_exportimport.hh"
12 | #include "ArduinoJson.hpp"
13 | #include "mystring.hh"
14 |
15 |
16 | namespace pyisc {
17 |
18 |
19 | // for convenience
20 |
// JSON-backed implementation of ::IscAbstractModelExporter built on
// ArduinoJson: parameters are accumulated into a JSON object tree and
// getJSonString() serializes the whole tree.
class _JSonExporter : ::IscAbstractModelExporter {
public:
	// Root exporter: owns the json buffer and creates the root object.
	_JSonExporter():root(jsonBuffer.createObject()){};
	virtual ~_JSonExporter(){};

	// Marks the exported model as unsupported (EXPORT_NOT_IMPLEMENTED flag).
	virtual void notImplemented();

	// Scalars are stored directly under parameter_name; arrays as nested
	// JSON arrays.
	virtual void addParameter(const char* parameter_name, const char* value);
	virtual void addParameter(const char* parameter_name, int value);
	virtual void addParameter(const char* parameter_name, float value);
	virtual void addParameter(const char* parameter_name, double value);
	virtual void addParameter(const char* parameter_name, int *value, int length);
	virtual void addParameter(const char* parameter_name, float *value, int length);
	virtual void addParameter(const char* parameter_name, double *value, int length);

	// Create a heap-allocated sub-exporter writing into a nested object;
	// the caller is expected to delete it.
	virtual IscAbstractModelExporter* createModelExporter(const char * parameter_name);
	virtual IscAbstractModelExporter* createModelExporter(int parameter_id);

	// Pretty-printed JSON of everything exported so far.
	virtual std::string getJSonString();

protected:
	// Nested exporter sharing a sub-object of a parent's tree.
	_JSonExporter(ArduinoJson::JsonObject& root):root(root){
	};

private:
	ArduinoJson::DynamicJsonBuffer jsonBuffer;  // backing storage (root exporter only)
	ArduinoJson::JsonObject& root;              // object this exporter writes into
};
49 |
50 | }
51 |
52 |
53 | #endif /* JSONEXPORTER_HH_ */
54 |
55 |
--------------------------------------------------------------------------------
/src/_JSonImporter.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * _JSonImporter.cc
3 | *
4 | * Created on: 19 Feb 2018
5 | * Author: tol
6 | */
7 |
8 |
9 | #include "_JSonImporter.hh"
10 |
11 | namespace pyisc {
12 |
// Import counterpart of the exporter's notImplemented: just reports it.
void _JSonImporter::notImplemented(){
	printf("JSon importer not implemented\n");
}
16 |
// Read an int parameter from the current JSON object into `value`.
// NOTE(review): the debug print runs before the assignment, so it logs the
// caller's previous value, not the imported one.
void _JSonImporter::fillParameter(const char* parameter_name, int& value){
	if(DEBUG)
		printf("Import %s as int:%i", parameter_name, value);
	value = (*root)[std::string(parameter_name)];
}
// Read a float parameter from the current JSON object into `value`.
// NOTE(review): the debug print logs the pre-assignment value (see int overload).
void _JSonImporter::fillParameter(const char* parameter_name, float& value){
	if(DEBUG)
		printf("Import %s as float:%f", parameter_name, value);
	value = (*root)[std::string(parameter_name)];
}
27 | void _JSonImporter::fillParameter(const char* parameter_name, double& value){
28 | if(DEBUG)
29 | printf("Import %s as double:%d", parameter_name, value);
30 | value = (*root)[std::string(parameter_name)];
31 | }
32 | void _JSonImporter::fillParameter(const char* parameter_name, int *values, int length){
33 | if(DEBUG)
34 | printf("Import %s as int array", parameter_name);
35 |
36 | ArduinoJson::JsonArray& array = (*root)[std::string(parameter_name)];
37 | for(int i=0; i < length;i++) {
38 | values[i] = array[i];
39 | }
40 | }
41 | void _JSonImporter::fillParameter(const char* parameter_name, float *values, int length){
42 | if(DEBUG)
43 | printf("Import %s as float array", parameter_name);
44 |
45 | ArduinoJson::JsonArray& array = (*root)[std::string(parameter_name)];
46 | for(int i=0; i < length;i++) {
47 | values[i] = array[i];
48 | }
49 | }
50 | void _JSonImporter::fillParameter(const char* parameter_name, double *values, int length){
51 | if(DEBUG)
52 | printf("Import %s as double array", parameter_name);
53 |
54 | ArduinoJson::JsonArray& array = (*root)[std::string(parameter_name)];
55 | for(int i=0; i < length;i++) {
56 | values[i] = array[i];
57 | }
58 | }
59 |
// Create a sub-importer reading from the nested JSON object keyed by name.
// The returned importer is heap-allocated; callers delete it (see
// _AnomalyDetector::importModel).
IscAbstractModelImporter* _JSonImporter::getModelImporter(const char * parameter_name) {
	if(DEBUG)
		printf("Import %s as json object", parameter_name);

	ArduinoJson::JsonObject& object = (*root)[std::string(parameter_name)];


	return ( IscAbstractModelImporter*) new _JSonImporter(&object);
}
// Same as above, but the nested object is keyed by the stringified id.
IscAbstractModelImporter* _JSonImporter::getModelImporter(int parameter_id){
	if(DEBUG)
		printf("Import %i as json object", parameter_id);

	ArduinoJson::JsonObject& object = (*root)[to_string(parameter_id)];


	return ( IscAbstractModelImporter*) new _JSonImporter(&object);
}
78 |
79 | } // namespace pyisc
80 |
81 |
--------------------------------------------------------------------------------
/src/_JSonImporter.hh:
--------------------------------------------------------------------------------
1 | /*
2 | * _JSonImporter.hh
3 | *
4 | * Created on: 19 Feb 2018
5 | * Author: tol
6 | */
7 |
8 | #ifndef SRC__JSONIMPORTER_HH_
9 | #define SRC__JSONIMPORTER_HH_
10 |
11 | #include "isc_exportimport.hh"
12 | #include "ArduinoJson.hpp"
13 | #include "mystring.hh"
14 |
15 | #ifndef DEBUG
16 | #define DEBUG 0
17 | #endif
18 |
19 | namespace pyisc {
20 |
21 | class _JSonImporter : IscAbstractModelImporter {
22 | public:
23 | _JSonImporter(){
24 | };
25 | virtual ~_JSonImporter(){};
26 | virtual void notImplemented();
27 |
28 | // Methods that sets the values to the provided data structure
29 | virtual void fillParameter(const char* parameter_name, int &value);
30 | virtual void fillParameter(const char* parameter_name, float &value);
31 | virtual void fillParameter(const char* parameter_name, double &value);
32 |
33 | virtual void fillParameter(const char* parameter_name, int *value, int length);
34 | virtual void fillParameter(const char* parameter_name, float *value, int length);
35 | virtual void fillParameter(const char* parameter_name, double *value, int length);
36 | virtual IscAbstractModelImporter* getModelImporter(const char * parameter_name);
37 | virtual IscAbstractModelImporter* getModelImporter(int parameter_id);
38 |
39 | // Return True if succeeds
40 | bool parseJSon(std::string json) {
41 | root = &jsonBuffer.parseObject(json);
42 | return root->success();
43 | }
44 | protected:
45 | _JSonImporter(ArduinoJson::JsonObject* root):root(root){
46 | };
47 |
48 | private:
49 | ArduinoJson::DynamicJsonBuffer jsonBuffer;
50 | ArduinoJson::JsonObject* root;
51 |
52 | };
53 |
54 | } // namespace pyisc
55 |
56 |
57 |
58 |
59 |
60 | #endif /* SRC__JSONIMPORTER_HH_ */
61 |
--------------------------------------------------------------------------------
/src/mystring.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * mystring.cc
3 | *
4 | * Created on: 23 Feb 2018
5 | * Author: tol
6 | */
7 | #include "mystring.hh"
8 |
// Local fallback for std::to_string(int) on toolchains that lack it:
// format the integer through an output string stream.
std::string to_string(int i) {
    std::ostringstream out;
    out << i;
    return out.str();
}
14 |
--------------------------------------------------------------------------------
/src/mystring.hh:
--------------------------------------------------------------------------------
1 | /*
2 | * mystring.hh
3 | *
4 | * Created on: 23 Feb 2018
5 | * Author: tol
6 | */
7 |
#ifndef SRC_MYSTRING_HH_
#define SRC_MYSTRING_HH_

// The include lines below were empty in the checked-in header, leaving both
// the std::string declaration here and the std::ostringstream use in
// mystring.cc without their required headers.
#include <string>
#include <sstream>

// Replace the std::to_string (missing on some of the toolchains targeted)
std::string to_string(int i);

#endif /* SRC_MYSTRING_HH_ */
--------------------------------------------------------------------------------
/unittests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STREAM3/pyISC/b5615fe5d6b3e474f7afcdf3f3e44b3dded2e889/unittests/__init__.py
--------------------------------------------------------------------------------
/unittests/test_AnomalyDetector.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import pyisc
3 | import numpy as np
class MyTestCase(unittest.TestCase):
    """Checks that AnomalyDetector rejects a component model whose column
    index exceeds the data's maximum column index, and that scoring is
    deterministic."""

    def test_something(self):
        # Single-column data: only column index 0 is valid.
        X = np.array([0.1, -0.1, 0.05, -0.01, 0.0, 0.11]).reshape((-1, 1))

        # Fitting a model that references column 1 must raise.
        # NOTE: the original wrapped `self.assertFalse(True, ...)` in a
        # `try/except AssertionError: pass`, which swallowed the failure it
        # was meant to report, so the test could never fail. assertRaises
        # expresses the intent correctly.
        with self.assertRaises(AssertionError):
            pyisc.AnomalyDetector(pyisc.P_Gaussian(1)).fit(X)

        ad = pyisc.AnomalyDetector(pyisc.P_Gaussian(0)).fit(X)

        # Scoring the same input twice must give identical results.
        self.assertTrue(np.array_equal(ad.anomaly_score(X), ad.anomaly_score(X)))
20 |
21 |
22 |
23 | if __name__ == '__main__':
24 | unittest.main()
25 |
--------------------------------------------------------------------------------
/unittests/test_DataObject.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from pyisc import DataObject
4 | from numpy import array, c_,unique
5 | from scipy.stats import norm
6 | from numpy.testing.utils import assert_allclose, assert_equal
7 |
class test_DataObject(unittest.TestCase):
    def test_dataobject_set_column_values(self):
        """DataObject must track the class column and its set of classes,
        both at construction time and after set_column_values()."""
        features = array([norm(1.0).rvs(10) for _ in range(1000)])
        labels = [None] * 1000
        n_cols = len(features[0])

        # Class column given; classes inferred from the (all-None) labels.
        dobj = DataObject(c_[features, labels], class_column=n_cols)
        assert_equal(n_cols, dobj.class_column)
        assert_equal(unique(labels), dobj.classes_)

        # Class column given together with an explicit class list.
        class_list = [None] + ['1', '2', '3', '4', '5']
        dobj = DataObject(c_[features, labels], class_column=n_cols, classes=class_list)
        assert_equal(n_cols, dobj.class_column)
        assert_equal(class_list, dobj.classes_)

        matrix = dobj.as_2d_array()
        assert_allclose(matrix.T[:-1].T.astype(float), features)
        assert_equal(matrix.T[-1], labels)

        # Replacing the label column must update classes_ accordingly.
        fresh_labels = ["%i" % (divmod(i, 5)[1] + 1) for i in range(len(features))]
        dobj.set_column_values(n_cols, fresh_labels)

        assert_equal(n_cols, dobj.class_column)
        assert_equal([None] + list(unique(fresh_labels)), dobj.classes_)

        matrix = dobj.as_2d_array()
        assert_allclose(matrix.T[:-1].T.astype(float), features)
        assert_equal(matrix.T[-1], fresh_labels)
35 |
36 |
37 |
38 | if __name__ == '__main__':
39 | unittest.main()
40 |
--------------------------------------------------------------------------------
/unittests/test_JSonExportImport.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import pyisc;
4 | import numpy as np
5 | from scipy.stats import norm
6 | from numpy.testing.utils import assert_allclose
7 |
8 |
class MyTestCase(unittest.TestCase):
    """JSON round-trip tests: a fitted AnomalyDetector exported to JSON and
    imported into a fresh detector must reproduce both the JSON and the
    anomaly scores. (Data-set construction was duplicated verbatim in both
    tests; it is factored into _make_data.)"""

    @staticmethod
    def _make_data(normal_len=100, anomaly_len=15):
        """Build a 3-column data set whose final `anomaly_len` rows are drawn
        from wider distributions than the first `normal_len` rows."""
        po_normal = norm(1.1, 5)
        po_anomaly = norm(1.5, 7)

        po_normal2 = norm(2.2, 10)
        po_anomaly2 = norm(3, 12)

        gs_normal = norm(1, 12)
        gs_anomaly = norm(2, 30)

        return np.column_stack(
            [
                list(po_normal.rvs(normal_len)) + list(po_anomaly.rvs(anomaly_len)),
                list(po_normal2.rvs(normal_len)) + list(po_anomaly2.rvs(anomaly_len)),
                list(gs_normal.rvs(normal_len)) + list(gs_anomaly.rvs(anomaly_len)),
            ]
        )

    def test_multivariate_gaussian(self):
        data = self._make_data()

        def make_detector():
            # One independent Gaussian component per column, max-combined.
            return pyisc.AnomalyDetector(
                component_models=[
                    pyisc.P_Gaussian(0),
                    pyisc.P_Gaussian(1),
                    pyisc.P_Gaussian(2)
                ],
                output_combination_rule=pyisc.cr_max
            )

        anomaly_detector = make_detector()
        anomaly_detector.fit(data)

        json = anomaly_detector.exportJSon()
        print(json)

        # A structurally identical, unfitted detector must accept the JSON.
        anomaly_detector2 = make_detector()
        anomaly_detector2.importJSon(json)

        json2 = anomaly_detector2.exportJSon()
        print(json2)

        # Scores and the re-exported JSON must both match the original.
        assert_allclose(anomaly_detector.anomaly_score(data), anomaly_detector2.anomaly_score(data))
        self.assertEqual(json, json2)

    def test_conditional_gaussian(self):
        data = self._make_data()

        def make_detector():
            # Two conditional Gaussians merged by a combiner model.
            return pyisc.AnomalyDetector(
                component_models=[
                    pyisc.P_ConditionalGaussianCombiner([pyisc.P_ConditionalGaussian([0], [1]), pyisc.P_ConditionalGaussian([1], [2])])
                ],
                output_combination_rule=pyisc.cr_max
            )

        anomaly_detector = make_detector()
        anomaly_detector.fit(data)

        json = anomaly_detector.exportJSon()
        print(json)

        anomaly_detector2 = make_detector()
        anomaly_detector2.importJSon(json)

        json2 = anomaly_detector2.exportJSon()
        print(json2)

        self.assertEqual(json, json2)
        assert_allclose(anomaly_detector.anomaly_score(data), anomaly_detector2.anomaly_score(data))
116 | if __name__ == '__main__':
117 | unittest.main()
118 |
--------------------------------------------------------------------------------
/unittests/test_SklearnOutlierDetection.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from random import seed
3 |
4 | from scipy import stats
5 | import numpy as np
6 | import pyisc
7 |
class test_SklearnOutlierDetection(unittest.TestCase):
    def test_outlier_detection(self):
        """Fit SklearnOutlierDetector on correlated Gaussian data with a
        uniform-noise tail; check the learned threshold and the prediction
        accuracy against the known outlier rows."""
        print("Start of test")
        sample_count = 1000
        gaussian = stats.norm(0, 1)

        # Ground truth: final 100 rows are outliers (-1), the rest inliers (+1).
        expected = np.ones((sample_count,))
        expected[-100:] = -1

        base = gaussian.rvs(sample_count)
        observations = np.c_[base * 5, base + gaussian.rvs(sample_count) * 2]

        flat = stats.uniform(-10, 10)
        observations[-100:] = np.c_[flat.rvs(100), flat.rvs(100)]

        detector = pyisc.SklearnOutlierDetector(
            100.0 / sample_count,
            pyisc.P_Gaussian([0, 1])
        )

        detector.fit(observations, np.array([1] * len(observations)))

        # The learned threshold should land in a narrow expected band.
        self.assertLess(detector.threshold_, 0.35)
        self.assertGreater(detector.threshold_, 0.25)

        predicted = detector.predict(observations, np.array([1] * len(observations)))

        accuracy = sum(expected == predicted) / float(sample_count)

        print("accuracy", accuracy)
        self.assertGreater(accuracy, 0.85)
41 |
42 |
43 |
44 |
45 |
46 | if __name__ == '__main__':
47 | unittest.main()
48 |
--------------------------------------------------------------------------------
/unittests/test_max_index_problem.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import pyisc;
4 | import numpy as np
5 | from scipy.stats import norm
6 | from numpy.testing.utils import assert_allclose
7 | import pickle
8 |
class MyTestCase(unittest.TestCase):
    """Regression test for a historical bug where fit() failed with
    `AssertionError: self._max_index < data_object.length()` (raised in
    BaseISC._fit) when component models indexed a frequency column."""

    def test_multivariate_gaussian(self):
        from scipy.stats import poisson, norm

        po_normal = poisson(10)
        po_anomaly = poisson(25)

        po_normal2 = poisson(2)
        po_anomaly2 = poisson(3)

        gs_normal = norm(1, 12)
        gs_anomaly = norm(2, 30)

        normal_len = 10000
        anomaly_len = 15

        # Column 0 is a constant column used as the Poisson models'
        # frequency reference; the rest mix "normal" and "anomalous" draws.
        data = np.column_stack(
            [
                [1] * (normal_len + anomaly_len),
                list(po_normal.rvs(normal_len)) + list(po_anomaly.rvs(anomaly_len)),
                list(po_normal2.rvs(normal_len)) + list(po_anomaly2.rvs(anomaly_len)),
                list(gs_normal.rvs(normal_len)) + list(gs_anomaly.rvs(anomaly_len)),
            ]
        )
        anomaly_detector = pyisc.AnomalyDetector(
            component_models=[
                pyisc.P_PoissonOnesided(1, 0),  # columns 1 and 0
                pyisc.P_Poisson(2, 0),  # columns 2 and 0
                pyisc.P_Gaussian(3)  # column 3
            ],
            output_combination_rule=pyisc.cr_max
        )

        # Completing fit() without an exception IS the pass condition:
        # before the fix this call raised the max-index AssertionError above.
        anomaly_detector.fit(data)
68 |
69 |
70 |
71 | if __name__ == '__main__':
72 | unittest.main()
73 |
--------------------------------------------------------------------------------
/unittests/test_p_ConditionalGaussian.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest import TestCase
3 |
4 | from numpy.lib.index_tricks import c_
5 | from numpy.ma.extras import vstack
6 | from numpy.testing.utils import assert_allclose, assert_equal
7 | from scipy.stats.stats import pearsonr
8 |
9 | from pyisc import AnomalyDetector, P_Gaussian, P_ConditionalGaussian, P_ConditionalGaussianCombiner, cr_plus
10 | from scipy.stats import norm
11 | from numpy import array
12 |
class TestPConditionalGaussian(TestCase):
    """Compares P_ConditionalGaussian-based detectors against plain
    P_Gaussian detectors in cases where they should agree exactly,
    approximately, or merely be strongly correlated.

    Bug fixed: `pearsonr(a, b)` returns the tuple (r, p-value); the original
    compared the whole tuple against a float threshold, which raises
    TypeError on Python 3 and was vacuously true on Python 2. All such
    comparisons now use `pearsonr(...)[0]`.
    """

    def test_conditional_gaussian(self):
        # 1-D: an unconditional conditional-Gaussian must match P_Gaussian.
        x = array([[x0] for x0 in norm(0, 1).rvs(1000)])

        gauss_scores = AnomalyDetector(P_Gaussian(0)).fit(x).anomaly_score(x)
        condgauss_scores = \
            AnomalyDetector(P_ConditionalGaussian([0], [])). \
            fit(x). \
            anomaly_score(x)

        assert_allclose(gauss_scores, condgauss_scores, atol=0.01, rtol=0.01)

        # Conditioning on an independent column should change little.
        X = array([[x0, x1] for x0, x1 in zip(norm(0, 1).rvs(1000), norm(0, 1).rvs(1000))])

        gauss_scores_X = AnomalyDetector(P_Gaussian([0])).fit(X).anomaly_score(X)
        condgauss_scores_X = \
            AnomalyDetector(P_ConditionalGaussian([0], [1])). \
            fit(X). \
            anomaly_score(X)

        assert_allclose(gauss_scores_X, condgauss_scores_X, atol=0.3)

        # Correlated columns: not equal at all, but highly correlated.
        X = array([[x0, x0 + 0.1 * x1] for x0, x1 in zip(norm(0, 1).rvs(1000), norm(0, 1).rvs(1000))])

        gauss_scores_X = AnomalyDetector(P_Gaussian([0, 1])).fit(X).anomaly_score(X)
        condgauss_scores_X = \
            AnomalyDetector(P_ConditionalGaussian([0, 1], [])). \
            fit(X). \
            anomaly_score(X)

        assert_equal((pearsonr(gauss_scores_X, condgauss_scores_X)[0] > 0.994), True)
        assert_allclose(gauss_scores_X, condgauss_scores_X, atol=2)  # Very bad

        # Extra independent third column.
        X = array([[x0, x0 + 0.1 * x1, x2] for x0, x1, x2 in c_[norm(0, 1).rvs(1000), norm(0, 1).rvs(1000), norm(0, 1).rvs(1000)]])

        gauss_scores_X = AnomalyDetector(P_Gaussian([0, 1])).fit(X).anomaly_score(X)
        condgauss_scores_X = \
            AnomalyDetector(P_ConditionalGaussian([0, 1], [])). \
            fit(X). \
            anomaly_score(X)

        assert_equal((pearsonr(gauss_scores_X, condgauss_scores_X)[0] > 0.994), True)
        assert_allclose(gauss_scores_X, condgauss_scores_X, atol=2)  # Very bad

        X = array(
            [[x0, x0 + 0.1 * x1, x2] for x0, x1, x2 in c_[norm(0, 1).rvs(1000), norm(0, 1).rvs(1000), norm(0, 1).rvs(1000)]])

        # Full joint Gaussian vs a chain-rule factorisation via the combiner.
        gauss_scores_X = AnomalyDetector(P_Gaussian([0, 1, 2])).fit(X).anomaly_score(X)
        condgauss_scores_X = \
            AnomalyDetector(
                P_ConditionalGaussianCombiner([
                    P_ConditionalGaussian([0], [1, 2]),
                    P_ConditionalGaussian([1], [2]),
                    P_ConditionalGaussian([2], []),
                ])). \
            fit(X). \
            anomaly_score(X)

        assert_equal((pearsonr(gauss_scores_X, condgauss_scores_X)[0] > 0.98), True)
        assert_allclose(gauss_scores_X, condgauss_scores_X, atol=5)  # Very bad

        # Joint conditional Gaussian vs the same chain rule: near-equal.
        gauss_scores_X = AnomalyDetector(P_ConditionalGaussian([0, 1, 2], [])).fit(X).anomaly_score(X)
        condgauss_scores_X = \
            AnomalyDetector(
                P_ConditionalGaussianCombiner([
                    P_ConditionalGaussian([0], [1, 2]),
                    P_ConditionalGaussian([1], [2]),
                    P_ConditionalGaussian([2], []),
                ])). \
            fit(X). \
            anomaly_score(X)

        assert_allclose(gauss_scores_X, condgauss_scores_X, atol=0.001)

        # Combining the same factors with an ordinary combination rule
        # (adding anomaly scores together) instead of the combiner model.
        condgauss_scores_X2 = \
            AnomalyDetector(
                [
                    P_ConditionalGaussian([0], [1, 2]),
                    P_ConditionalGaussian([1], [2]),
                    P_ConditionalGaussian([2], []),
                ], cr_plus). \
            fit(X). \
            anomaly_score(X)

        assert_equal((pearsonr(condgauss_scores_X, condgauss_scores_X2)[0] > 0.99), True)  # Good
        assert_allclose(condgauss_scores_X2, condgauss_scores_X, atol=2)  # Bad

        # Per-column independent models: both styles should agree well.
        ad1 = AnomalyDetector(
            [P_Gaussian([i]) for i in range(len(X[0]))],
            cr_plus
        ).fit(X)
        s1 = ad1.anomaly_score(X)

        ad2 = AnomalyDetector(
            [P_ConditionalGaussian([i], []) for i in range(len(X[0]))],
            cr_plus
        ).fit(X)
        s2 = ad2.anomaly_score(X)

        print("r:", pearsonr(s1, s2))

        assert_allclose(s1, s2, rtol=0.01)  # OK
132 |
133 | if __name__ == '__main__':
134 | unittest.main()
--------------------------------------------------------------------------------
/unittests/test_p_ConditionalGaussianDependencyMatrix.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest import TestCase
3 |
4 | from numpy import array,r_
5 |
6 | from numpy.ma.testutils import assert_close
7 | from numpy.testing.utils import assert_allclose, assert_equal
8 | from scipy.stats import norm
9 | from scipy.stats.stats import pearsonr
10 | from sklearn.utils import shuffle
11 |
12 | from pyisc import AnomalyDetector, \
13 | P_ConditionalGaussianDependencyMatrix, \
14 | P_ConditionalGaussianCombiner, \
15 | P_ConditionalGaussian, \
16 | P_Gaussian, cr_plus, cr_max, \
17 | SklearnClassifier, SklearnClusterer
18 |
19 | import pylab as plt
20 |
21 | norm_dist = norm(0, 1)
22 |
23 |
def sample_markov_chain(length, noise=0.1, dist=None):
    """Sample a Gaussian random walk of `length` steps.

    The first value is a draw from `dist`; each subsequent value is the
    previous value plus `noise` times a fresh draw from `dist`.

    Args:
        length: number of samples to generate (length <= 0 still yields the
            initial draw, matching the original behavior).
        noise: scale applied to each innovation after the first sample.
        dist: any object with an `rvs(n)` method; defaults to the
            module-level standard-normal `norm_dist`.

    Returns:
        list of floats.
    """
    # `global norm_dist` in the original was unnecessary for a read-only
    # access; the optional `dist` parameter also makes the function testable.
    if dist is None:
        dist = norm_dist
    sample = [dist.rvs(1)[0]]
    for i in range(1, length):
        sample.append(dist.rvs(1)[0] * noise + sample[i - 1])

    return sample
32 |
class TestPConditionalGaussianDependencyMatrix(TestCase):
    """Checks P_ConditionalGaussianDependencyMatrix against equivalent
    chain-rule combinations of P_ConditionalGaussian models, and compares
    classification accuracy across several model variants.

    Bug fixed: `pearsonr` returns the tuple (r, p-value); the original
    compared the tuple itself to a float (TypeError on Python 3); the
    comparison now uses the correlation coefficient `[0]`.
    """

    def test_conditional_gaussian_dependency_matrix(self):
        length = 100
        n_samples = 1000
        X = array([sample_markov_chain(length) for _ in range(n_samples)])

        # The dependency matrix over consecutive columns must equal the
        # explicit chain-rule factorisation.
        s0 = AnomalyDetector(
            P_ConditionalGaussianDependencyMatrix(list(range(length)), length)
        ).fit(X).anomaly_score(X)

        ad1 = AnomalyDetector(
            P_ConditionalGaussianCombiner([P_ConditionalGaussian([i + 1], [i]) for i in range(length - 1)] + [P_ConditionalGaussian([0], [])]),
            cr_plus
        ).fit(X)
        s1 = ad1.anomaly_score(X)

        assert_allclose(s0, s1, rtol=0.0001)  # OK

        # Independent models: most likely not equal, but highly correlated.
        ad2 = AnomalyDetector(
            [P_ConditionalGaussian([i], []) for i in range(length)],
            cr_plus
        ).fit(X)
        s2 = ad2.anomaly_score(X)

        ad3 = AnomalyDetector(
            P_ConditionalGaussianCombiner([P_ConditionalGaussian([i], []) for i in range(length)]),
            cr_plus
        ).fit(X)
        s3 = ad3.anomaly_score(X)

        assert_equal(pearsonr(s2, s3)[0] > 0.985, True)

        # Classification over three classes of Markov chains with
        # increasing innovation noise.
        Y = array([sample_markov_chain(length, 0.2) for _ in range(n_samples)])
        Z = array([sample_markov_chain(length, 0.3) for _ in range(n_samples)])

        data = r_[X, Y, Z]
        labels = r_[['X'] * len(X), ['Y'] * len(Y), ['Z'] * len(Z)]

        data_index = shuffle(list(range(len(data))))
        training_set = data_index[:n_samples * 2]
        test_set = data_index[n_samples * 2:]

        models = {
            'independent gaussian':
                AnomalyDetector([P_Gaussian([i]) for i in range(length)], cr_plus),
            'independent conditional gaussian':
                AnomalyDetector([P_ConditionalGaussian([i], []) for i in range(length)], cr_plus),
            'independent conditional gaussian with combiner':
                AnomalyDetector(P_ConditionalGaussianCombiner([P_ConditionalGaussian([i], []) for i in range(length)])),
            'single conditional gaussian with combiner':
                AnomalyDetector(P_ConditionalGaussianCombiner([P_ConditionalGaussian([i], [i - 1]) for i in range(1, length)] +
                                                              [P_ConditionalGaussian([0], [])])),
            'dependency matrix':
                AnomalyDetector(P_ConditionalGaussianDependencyMatrix(list(range(length)), length))
        }

        all_acc = {}
        for key in models:
            ad = models[key].fit(data[training_set], labels[training_set])

            adclf = SklearnClassifier.clf(ad)

            labels_predicted = adclf.predict(data[test_set])
            accuracy = sum(labels[test_set] == labels_predicted) / float(len(test_set))
            all_acc[key] = accuracy
            print(key, "accuracy = ", accuracy)

        # Mathematically equivalent variants should score within two
        # decimals of each other.
        assert_close(all_acc['independent gaussian'], all_acc['independent conditional gaussian'], decimal=2)
        assert_close(all_acc['independent gaussian'], all_acc['independent conditional gaussian with combiner'], decimal=2)
        assert_close(all_acc['single conditional gaussian with combiner'], all_acc['dependency matrix'], decimal=2)
112 |
113 |
114 | if __name__ == '__main__':
115 | unittest.main()
--------------------------------------------------------------------------------
/unittests/test_pickle_export_import.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import pyisc;
4 | import numpy as np
5 | from scipy.stats import norm
6 | from numpy.testing.utils import assert_allclose
7 | import pickle
8 |
class MyTestCase(unittest.TestCase):
    """Pickle round-trip tests: a fitted AnomalyDetector must survive
    pickle.dumps/pickle.loads with identical JSON export and anomaly
    scores. (The duplicated data-set construction is factored into
    _make_data.)"""

    @staticmethod
    def _make_data(normal_len=100, anomaly_len=15):
        """Build a 3-column data set whose final `anomaly_len` rows are drawn
        from wider distributions than the first `normal_len` rows."""
        po_normal = norm(1.1, 5)
        po_anomaly = norm(1.5, 7)

        po_normal2 = norm(2.2, 10)
        po_anomaly2 = norm(3, 12)

        gs_normal = norm(1, 12)
        gs_anomaly = norm(2, 30)

        return np.column_stack(
            [
                list(po_normal.rvs(normal_len)) + list(po_anomaly.rvs(anomaly_len)),
                list(po_normal2.rvs(normal_len)) + list(po_anomaly2.rvs(anomaly_len)),
                list(gs_normal.rvs(normal_len)) + list(gs_anomaly.rvs(anomaly_len)),
            ]
        )

    def test_multivariate_gaussian(self):
        data = self._make_data()

        anomaly_detector = pyisc.AnomalyDetector(
            component_models=[
                pyisc.P_Gaussian(0),
                pyisc.P_Gaussian(1),
                pyisc.P_Gaussian(2)
            ],
            output_combination_rule=pyisc.cr_max
        )

        anomaly_detector.fit(data)

        json = anomaly_detector.exportJSon()

        # NOTE: pickle is only applied to bytes produced in-process here;
        # pickle.loads must never be fed untrusted input.
        p = pickle.dumps(anomaly_detector)
        print(p)

        anomaly_detector2 = pickle.loads(p)

        json2 = anomaly_detector2.exportJSon()
        print(json2)

        # Scores and the JSON export must both survive the round trip.
        assert_allclose(anomaly_detector.anomaly_score(data), anomaly_detector2.anomaly_score(data))
        self.assertEqual(json, json2)

    def test_conditional_gaussian(self):
        data = self._make_data()

        anomaly_detector = pyisc.AnomalyDetector(
            component_models=[
                pyisc.P_ConditionalGaussianCombiner([pyisc.P_ConditionalGaussian([0], [1]), pyisc.P_ConditionalGaussian([1], [2])])
            ],
            output_combination_rule=pyisc.cr_max
        )

        anomaly_detector.fit(data)

        json = anomaly_detector.exportJSon()
        print(json)

        p = pickle.dumps(anomaly_detector)
        print(p)

        anomaly_detector2 = pickle.loads(p)

        json2 = anomaly_detector2.exportJSon()
        print(json2)

        self.assertEqual(json, json2)
        assert_allclose(anomaly_detector.anomaly_score(data), anomaly_detector2.anomaly_score(data))
107 | if __name__ == '__main__':
108 | unittest.main()
109 |
--------------------------------------------------------------------------------