├── .DS_Store ├── .cproject ├── .gitignore ├── .gitmodules ├── .project ├── How_to_commit_push_submodules.txt ├── LICENSE ├── README.md ├── _pyisc_modules ├── AnomalyClustering.py ├── AnomalyDetector.py ├── BaseISC.py ├── DataObject.py ├── OutlierClustering.py ├── SklearnClassifier.py ├── SklearnOutlierDetector.py └── __init__.py ├── bld.bat ├── build.sh ├── conda_build_config.yaml ├── docs ├── pyISC_classification_example.ipynb ├── pyISC_classification_example_2.ipynb ├── pyISC_multivariable_example.ipynb ├── pyISC_simple_anomaly_example.ipynb ├── pyISC_sklearn_anomaly_detection.ipynb ├── pyISC_sklearn_outlier_detection.ipynb ├── pyISC_sklearn_outlier_detection_comparison.ipynb ├── pyISC_sklearn_outlier_detection_realworld_data.ipynb └── pyISC_tutorial.ipynb ├── meta.yaml ├── pyisc.i ├── setup.py ├── setup2.py ├── src ├── .DS_Store ├── _AnomalyDetector.cc ├── _AnomalyDetector.hh ├── _DataObject.cc ├── _DataObject.hh ├── _Format.cc ├── _Format.hh ├── _JSonExporter.cc ├── _JSonExporter.hh ├── _JSonImporter.cc ├── _JSonImporter.hh ├── mystring.cc └── mystring.hh └── unittests ├── __init__.py ├── test_AnomalyDetector.py ├── test_DataObject.py ├── test_JSonExportImport.py ├── test_SklearnOutlierDetection.py ├── test_max_index_problem.py ├── test_p_ConditionalGaussian.py ├── test_p_ConditionalGaussianDependencyMatrix.py └── test_pickle_export_import.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STREAM3/pyISC/b5615fe5d6b3e474f7afcdf3f3e44b3dded2e889/.DS_Store -------------------------------------------------------------------------------- /.cproject: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 38 | 39 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 
| 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 103 | 104 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.cpp 2 | numpy.i 3 | build 4 | pyisc.py 5 | visisc.py 6 | .ipynb_checkpoints 7 | *~ 8 | MANIFEST 9 | .idea 10 | *.pyc 11 | 12 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dataformat"] 2 | path = dataformat 3 | url = https://github.com/sics-dna/dataformat 4 | [submodule "ArduinoJson"] 5 | path = ArduinoJson 6 | url = https://github.com/bblanchon/ArduinoJson.git 7 | [submodule "isc2"] 8 | path = isc2 9 | url = https://github.com/sics-dna/isc2 10 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | pyisc 4 | 5 | 6 | dataformat 7 | isc2 8 | 9 | 10 | 11 | org.python.pydev.PyDevBuilder 12 | 13 | 14 | 15 | 16 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 17 | clean,full,incremental, 18 | 19 | 20 | 21 | 22 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 23 | full,incremental, 24 | 25 | 26 | 27 | 28 | 29 | org.eclipse.cdt.core.cnature 30 | org.eclipse.cdt.core.ccnature 31 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 32 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 33 | org.python.pydev.pythonNature 34 | 35 | 36 | -------------------------------------------------------------------------------- 
/How_to_commit_push_submodules.txt: -------------------------------------------------------------------------------- 1 | In pyISC folder: 2 | 3 | git commit -a -m "Committing submodule changes from superproject" 4 | 5 | git push --recurse-submodules=on-demand 6 | 7 | See https://stackoverflow.com/questions/14233939/git-submodule-commit-push-pull?noredirect=1&lq=1 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 
31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. 
You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 
108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 
145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyISC 2 | 3 | The Python API to the ISC anomaly detection and classification framework. The framework implements Baysian statistical methods for anomaly detection and classification. Currently supported statistical models are: Poisson, Gamma and multivariate Gaussian distributions. 4 | 5 | ### Email forum(s) 6 | 7 | Questions regarding the use of the framework: https://groups.google.com/forum/#!forum/pyisc-users 8 | 9 | ## Prerequisite: 10 | 11 | Notice, pyISC/visISC has only been tested using 64 bit Python. 
12 | 13 | ### Install Python distribution 14 | 15 | Install Python 2.7 16 | 17 | Anaconda is the recommended Python distribution : https://www.continuum.io/downloads 18 | 19 | Libraries: 20 | - numpy, scipy, scikit-learn (required for running pyisc) 21 | - matplotlib, ipython, jupyter, pandas (only required for running tutorial examples) 22 | 23 | Install with anaconda: 24 | 25 | (If you want to disable ssl verification when installing, you will find the instructions here.) 26 | 27 | `>> conda install numpy pandas scikit-learn ipython jupyter` 28 | 29 | 30 | If you intend to also install visISC, you have to downgrade the numpy installation to version 1.9 31 | 32 | `>> conda install numpy==1.9.3` 33 | 34 | ### Install a c++ compiler if not installed 35 | 36 | Windows: 37 | 38 | `>> conda install mingw libpython==1.0` 39 | 40 | OS X: 41 | 42 | Install the Xcode developer tools from App Store. 43 | 44 | ### Install Swig 45 | 46 | (search for suitable version with `>> anaconda search -t conda swig`) 47 | 48 | Windows: 49 | 50 | `>> conda install --channel https://conda.anaconda.org/salilab swig` 51 | 52 | OS X: 53 | 54 | `>> conda install --channel https://conda.anaconda.org/minrk swig` 55 | 56 | 57 | ## Installation 58 | 59 | For installing from source code, you need a git client 60 | 61 | Then: 62 | 63 | `>> git clone https://github.com/STREAM3/pyisc --recursive` 64 | 65 | `>> cd pyisc` 66 | 67 | `>> python setup.py install` 68 | 69 | ## Run tutorial 70 | 71 | `>> cd docs` 72 | 73 | `>> jupyter notebook pyISC_tutorial.ipynb` 74 | 75 | If not opened automatically, click on `pyISC_tutorial.ipynb` in the web page that was opened in a web browser. 76 | 77 | ### How to Cite 78 | 79 | Emruli, B., Olsson, T., & Holst, A. (2017). pyISC: A Bayesian Anomaly Detection Framework for Python. In Florida Artificial Intelligence Research Society Conference. 
Retrieved from https://aaai.org/ocs/index.php/FLAIRS/FLAIRS17/paper/view/15527 80 | 81 | -------------------------------------------------------------------------------- /_pyisc_modules/AnomalyClustering.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------- 2 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB 3 | # 4 | # Main author: Tomas Olsson 5 | # 6 | # This code is free software: you can redistribute it and/or modify it 7 | # under the terms of the GNU Lesser General Public License as published 8 | # by the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # This code is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU Lesser General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU Lesser General Public License 17 | # along with this code. If not, see . 
18 | # ------------------------------------------------------------------------ 19 | 20 | from pyisc import AnomalyDetector, SklearnClassifier 21 | from sklearn import utils 22 | import numpy as np 23 | 24 | 25 | class AnomalyClustering(AnomalyDetector): 26 | max_num_of_iterations = 1000 27 | 28 | def __init__(self, n_clusters=2, n_repeat=10, *anomaly_detector_params0, **anomaly_detector_params1): 29 | self.n_clusters = n_clusters 30 | self.n_repeat = n_repeat 31 | self.ad_parms0 = anomaly_detector_params0 32 | self.ad_parms1 = anomaly_detector_params1 33 | self.clf_ = None 34 | AnomalyDetector.__init__(self,*anomaly_detector_params0, **anomaly_detector_params1) 35 | 36 | def _create_detector(self, *ad_parms0, **ad_parms1): 37 | return AnomalyDetector(*ad_parms0, **ad_parms1) 38 | 39 | def _detector_fit(self, X, y): 40 | return AnomalyDetector.fit(self, X, y) 41 | 42 | def fit(self,X,verbose=False): 43 | ss =[] 44 | labels_list = [] 45 | for i in range(self.n_repeat): 46 | od = self._create_detector(*self.ad_parms0, **self.ad_parms1) 47 | labels = self._train_clf(od, X, self.n_clusters,verbose=verbose) 48 | 49 | ss += [od.loglikelihood(X,labels)] 50 | 51 | labels_list += [labels] 52 | 53 | #print ss, labels 54 | 55 | self._detector_fit(X, np.array(labels_list[np.argmax(ss)])) 56 | 57 | self.clf_ = SklearnClassifier.clf(self) 58 | 59 | return self 60 | 61 | 62 | 63 | def _train_clf(self, ad, X, k=None, default_labels=None, verbose=False): 64 | ''' 65 | 66 | :param ad: anomaly detector that shall be trained 67 | :param X: a DataObject 68 | :param k: the number of clusters 69 | :param default_labels: the clustering is started with the provided clusters/labels, where k is ignored. 
70 | :return: 71 | ''' 72 | cluster_labels = default_labels 73 | 74 | count_equal_movements = 0 75 | num_of_last_movements = 5 # the last 5 number of moments are stored 76 | last_movements = [-1 for _ in range(num_of_last_movements)] 77 | num_of_iterations = 0 78 | 79 | while True: 80 | if cluster_labels is None: # Restart the clustering 81 | cluster_labels = np.array(utils.shuffle(np.mod(np.array(list(range(len(X)))), k))) if k > 1 else np.array([0 for _ in range(len(X))]) 82 | last_movements = [-1 for _ in range(num_of_last_movements)] 83 | num_of_iterations = 0 84 | if verbose: 85 | print("Initialized clusters",np.unique(cluster_labels)) 86 | 87 | ad.fit(X, cluster_labels) 88 | if ad.classes_ == []: 89 | ad.fit(X, np.zeros((len(X)),)) 90 | 91 | clf = SklearnClassifier.clf(ad) 92 | cluster_labels_new = clf.predict(X) 93 | 94 | movements = sum((cluster_labels_new != cluster_labels) * 1.0) 95 | 96 | if movements in last_movements: 97 | count_equal_movements += 1 98 | else: 99 | count_equal_movements = 0 100 | last_movements = last_movements[1:] + [movements] 101 | 102 | if count_equal_movements >= 20 or num_of_iterations > self.max_num_of_iterations: # Restart the clustering if the number of movements in last_movements are greater or more equal than 20 103 | cluster_labels = None # Restart clustering 104 | continue 105 | 106 | if verbose: 107 | print("movements", movements) 108 | 109 | if movements == 0: 110 | break 111 | 112 | cluster_labels = cluster_labels_new 113 | 114 | num_of_iterations += 1 115 | 116 | return cluster_labels 117 | 118 | def anomaly_score(self, X,y=None): 119 | return AnomalyDetector.anomaly_score(self, X, self.clf_.predict(X) if self.clf_ is not None and y is None else y) 120 | 121 | def loglikelihood(self,X,y=None): 122 | return AnomalyDetector.loglikelihood(self, X, self.clf_.predict(X) if self.clf_ is not None and y is None else y) 123 | -------------------------------------------------------------------------------- 
/_pyisc_modules/AnomalyDetector.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Python Wrapper of all ISC anomaly scoring methods. 3 | """ 4 | # -------------------------------------------------------------------------- 5 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB 6 | # 7 | # Main author: Tomas Olsson 8 | # 9 | # This code is free software: you can redistribute it and/or modify it 10 | # under the terms of the GNU Lesser General Public License as published 11 | # by the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # This code is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU Lesser General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU Lesser General Public License 20 | # along with this code. If not, see . 21 | # -------------------------------------------------------------------------- 22 | 23 | from numpy import ndarray, array 24 | 25 | from pyisc import BaseISC, _JSonExporter 26 | 27 | import pyisc 28 | 29 | 30 | class AnomalyDetector(BaseISC): 31 | 32 | def anomaly_score(self,X, y=None, n_jobs=1): 33 | ''' 34 | Score each row in X,y with an anomaly score. 
35 | :param X: a single array, an array of arrays, or an instance of pyisc DataObject 36 | :param y: must be an array,list or None, cannot be a column_index as when fitting the data 37 | :return: 38 | ''' 39 | 40 | if isinstance(X, pyisc.DataObject): 41 | return self._anomaly_detector._CalcAnomaly(X,X.size()) 42 | elif isinstance(X, ndarray) or isinstance(X, list): 43 | data_object = self._convert_to_data_object_in_scoring(array(X), y) 44 | 45 | if data_object is not None: 46 | return self.anomaly_score(data_object) 47 | 48 | raise ValueError("Unknown type of data to score X, y", type(X), type(y)) 49 | 50 | 51 | def anomaly_score_details(self,X,y=None,index=None): 52 | ''' 53 | Computes the detailed anomaly scores of each element in X, that is, anomaly score for each used statistical component\n 54 | :param X: is a DataObject or numpy array or list\n 55 | :param y: is None or an array of classes, must be consistent with how the data was fitted, cannot be a column_index 56 | :param index: is None or an index into X\n\n 57 | :return: a list with (a list for each element in X if X is two dimensional, otherwise only a single list):\n 58 | [\n 59 | a double value with total deviation, \n 60 | an int value with predicted class (if class_column was set to true in constructor),\n 61 | an int value with predicted cluster (if clustering was set to true in constructor), \n 62 | an array with deviations for each individual component,\n 63 | an array with the peak, that is, the most probable feature value for each feature column,\n 64 | an array with the least acceptable value for each feature column,\n 65 | an array with the largest acceptable value for each feature column\n 66 | ] 67 | ''' 68 | if isinstance(X, pyisc._DataObject) and y is None: 69 | if isinstance(index,int): 70 | return self._anomaly_score_intfloat(X._get_intfloat(index),X.length(), X) 71 | else: 72 | return [self.anomaly_score_details(X,index=i) for i in range(X.size())] 73 | elif isinstance(X, ndarray): 74 
| data_object = self._convert_to_data_object_in_scoring(X, y) 75 | if data_object is not None: 76 | return self.anomaly_score_details(data_object,index) 77 | 78 | elif isinstance(X, list): 79 | return self.anomaly_score(array(X),y,index) 80 | 81 | raise ValueError("Unknown type of data to score?", type(X) ) if not isinstance(X, pyisc._DataObject) and not isinstance(X, list) and not isinstance(X, ndarray) else "" 82 | 83 | 84 | 85 | def _anomaly_score_intfloat(self, x_intfloat, length, data_object): 86 | deviations = pyisc._double_array(self.num_of_partitions) 87 | min = pyisc._intfloat_array(length) 88 | max = pyisc._intfloat_array(length) 89 | peak = pyisc._intfloat_array(length) 90 | anom = pyisc._double_array(1) 91 | cla = pyisc._int_array(1) 92 | clu = pyisc._int_array(1) 93 | 94 | self._anomaly_detector._CalcAnomalyDetails(x_intfloat,anom, cla, clu, deviations, peak, min, max) 95 | 96 | if self.is_clustering and self.class_column > -1: 97 | result = [pyisc._get_double_value(anom,0), 98 | pyisc._get_int_value(cla,0), 99 | pyisc._get_int_value(clu,0), 100 | list(pyisc._to_numpy_array(deviations,self.num_of_partitions)), 101 | list(data_object._convert_to_numpyarray(peak, length)), 102 | list(data_object._convert_to_numpyarray(min, length)), 103 | list(data_object._convert_to_numpyarray(max, length))] 104 | elif self.is_clustering: 105 | result = [pyisc._get_double_value(anom,0), 106 | pyisc._get_int_value(clu,0), 107 | list(pyisc._to_numpy_array(deviations,self.num_of_partitions)), 108 | list(data_object._convert_to_numpyarray(peak, length)), 109 | list(data_object._convert_to_numpyarray(min, length)), 110 | list(data_object._convert_to_numpyarray(max, length))] 111 | elif self.class_column > -1: 112 | result = [pyisc._get_double_value(anom,0), 113 | pyisc._get_int_value(cla,0), 114 | list(pyisc._to_numpy_array(deviations,self.num_of_partitions)), 115 | list(data_object._convert_to_numpyarray(peak, length)), 116 | list(data_object._convert_to_numpyarray(min, 
length)), 117 | list(data_object._convert_to_numpyarray(max, length))] 118 | else: 119 | result = [pyisc._get_double_value(anom,0), 120 | list(pyisc._to_numpy_array(deviations,self.num_of_partitions)), 121 | list(data_object._convert_to_numpyarray(peak, length)), 122 | list(data_object._convert_to_numpyarray(min, length)), 123 | list(data_object._convert_to_numpyarray(max, length))] 124 | 125 | pyisc._free_array_double(deviations); 126 | pyisc._free_array_intfloat(min) 127 | pyisc._free_array_intfloat(max) 128 | pyisc._free_array_intfloat(peak) 129 | pyisc._free_array_double(anom) 130 | pyisc._free_array_int(cla) 131 | pyisc._free_array_int(clu) 132 | 133 | 134 | return result 135 | -------------------------------------------------------------------------------- /_pyisc_modules/BaseISC.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Python Wrapper of all ISC anomaly detector training methods. 3 | """ 4 | # -------------------------------------------------------------------------- 5 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB 6 | # 7 | # Main author: Tomas Olsson 8 | # 9 | # This code is free software: you can redistribute it and/or modify it 10 | # under the terms of the GNU Lesser General Public License as published 11 | # by the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # This code is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU Lesser General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU Lesser General Public License 20 | # along with this code. If not, see . 
# --------------------------------------------------------------------------
from _pyisc import _to_cpp_array
from abc import abstractmethod
import numpy
from numpy import ndarray, array, c_
from pyisc import _to_cpp_array_int, _AnomalyDetector, \
    _IscMultiGaussianMicroModel, \
    _IscPoissonMicroModel, \
    _IscPoissonMicroModelOneside, \
    _IscMicroModelVector, _IscGammaMicroModel, \
    _IscMarkovGaussMicroModel, \
    _IscMarkovGaussMicroModelVector, \
    _IscMarkovGaussCombinerMicroModel, \
    _IscMarkovGaussMatrixMicroModel
import pyisc

__author__ = 'tol'

# Rules for combining the anomaly scores of the component models.
cr_min = pyisc.IscMin
cr_max = pyisc.IscMax
cr_plus = pyisc.IscPlus


class P_ProbabilityModel:
    """Abstract base class for the statistical component models.

    A concrete model stores the data-object column indexes it reads and
    knows how to create the corresponding SWIG-wrapped C++ micro model.
    """

    # Cached SWIG micro-model instance; set by create_micromodel().
    _saved_model = None

    @abstractmethod
    def __init__(self):
        pass

    @abstractmethod
    def create_micromodel(self):
        """Create and cache the SWIG-wrapped C++ micro model."""
        pass

    @abstractmethod
    def get_column_index(self):
        '''
        :return: list with used column index
        '''
        pass

    def __getstate__(self):
        # The SWIG-wrapped micro model cannot be pickled, so it is dropped
        # from the pickled state.
        odict = self.__dict__.copy()  # copy the dict since we change it
        # FIX: was `del odict['_saved_model']`, which raised KeyError when the
        # model was pickled before create_micromodel() had ever been called
        # (until then _saved_model only exists as a class attribute).
        odict.pop('_saved_model', None)
        return odict


class P_Gaussian(P_ProbabilityModel):

    def __init__(self, value_column):
        '''
        A multivariate Gaussian distribution using value_column as column
        index (or indexes) into the data object.

        :param value_column: an integer (single variable) or a list of
            integers (multivariate)
        '''
        if isinstance(value_column, list):
            self.column_index = value_column
        else:
            self.column_index = [value_column]

    def get_column_index(self):
        return self.column_index

    def create_micromodel(self):
        # Marshal the column indexes into a C array, hand them to the C++
        # micro model, then free the temporary array.
        column_array = _to_cpp_array_int(self.column_index)
        self._saved_model = _IscMultiGaussianMicroModel(len(self.column_index), column_array)
        pyisc._free_array_int(column_array)
        return self._saved_model

89 | class P_Poisson(P_ProbabilityModel): 90 | 91 | def __init__(self, frequency_column, period_column): 92 | ''' 93 | A Poisson distribution using frequency_column as column index into the data object for the frequency and 94 | period_column into the data object for the period where frequency was counted. 95 | This probability model checks for both unusual high frequency values and unusual small values. 96 | :param frequency_column: 97 | :param period_column: 98 | ''' 99 | self.column_index = [frequency_column,period_column] 100 | 101 | def get_column_index(self): 102 | return self.column_index 103 | 104 | def create_micromodel(self): 105 | self._saved_model = _IscPoissonMicroModel(self.column_index[0], self.column_index[1]) 106 | return self._saved_model 107 | 108 | 109 | class P_PoissonOnesided(P_ProbabilityModel): 110 | 111 | def __init__(self, frequency_column, period_column): 112 | ''' 113 | A Poisson distribution using frequency_column as column index into the data object for the frequency and 114 | period_column into the data object for the period where frequency was counted. 115 | This probability model only checks for unusual high frequency values, but not unusual small values. 116 | :param frequency_column: 117 | :param period_column: 118 | :return: 119 | ''' 120 | self.column_index = [frequency_column,period_column] 121 | 122 | def get_column_index(self): 123 | return self.column_index 124 | 125 | def create_micromodel(self): 126 | self._saved_model = _IscPoissonMicroModelOneside(self.column_index[0], self.column_index[1]) 127 | return self._saved_model 128 | 129 | 130 | class P_Gamma(P_ProbabilityModel): 131 | 132 | def __init__(self, frequency_column, period_column): 133 | ''' 134 | An approximation of the Gamma distribution by the use of Poisson distribution that uses the frequency_column as column index into the data object for the frequency and 135 | period_column into the data object for the period where frequency was counted. 
136 | :param frequency_column: 137 | :param period_column: 138 | :return: 139 | ''' 140 | 141 | self.frequency_column = frequency_column 142 | self.period_column = period_column 143 | 144 | def get_column_index(self): 145 | return [self.frequency_column,self.period_column] 146 | 147 | def create_micromodel(self): 148 | self._saved_model = _IscGammaMicroModel(self.frequency_column,self.period_column) 149 | return self._saved_model 150 | 151 | class P_ConditionalGaussian(P_ProbabilityModel): 152 | 153 | def __init__(self, prediction_column, conditional_column): 154 | ''' 155 | Implements a conditional multivariate Gaussian distribution. 156 | 157 | :param prediction_column: an integer or an list of integers 158 | :param condition_column: an integer or an list of integers 159 | ''' 160 | 161 | self.prediction_column = prediction_column 162 | self.conditional_column = conditional_column 163 | 164 | def get_column_index(self): 165 | return self.prediction_column if isinstance(self.prediction_column, list) else [self.prediction_column] + \ 166 | self.conditional_column if isinstance(self.prediction_column, list) else [self.conditional_column] 167 | 168 | def create_micromodel(self): 169 | pred_index = _to_cpp_array_int(self.prediction_column) 170 | cond_index= _to_cpp_array_int(self.conditional_column) 171 | self._saved_model = _IscMarkovGaussMicroModel(pred_index, len(self.prediction_column), 172 | cond_index, len(self.conditional_column)) 173 | 174 | pyisc._free_array_int(pred_index) 175 | pyisc._free_array_int(cond_index) 176 | 177 | return self._saved_model 178 | 179 | class P_ConditionalGaussianCombiner(P_ProbabilityModel): 180 | 181 | def __init__(self, gaussian_components): 182 | ''' 183 | Combines the contributions from conditionally independent multivariate conditional Gaussian distributions, so that 184 | a Bayesian belief net or Markov chain can be created. The components must form a directed acyclic graph. 
185 | 186 | :param gaussian_components: a single P_ConditionalGauss or a list of P_ConditionalGauss. 187 | ''' 188 | 189 | assert isinstance(gaussian_components, P_ConditionalGaussian) or \ 190 | isinstance(gaussian_components,list) and \ 191 | all([isinstance(comp, P_ConditionalGaussian) for comp in gaussian_components]) 192 | 193 | self.gaussian_components = gaussian_components 194 | 195 | def get_column_index(self): 196 | cols = [] 197 | for comp in self.gaussian_components: 198 | cols += comp.get_column_index() 199 | 200 | return cols 201 | 202 | def create_micromodel(self): 203 | num_of_components = len(self.gaussian_components) 204 | creator = _IscMarkovGaussMicroModelVector() 205 | for i in range(num_of_components): 206 | creator.push_back(self.gaussian_components[i].create_micromodel()) 207 | ptr_creator = pyisc._to_pointer(creator) 208 | self._saved_model = _IscMarkovGaussCombinerMicroModel(ptr_creator, num_of_components) 209 | pyisc._free_pointer(ptr_creator) 210 | return self._saved_model 211 | 212 | class P_ConditionalGaussianDependencyMatrix(P_ProbabilityModel): 213 | 214 | def __init__(self, value_columns, elements_per_row): 215 | ''' 216 | Creates a dependency matrix where each element is only dependent on its right neighbour and the element directly 217 | below in all cases where they are present. Otherwise the elements are only dependent on the element of the two 218 | neighbours that is present, or no element. 219 | 220 | :param value_columns: the column indexes that are contained in the matrix as a sequence of the elements 221 | from left to the right and from the first row to the last row. 222 | :param elements_per_row: the number of column indexes (elements) that constitutes a row in the matrix, 223 | all rows are equally long. 
        '''

        self.value_columns = value_columns
        self.slots_per_row = elements_per_row

    def get_column_index(self):
        # Columns consumed by this model; used by BaseISC to compute _max_index.
        return self.value_columns

    def create_micromodel(self):
        # Marshal the Python column list into a C++ int array, hand it to the
        # SWIG-wrapped micro model, then free the temporary array again.
        value_array = _to_cpp_array_int(self.value_columns)
        self._saved_model = _IscMarkovGaussMatrixMicroModel(value_array, len(self.value_columns), self.slots_per_row)
        pyisc._free_array_int(value_array)
        return self._saved_model


class BaseISC(object):
    # List of P_ProbabilityModel instances; assigned in __init__.
    component_models = None

    def __init__(self, component_models=P_Gaussian(0), output_combination_rule=cr_max, anomaly_threshold = 0.0):
        '''
        The base class for all pyISC classes for statistical inference

        :param component_models: a statistical model reused for all mixture components, or an list of statistical models.
        Available statistical models are: P_Gaussian, P_Poisson, P_PoissonOneside.
        :param output_combination_rule: an input defining which type of rule to use for combining the anomaly score
        output from each model in component_model. Available combination rules are: cr_max and cr_plus.
        :param anomaly_threshold: the threshold at which a row in the input is considered a anomaly during training,
        might differ from what is used for anomaly decision.
        :return:
        '''

        # Features start at column 0; kept as a named variable so the C++
        # argument mapping below stays readable.
        feature_column_start=0


        assert isinstance(anomaly_threshold, float) and anomaly_threshold >= 0
        assert isinstance(feature_column_start, int) and feature_column_start >= 0
        assert isinstance(component_models, P_ProbabilityModel) or \
               isinstance(component_models, list) and len(component_models) > 0 and \
               all([isinstance(m, P_ProbabilityModel) for m in component_models])
        assert output_combination_rule in [cr_min, cr_max, cr_plus]



        self.anomaly_threshold = anomaly_threshold
        self.is_clustering = False #clustering not used in the python wrapper since it does not seem to work in the C++ code.
        self.output_combination_rule = output_combination_rule

        #//AnomalyDetector(int n, int off, int splt, double th, int cl); // Sublasses must know the numbers and types of micromodels

        #/**
        #* n is number of isc mixture components
        # * off is the first column containing features used by the detector
        # * splt is a the column containing a known class
        # * th is a threshold on when to consider a vector of data as anomalous
        # * cl is a variable if zero indicate no clustering else indicates that clustering should be done
        # * cr is variable indicating how the anomaly scores for the different isc mixture components should be combined
        # * cf is a function that creates a isc micro component for each of the n isc mixture component.

        off = feature_column_start

        # no split class
        self.class_column = None
        splt = -1

        th = anomaly_threshold
        cl = 1 if self.is_clustering else 0

        # Accept a single model by wrapping it into a one-element list.
        if isinstance(component_models, P_ProbabilityModel):
            n = 1
            component_models = [component_models]
        else:
            n = len(component_models)

        self.component_models = component_models

        # Largest column index referenced by any component model; checked against
        # the width of every fitted data object later on.
        self._max_index = numpy.vstack([numpy.max(_.get_column_index()) for _ in component_models]).flatten().max()


        self.classes_ = None
        self.num_of_partitions = n

        self._create_inner_anomaly_detector(cl, n, off, output_combination_rule, splt, th)

    def _create_inner_anomaly_detector(self, cl, n, off, output_combination_rule, splt, th):
        # Map argument to C++ argument
        comp_distributions = _IscMicroModelVector()
        for i in range(n):
            comp_distributions.push_back(self.component_models[i].create_micromodel())
        self._anomaly_detector = _AnomalyDetector(off, splt, th, cl, output_combination_rule, comp_distributions);

    def fit(self, X, y=None):
        '''
        Train the anomaly detector using a DataObject or an array of arrays

        :param X: a single array, an array of arrays, or an instance of pyisc DataObject
        :param y: must be an array,list, a column index (integer) or None
        :return:
        '''

        return self._fit(X,y)

    def _fit(self,X,y=None):


        if isinstance(X, pyisc.DataObject) and y is None:
            assert self._max_index < X.length() # ensure that data distribution has not too large index into the data

            assert y is None # Contained in the data object
            self.class_column = X.class_column
            # NOTE(review): X.class_column may be None here, and `None >= 0`
            # raises TypeError on Python 3 -- confirm intended behaviour.
            if self.class_column >= 0:
                self.classes_ = X.classes_

            self._anomaly_detector._SetParams(
                0,
                -1 if X.class_column is None else X.class_column,
                self.anomaly_threshold,
                1 if
self.is_clustering else 0
            )
            self._anomaly_detector._TrainData(X)
            return self
        if isinstance(X, ndarray):

            class_column = -1
            data_object = None
            assert X.ndim <= 2
            if X.ndim == 2:
                max_class_column = X.shape[1]
            else:
                max_class_column = 1
            if isinstance(y, list) or isinstance(y, ndarray):
                # y holds per-row labels: append it to X as an extra class column.
                assert len(X) == len(y)
                class_column = max_class_column
                data_object = pyisc.DataObject(numpy.c_[X, y], class_column=class_column)
            elif y is None or int(y) == y and y > -1 and y <= max_class_column:
                # y is a column index into X (or None for no class column).
                self.class_column = y
                data_object = pyisc.DataObject(X,class_column=y)

            if data_object is not None:
                assert self._max_index < data_object.length() # ensure that data distribution has not too large index into the data

                return self._fit(data_object)

        raise ValueError("Unknown type of data to fit X, y:", type(X), type(y))

    def fit_incrementally(self, X, y=None):
        '''
        Incrementally train the anomaly detector. Call reset() to restart learning. Requires being trained using the fit
        method before first call.

        :param format: a Format describing the types of the data per single array
        :param X: a single array, an array of arrays, or an instance of pyisc DataObject
        :param y: a single array with classes or None, optional, only required if previously trained with classes
        :return: self
        '''


        if isinstance(X, pyisc.DataObject) and y is None and X.class_column == self.class_column:

            assert self._max_index < X.length() # ensure that data distribution has not too large index into the data

            self._anomaly_detector._TrainDataIncrementally(X)
            return self
        elif isinstance(X, ndarray) or isinstance(X, list):

            assert self._max_index < len(X[0]) # ensure that data distribution has not too large index into the data

            data_object = self._convert_to_data_object_in_scoring(array(X), y)

            if data_object is not None:
                return self.fit_incrementally(data_object)

        raise ValueError("Unknown type of data to fit X, y", type(X), type(y))

    def unfit_incrementally(self, X, y=None):
        # Reverses the effect of a previous fit_incrementally for the given rows.
        if isinstance(X, pyisc.DataObject) and y is None and X.class_column == self.class_column:
            self._anomaly_detector._UntrainDataIncrementally(X)
            return self
        elif isinstance(X, ndarray) or isinstance(X, list):
            data_object = self._convert_to_data_object_in_scoring(array(X), y)

            if data_object is not None:
                return self.unfit_incrementally(data_object)

        raise ValueError("Unknown type of data to fit X, y", type(X), type(y))

    def _convert_to_data_object_in_scoring(self, X, y):
        # Wrap X (and optional labels y) into a DataObject using the class
        # column and classes learned at fit time.
        data_object = None
        if isinstance(y, list) or isinstance(y, ndarray):
            assert X.ndim == 2 and self.class_column == X.shape[1] or X.ndim == 1 and self.class_column == 1
            data_object = pyisc.DataObject(numpy.c_[X, y], class_column=self.class_column,classes=self.classes_)
        else:
            assert self.class_column == y
            data_object = pyisc.DataObject(X, class_column=self.class_column,classes=self.classes_ if y is not None else None)
        return data_object

    def reset(self):
        # Forget everything learned so far in the underlying C++ detector.
        self._anomaly_detector._Reset();


    def compute_logp(self, X1):
        # Log probability of each row. When a class column is configured, the
        # rows are scored once per class in self.classes_ and a list of arrays
        # (one per class) is returned; otherwise a single array is returned.
        if self.class_column is not None and not isinstance(X1, pyisc._DataObject):
            if X1.ndim == 2 and self.class_column >= 0 and self.class_column < X1.shape[1]:
                data_object = self. \
                    _convert_to_data_object_in_scoring(
                    X1,
                    y=self.class_column
                )
            else:
                data_object = self. \
                    _convert_to_data_object_in_scoring(
                    X1,
                    y=array([None] * len(X1))
                )
            logps = []
            clss = list(self.classes_)
            for clazz in self.classes_:
                # Score every row as if it belonged to clazz.
                pyisc._DataObject.set_column_values(data_object, self.class_column, [clss.index(clazz)] * len(data_object))

                logps += [self._anomaly_detector._LogProbabilityOfData(data_object, len(X1))]

            return logps
        else:
            data_object = pyisc.DataObject(X1) if not isinstance(X1, pyisc._DataObject) else X1
            return self._anomaly_detector._LogProbabilityOfData(data_object, len(X1))


    def loglikelihood(self,X,y=None):
        # Sum of the per-row log probabilities, optionally with labels y
        # appended as the class column.
        assert isinstance(X, ndarray) and (self.class_column is None and y is None or len(y) == len(X))

        if y is not None:
            return self._anomaly_detector._LogProbabilityOfData(pyisc.DataObject(c_[X,y], class_column=len(X[0])), len(X)).sum()
        else:
            return self._anomaly_detector._LogProbabilityOfData(pyisc.DataObject(X), len(X)).sum()


    def exportJSon(self):
        '''
        Export the learned model to JSon.
        :return: string with JSon
        '''
        #TODO: add export/import of constructor arguments

        exporter = pyisc._JSonExporter()
        self._anomaly_detector.exportModel(exporter)
        return exporter.getJSonString()

    def importJSon(self, json):
        '''
        Parses and imports a learned model from JSon.

        Observe that the constructor arguments of the anomaly detector must be known and defined before importing.
        That is, the component_models, output_combination_rule, anomaly_threshold, etc. are not exported/imported due to
        limitation of the underlying C++ implementation, but should be provided to the importing detector at construction.

        :param json: string
        :return: True if successful, False otherwise
        '''
        importer = pyisc._JSonImporter()
        success = importer.parseJSon(json)
        if success:
            self._anomaly_detector.importModel(importer)
        return success


    # The getstate setstate let us handle the pickling of the swig object using json instead
    def __getstate__(self):
        odict = self.__dict__.copy() # copy the dict since we change it
        del odict['_anomaly_detector'] # remove swig object entry
        odict['_anomaly_detector_json'] = self.exportJSon()
        return odict

    def __setstate__(self, dict):
        _anomaly_detector_json = dict['_anomaly_detector_json']
        del dict['_anomaly_detector_json']
        self.__dict__.update(dict) # update attributes
        # Rebuild the SWIG detector from scratch, then restore its learned
        # state from the exported JSON.
        self._create_inner_anomaly_detector(False, self.num_of_partitions, 0, self.output_combination_rule, -1, self.anomaly_threshold)
        if not self.importJSon(_anomaly_detector_json):
            raise Exception("Import of JSON did not work properly")
--------------------------------------------------------------------------------
/_pyisc_modules/DataObject.py:
--------------------------------------------------------------------------------
"""
The Python Wrapper of all ISC DataObject methods.
3 | """ 4 | # -------------------------------------------------------------------------- 5 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB 6 | # 7 | # Main author: Tomas Olsson 8 | # 9 | # This code is free software: you can redistribute it and/or modify it 10 | # under the terms of the GNU Lesser General Public License as published 11 | # by the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # This code is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU Lesser General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU Lesser General Public License 20 | # along with this code. If not, see . 21 | # -------------------------------------------------------------------------- 22 | 23 | 24 | from numpy import ndarray, array 25 | from numpy.ma.extras import unique 26 | 27 | import pyisc 28 | from pyisc import Format 29 | 30 | __author__ = 'tol' 31 | 32 | class DataObject(pyisc._DataObject): 33 | 34 | ''' 35 | The classes_ used to generate indexes into the class_column 36 | ''' 37 | classes_ = None 38 | ''' 39 | The column index that contains the class parameter. 40 | ''' 41 | class_column = None 42 | 43 | def __init__(self, X, format=None, class_column=None, classes='auto'): 44 | ''' 45 | The DataObject class represents the data analysed using a AnomalyDetector. 46 | 47 | X can be an Format instance or an numpy array. In the previous case, we assume 48 | it is used to describe the content that is added to the object using add2Darray 49 | or add1Darray methods. In the other case, we automatically generate a format instance, 50 | unless the format argument is provided. 
If the class_column is specified, we use it 51 | to generate a column in the auto-generated format where the elements are index into 52 | the classes_ list. If the classes_ list is set to 'auto', the elements in X of the 53 | class_column are used to auto-create a classes_ list. 54 | 55 | :param X: a Format instance or a numpy array 56 | :param format: None or a pyisc Format instance 57 | :param class_column: None or an integer 58 | :param classes: 'auto' or a list of elements in X[class_column] 59 | :return: 60 | ''' 61 | self.class_column = class_column 62 | if isinstance(X, pyisc.Format): 63 | self._format = X 64 | pyisc._DataObject.__init__(self,X) 65 | return 66 | elif isinstance(X, ndarray): 67 | if format is None: 68 | format = Format() 69 | num_cols = len(X.T) 70 | if class_column is not None: 71 | assert class_column >= 0 and class_column < num_cols 72 | for col in range(num_cols): 73 | if col != class_column: 74 | format.addColumn("Column %i"%col, Format.Continuous) 75 | else: 76 | format.addColumn("Column %i"%col, Format.Symbol) 77 | A = X.T.copy() 78 | if classes == 'auto': 79 | self.classes_ = list(sorted(unique(A[class_column]))) 80 | else: 81 | self.classes_ = classes 82 | class_col = format.get_nth_column(class_column) 83 | for c in self.classes_: 84 | class_col.add("Class %i"%c if isinstance(c, int) else "Class %s"%c if isinstance(c, str) and len(c) == 1 else str(c)) 85 | A[class_column] = [self.classes_.index(v) if v in self.classes_ else -1 for v in A[class_column]] 86 | X = A.T 87 | self._format = format 88 | if X.ndim == 1: # This fixes a problem of converting it to c++ data object 89 | X = array([X.copy()]).T 90 | 91 | pyisc._DataObject.__init__(self,format,X.astype(float)) 92 | return 93 | elif isinstance(format, pyisc.Format): 94 | self._format = format 95 | pyisc._DataObject.__init__(self,format,X) 96 | return 97 | pyisc._DataObject.__init__(self,X) 98 | 99 | def as_1d_array(self): 100 | array1D = 
self._as1DArray(self.size()*self.length()).astype(object)

        #print self.class_column, self.classes_, unique(array1D[range(self.class_column,len(array1D),self.length())])
        if self.class_column is not None:
            # Translate the numeric class indexes back into the original class
            # labels; an index of -1 marks a value that was not among the known
            # classes and is reported as None.
            array1D[list(range(self.class_column,len(array1D),self.length()))] = \
                [self.classes_[int(c)] if int(c) != -1 else None for c in array1D[list(range(self.class_column,len(array1D),self.length()))] ]

        return array1D

    def as_2d_array(self):
        # Reshape the flat 1D representation into (num_rows, num_columns).
        array1D = self.as_1d_array()
        return array1D.reshape((self.size(),self.length()))

    def set_column_values(self, column_index, values):
        '''
        Sets all values in a column, if the column is the class column, then the values must be one of the ones provided in the constructor.
        :param column_index: index of the column to overwrite
        :param values: one value per row; class labels are mapped to their index in classes_
        :return:
        '''
        if column_index == self.class_column:
            values = [self.classes_.index(c) for c in values]
        pyisc._DataObject.set_column_values(self, column_index, array(values).astype(float))


    def __getitem__(self,index):
        # Supports negative indexes like a normal Python sequence.
        # NOTE(review): an index below -size() stays negative after the shift
        # and is passed straight to _getRow -- confirm that is intended.
        if index <= -1:
            index = self.size()+index
        if index < self.size():
            return self._getRow(index, self.length())
        else:
            return None

    def __len__(self):
        return self.size()
--------------------------------------------------------------------------------
/_pyisc_modules/OutlierClustering.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------------------------
# Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB
#
# Main author: Tomas Olsson
#
# This code is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This code is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this code. If not, see <http://www.gnu.org/licenses/>.
# ------------------------------------------------------------------------

from pyisc import SklearnOutlierDetector
from .AnomalyClustering import AnomalyClustering

class OutlierClustering(AnomalyClustering,SklearnOutlierDetector):
    # Upper bound on clustering iterations.
    # NOTE(review): not referenced in this file -- presumably read by the
    # AnomalyClustering base class; confirm before removing.
    max_num_of_iterations = 1000

    def __init__(self, n_clusters=2, n_repeat=10, *anomaly_detector_params0, **anomaly_detector_params1):
        # Cluster the data into n_clusters, restarting n_repeat times; all
        # remaining arguments are forwarded to SklearnOutlierDetector.
        self.n_clusters = n_clusters
        self.n_repeat = n_repeat
        self.ad_parms0 = anomaly_detector_params0
        self.ad_parms1 = anomaly_detector_params1
        self.clf_ = None  # cluster-assignment classifier, set during fit
        SklearnOutlierDetector.__init__(self,*anomaly_detector_params0, **anomaly_detector_params1)

    def _create_detector(self, *ad_parms0, **ad_parms1):
        # Factory used by AnomalyClustering to build fresh detectors.
        return SklearnOutlierDetector(*ad_parms0, **ad_parms1)

    def _detector_fit(self, X, y):
        # Explicit base-class call: dispatch the fit to the outlier detector,
        # not to AnomalyClustering.fit.
        return SklearnOutlierDetector.fit(self, X, y)

    def fit(self,X,verbose=False):
        return AnomalyClustering.fit(self,X,verbose=verbose)

    def predict(self, X):
        # Outlier prediction conditioned on the cluster assignment from clf_.
        return SklearnOutlierDetector.predict(self, X, self.clf_.predict(X))

    def anomaly_score(self, X, y=None):
        # Use the fitted cluster assignments when no explicit labels are given.
        return SklearnOutlierDetector.anomaly_score(self, X, self.clf_.predict(X) if self.clf_ is not None and y is None else y)

    def loglikelihood(self,X,y=None):
        return AnomalyClustering.loglikelihood(self, X, self.clf_.predict(X) if self.clf_ is not None and y is None else y)
--------------------------------------------------------------------------------
/_pyisc_modules/SklearnClassifier.py:
-------------------------------------------------------------------------------- 1 | """ 2 | The Python Wrapper of all ISC classification methods that is compatible with scikit-learn 3 | classifiers (http://scikit-learn.org) 4 | """ 5 | # -------------------------------------------------------------------------- 6 | # Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB 7 | # 8 | # Main author: Tomas Olsson 9 | # 10 | # This code is free software: you can redistribute it and/or modify it 11 | # under the terms of the GNU Lesser General Public License as published 12 | # by the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This code is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU Lesser General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU Lesser General Public License 21 | # along with this code. If not, see . 22 | # -------------------------------------------------------------------------- 23 | from _pyisc import _AnomalyDetector__ClassifyData 24 | 25 | from numpy import array, ndarray 26 | from numpy.ma.core import exp 27 | from scipy.misc import logsumexp 28 | 29 | from sklearn.base import ClassifierMixin, BaseEstimator 30 | from pyisc import P_Gaussian, BaseISC, cr_max 31 | import pyisc 32 | 33 | 34 | class SklearnClassifier(BaseISC, BaseEstimator, ClassifierMixin): 35 | classification_threshold = None 36 | 37 | def __init__(self, component_models=P_Gaussian(0), 38 | classification_threshold=1e12, 39 | output_combination_rule=cr_max, 40 | training_anomaly_threshold = 0.0): 41 | 42 | ''' 43 | 44 | :param classification_threshold: (optional) a threshold for specifying that instances with anomaly scores below 45 | the threshold should be classified. 
 If not specified, the anomaly threshold is set to very large.
        :return:
        '''
        self.classification_threshold = classification_threshold
        super(SklearnClassifier, self).__init__(component_models,output_combination_rule,training_anomaly_threshold)

    @staticmethod
    def clf(anomaly_detector,classification_threshold=1e12):
        '''
        Converts a pyisc AnomalyDetector into a SklearnClassifier
        :param self:
        :param anomaly_detector:
        :param classification_threshold:
        :return:
        '''
        assert isinstance(anomaly_detector, pyisc.AnomalyDetector)
        classifier = SklearnClassifier()
        # Share the underlying SWIG detector and copy the learned state over.
        classifier._anomaly_detector = anomaly_detector._anomaly_detector
        classifier.class_column = anomaly_detector.class_column
        classifier.anomaly_threshold = anomaly_detector.anomaly_threshold
        classifier.classes_ = anomaly_detector.classes_
        classifier.is_clustering = anomaly_detector.is_clustering
        classifier.num_of_partitions = anomaly_detector.num_of_partitions
        classifier.classification_threshold = classification_threshold

        return classifier

    def predict(self, X):
        '''
        This method classifies each instance in X with a class, if the anomaly detector was trained with classes.

        :param X: a numpy array or a pyisc DataObject
        :return: an array with a classification for each instance in X, an anomalous instance below given classification threshold is classified as None.
        '''

        # Pick the class with the largest (normalized) log probability per row.
        probs = self.predict_log_proba(X)
        return array(self.classes_)[probs.argmax(1)]

        # assert self.class_column > -1
        #
        # DO = None
        # if isinstance(X, pyisc.DataObject):
        #     assert X.class_column == self.class_column
        #     DO = X
        # elif isinstance(X, ndarray):
        #     if self.class_column == len(X[0]):
        #         DO = self._convert_to_data_object_in_scoring(X, [None]*len(X))
        #     else:
        #         X1 = X.copy()
        #         X1.T[self.class_column] = None
        #         DO = self._convert_to_data_object_in_scoring(X1, self.class_column)
        #
        # class_ids, _ = self._anomaly_detector._ClassifyData(DO, len(X), len(X))
        #
        # return array(self.classes_)[class_ids]#[probs.argmax(1)]



    def predict_log_proba(self,X):
        # Per-class log probabilities normalized with logsumexp so each row's
        # probabilities sum to one.
        assert self.class_column > -1

        X1 = None
        if isinstance(X, pyisc.DataObject):
            assert X.class_column == self.class_column
            X1 = X.as_2d_array()
        elif isinstance(X, ndarray):
            X1 = X.copy()


        if X1 is not None:

            logps = self.compute_logp(X1)

            LogPs = [x-logsumexp(x) for x in array(logps).T] #normalized

            return array(LogPs)
        else:
            raise ValueError("Unknown type of data to score:", type(X))


    def predict_proba(self,X):
        # Exponentiate the normalized log probabilities and renormalize rowwise.
        Ps = exp(self.predict_log_proba(X))

        return array([p/s for p,s in zip(Ps,Ps.sum(1))])
--------------------------------------------------------------------------------
/_pyisc_modules/SklearnOutlierDetector.py:
--------------------------------------------------------------------------------
"""
The Python Wrapper of all ISC classification methods that is compatible with scikit-learn
classifiers (http://scikit-learn.org)
"""
# --------------------------------------------------------------------------
# Copyright (C) 2014, 2015, 2016, 2017 SICS Swedish ICT AB
#
# Main author: Tomas Olsson
#
# This code is free
software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This code is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this code. If not, see <http://www.gnu.org/licenses/>.
# --------------------------------------------------------------------------

import pyisc
from numpy import percentile, abs, c_, array

from pyisc import DataObject


class SklearnOutlierDetector(pyisc.AnomalyDetector):
    # Decision threshold on the transformed anomaly score; learned in fit().
    threshold_ = None

    def __init__(self,contamination=0.01, *anomaly_detector_params0, **anomaly_detector_params1):
        '''
        This class can be used for classifying anomalies when the contamination fraction is known.
        It is implemented to be used together with the methods listed at
        http://scikit-learn.org/stable/auto_examples/covariance/plot_outlier_detection.html

        :param contamination: fraction of outliers in the data set
        :param anomaly_detector_params0: the same parameters as in the pyisc.AnomalyDetector
        :param anomaly_detector_params1: the same parameters as in the pyisc.AnomalyDetector
        '''
        self.contamination = contamination
        # NOTE(review): super(pyisc.AnomalyDetector, self) skips
        # AnomalyDetector.__init__ and dispatches to ITS base class -- confirm
        # this is intended rather than super(SklearnOutlierDetector, self).
        super(pyisc.AnomalyDetector,self).__init__(*anomaly_detector_params0, **anomaly_detector_params1)

    def fit(self, X, y=None):
        # Iteratively refit on the non-outlying part of the data until the
        # contamination-percentile threshold stabilizes (change <= 0.01) or
        # 100 iterations have passed.
        old_threshold = None
        threshold = None
        self.threshold_ = 0.0

        self._fit(X,y)

        count = 0
        while count < 100 and (old_threshold is None or abs(threshold - old_threshold) > 0.01):
            old_threshold = threshold
            ss = self.decision_function(X,y)
            threshold = percentile(ss, 100 * self.contamination)

            # Refit using only the rows scored above the current threshold.
            self._fit(X[ss > threshold],y[ss > threshold] if y is not None else None)

            count += 1

        self.threshold_ = threshold

        return self

    def decision_function(self,X,y=None):
        '''
        Returns a measure of anomaly (the log probability of the data) from smallest (most anomalous) to high (least anomalous).
        :param X: an numpy array
        :param y: an numpy array or None
        :return: numpy array
        '''

        # Inverted anomaly score (small epsilon guards against division by
        # zero), shifted by the learned threshold so that 0 is the boundary.
        ss = (1.0/(self.anomaly_score(X,y)+1e-10) - self.threshold_)

        return ss

    def predict(self, X,y=None):
        '''
        Returns an numpy array with 1 if a row is not anomalous and -1 if anomalous
        :param X: an numpy array
        :param y: an numpy array or None (default)
        :param decision_threshold: float value for deciding whether a point is anomalous
        :return: numpy array
        '''
        return 2 * (self.decision_function(X,y) > 0) - 1
--------------------------------------------------------------------------------
/_pyisc_modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STREAM3/pyISC/b5615fe5d6b3e474f7afcdf3f3e44b3dded2e889/_pyisc_modules/__init__.py
--------------------------------------------------------------------------------
/bld.bat:
--------------------------------------------------------------------------------
"%PYTHON%" setup.py install --single-version-externally-managed --record=record.txt
if errorlevel 1 exit 1
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
$PYTHON setup.py install --single-version-externally-managed --record=record.txt # Python command to install the script.
2 | -------------------------------------------------------------------------------- /conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | python: 2 | - 2.7 3 | - 3.6 -------------------------------------------------------------------------------- /docs/pyISC_multivariable_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# pyISC Example: MultivariableAnomaly Detection\n", 8 | "In this example, we extend the simple example with one Poisson distributed variable to the multivariate case with three variables, two Poisson distributed variables and one Gaussian distributed variable." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import pyisc;\n", 18 | "import numpy as np\n", 19 | "from scipy.stats import poisson, norm\n", 20 | "%matplotlib inline\n", 21 | "from pylab import plot" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Create Data\n", 29 | "Create a data set with 3 columns from different probablity distributions:" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "po_normal = poisson(10)\n", 39 | "po_anomaly = poisson(25)\n", 40 | "\n", 41 | "po_normal2 = poisson(2)\n", 42 | "po_anomaly2 = poisson(3)\n", 43 | "\n", 44 | "gs_normal = norm(1, 12)\n", 45 | "gs_anomaly = norm(2,30)\n", 46 | "\n", 47 | "normal_len = 10000\n", 48 | "anomaly_len = 15\n", 49 | "\n", 50 | "data = np.column_stack(\n", 51 | " [\n", 52 | " [1] * (normal_len+anomaly_len),\n", 53 | " list(po_normal.rvs(normal_len))+list(po_anomaly.rvs(anomaly_len)),\n", 54 | " list(po_normal2.rvs(normal_len))+list(po_anomaly2.rvs(anomaly_len)),\n", 55 | " 
list(gs_normal.rvs(normal_len))+list(gs_anomaly.rvs(anomaly_len)),\n", 56 | " ]\n", 57 | ")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Used Anomaly Detector\n", 65 | "Create an anomaly detector using as first argument the used statistical models. Then we use \n", 66 | "- a onesided Poisson distribution for modelling the first frequency column (column 1) (as in the first example),\n", 67 | "- a twosided Poisson distribution for the second frequency column (column 2),\n", 68 | "- and a Gaussian (Normal) distribution for the last column (column 3).\n", 69 | "\n", 70 | "Given that we now have more than one variable, it is necessary to also add a method to combine the output from the statistical models, which in this case is the maximum anomaly score of each component model:" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "anomaly_detector = pyisc.AnomalyDetector(\n", 80 | " component_models=[\n", 81 | " pyisc.P_PoissonOnesided(1,0), # columns 1 and 0\n", 82 | " pyisc.P_Poisson(2,0), # columns 2 and 0\n", 83 | " pyisc.P_Gaussian(3) # column 3\n", 84 | " ],\n", 85 | " output_combination_rule=pyisc.cr_max\n", 86 | ")" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "Train the anomaly detector:" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "anomaly_detector.fit(data);" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 5, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Compute the anomaly scores for each data point:" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 
125 | "scores = anomaly_detector.anomaly_score(data)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "## Anomaly Scores\n", 133 | "Now we can print some example of normal frequencies vs. anomaly scores for the 15 first normal data points: " 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "from pandas import DataFrame\n", 143 | "df= DataFrame(data[:15], columns=['#Days', 'Freq1','Freq2','Measure'])\n", 144 | "df['Anomaly Score'] = scores[:15]\n", 145 | "print df.to_string()" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "The anomalous frequencies vs. anomaly scores for the 15 anomalous data points:" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "scrolled": true 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "df= DataFrame(data[-15:], columns=['#Days', 'Freq1','Freq2','Measure'])\n", 164 | "df['Anomaly Score'] = scores[-15:]\n", 165 | "print df.to_string()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "As can be seen above, the anomalous data also have higher anomaly scores than the normal frequencies as it should be.

\n", 173 | "This becomes even more visible if we plot the anomaly scores (y-axis) against each data point (x-axis):" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "plot(scores, '.');" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "We can also look at the details of each column in terms of their individual anomaly scores:" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "score_details = anomaly_detector.anomaly_score_details(data)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "df= DataFrame(data[-15:], columns=['#Days', 'Freq1','Freq2','Measure'])\n", 208 | "df['Anomaly:Freq1'] = [detail[1][0] for detail in score_details[-15:]] # Anomaly Score of Freq1\n", 209 | "df['Anomaly:Freq2'] = [detail[1][1] for detail in score_details[-15:]] # Anomaly Score of Freq2\n", 210 | "df['Anomaly:Measure'] = [detail[1][2] for detail in score_details[-15:]] # Anomaly Score of Measure\n", 211 | "df['Anomaly Score'] = [detail[0] for detail in score_details[-15:]] # Combined Anomaly Score\n", 212 | "df" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "Above, the last column corresponds to the same anomaly score as before, where we can se that it corresponds to the maximum of the individual anomaly score to the left, thus, it is the result of the combination rule specified to the anomaly detector." 
220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [] 228 | } 229 | ], 230 | "metadata": { 231 | "kernelspec": { 232 | "display_name": "Python 2", 233 | "language": "python", 234 | "name": "python2" 235 | }, 236 | "language_info": { 237 | "codemirror_mode": { 238 | "name": "ipython", 239 | "version": 2 240 | }, 241 | "file_extension": ".py", 242 | "mimetype": "text/x-python", 243 | "name": "python", 244 | "nbconvert_exporter": "python", 245 | "pygments_lexer": "ipython2", 246 | "version": "2.7.14" 247 | }, 248 | "widgets": { 249 | "state": {}, 250 | "version": "1.1.1" 251 | } 252 | }, 253 | "nbformat": 4, 254 | "nbformat_minor": 1 255 | } 256 | -------------------------------------------------------------------------------- /docs/pyISC_simple_anomaly_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# pyISC Example: Simple Anomaly Detection with Frequency Data\n", 8 | "This is a simple example on how to use the pyISC anomaly detector for computing the anomaly scores of Poisson distributed data." 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 9, 14 | "metadata": { 15 | "collapsed": false 16 | }, 17 | "outputs": [], 18 | "source": [ 19 | "import pyisc;\n", 20 | "import numpy as np\n", 21 | "from scipy.stats import poisson\n", 22 | "%matplotlib inline\n", 23 | "from pylab import hist, plot, figure" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Data Creation\n", 31 | "Create two arrays with normal and anomalous frequency data respectively." 
32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 10, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "po_normal = poisson(10)\n", 43 | "po_anomaly = poisson(25)\n", 44 | "\n", 45 | "freq_normal = po_normal.rvs(10000)\n", 46 | "freq_anomaly = po_anomaly.rvs(15)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "Create an 2D array with two columns that combines random frequency and time period equal to 1." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 11, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "array([[ 12., 1.],\n", 67 | " [ 7., 1.],\n", 68 | " [ 16., 1.],\n", 69 | " [ 9., 1.],\n", 70 | " [ 17., 1.]])" 71 | ] 72 | }, 73 | "execution_count": 11, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "data = np.column_stack([\n", 80 | " list(freq_normal)+list(freq_anomaly), \n", 81 | " [1.0]*(len(freq_normal)+len(freq_anomaly))\n", 82 | " ])\n", 83 | "data[:5]" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "If we plot the histogram from the frequency data, we can see that the distribution tail is thicker at the right side because of the anomalous data points:" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 12, 96 | "metadata": { 97 | "collapsed": false, 98 | "scrolled": true 99 | }, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYEAAAEACAYAAABVtcpZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAE8NJREFUeJzt3X+s3fV93/Hni3g0SUsZSYWvYjfYFEFN1DRlm5cpTD1r\nVH50kkFbRZ12DYQomgptom2KZrM/fP/ZGlfakkgTqbpSYqpkzMnU4UQUCIKjKduCGQmFxA5YqmyM\nW992E2OLGk328t4f52tzcHx97z3n+p5z7uf5kI74ns/5nu/3fT/43tf3+/n+SlUhSWrTJZMuQJI0\nOYaAJDXMEJCkhhkCktQwQ0CSGmYISFLDlgyBJA8kWUjywnk++2dJfpDkHUNtu5McSXI4yU1D7Tck\neSHJy0k+s3o/giRpVMvZE3gQuPncxiSbgV8Ejg21bQPuALYBtwL3J0n38eeAj1bVtcC1SX5omZKk\ntbVkCFTV14HXzvPRp4FPntN2G/BwVZ2uqqPAEWB7kjngsqp6tpvvIeD2kauWJK2KkY4JJNkBHK+q\nF8/5aBNwfOj9ia5tE/DqUPurXZskaYI2rPQLSd4G3MdgKEiSNMNWHALATwFbgD/pxvs3A99Msp3B\nlv+7h+bd3LWdAH7yPO3nlcQbGknSCKoqS8/1huUOB6V7UVXfrqq5qrq6qrYyGNr5uar6C+AA8CtJ\nLk2yFbgGOFhVJ4HXk2zvguPDwCNL/CAz+9qzZ8/Ea2ixduuf/Mv6J/saxXJOEf0i8F8ZnNHzSpKP\nnPv3mjcC4hCwHzgEPArcU29Udi/wAPAycKSqHhupYknSqllyOKiqfnWJz68+5/1vA799nvmeA35m\npQVKki4erxi+CHq93qRLGNks1w7WP2nWP3sy6jjSxZSkprEuSZpmSaiLdGBYkrQOGQKS1DBDQJIa\nZghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEJCkhhkCktQwQ0CSGmYISFLDDAFJapghIEkNMwQaMDe3\nhSQkYW5uy6TLkTRFfJ5AAwaPdT7Tnxn5WaSSppvPE5AkrYghIEkNMwQkqWGGgCQ1zBCQpIYtGQJJ\nHkiykOSFobbfSXI4yfNJ/mOSHx/6bHeSI93nNw2135DkhSQvJ/nM6v8okqSVWs6ewIPAzee0PQG8\np6reBxwBdgMkuR64A9gG3Arcn8H5iQCfAz5aVdcC1yY5d5mSpDW2ZAhU1deB185pe7KqftC9/Qaw\nuZveATxcVaer6iiDgNieZA64rKqe7eZ7CLh9FeqXJI1hNY4J3A082k1vAo4PfXaia9sEvDrU/mrX\nJkmaoA3jfDnJvwBOVdW/X6V6zpqfnz873ev16PV6q70KSZpp/X6ffr8/1jKWdduIJFcBX6mq9w61\n3QV8DPiFqvq/XdsuoKpqb/f+MWAPcAx4uqq2de07gZ+vqt9YZH3eNmIVLfe2EXNzW1hYOAbAxo1X\ncfLk0TWpT9LquJi3jUj3OrOiW4BPAjvOBEDnALAzyaVJtgLXAAer6iTwepLt3YHiDwOPrKRQXXyD\nACigzoaBpPVtyeGgJF8EesA7k7zCYMv+PuBS4GvdyT/fqKp7qupQkv3AIeAUcM/QJv29wOeBtwKP\nVtVjq/yzSJJWyLuINmC5w0HebVSabd5FVJK0IoaAJDXMEJCkhhkCktQwQ0CSGmYISFLDDAFJapgh\nIEkNMwRm2NzcFpKcfc3NbZl0SZJmjFcMz7A3X+ELi13l6xXDUhu8YlhrYngPxL0Paba5JzDDJrUn\n4B6DNJ3cE5AkrYghIEkNMwQkqWGGgCQ1zBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJDTMEJKlh\nS4ZAkgeSLCR5YajtiiRPJHkpyeNJLh/6bHeSI0kOJ7lpqP2GJC8keTnJZ1b/R5EkrdRy9gQeBG4+\np20X8GRVXQc8BewGSHI9cAewDbgVuD+Du40BfA74aFVdC1yb5
NxlSpLW2JIhUFVfB147p/k2YF83\nvQ+4vZveATxcVaer6ihwBNieZA64rKqe7eZ7aOg7kqQJGfWYwJVVtQBQVSeBK7v2TcDxoflOdG2b\ngFeH2l/t2iRJE7RhlZaz6jeUn5+fPzvd6/Xo9XqrvQpJmmn9fp9+vz/WMpb1UJkkVwFfqar3du8P\nA72qWuiGep6uqm1JdgFVVXu7+R4D9gDHzszTte8Efr6qfmOR9flQmWXwoTKShl3Mh8qke51xALir\nm74TeGSofWeSS5NsBa4BDnZDRq8n2d4dKP7w0HckSROy5HBQki8CPeCdSV5hsGX/KeBLSe5msJV/\nB0BVHUqyHzgEnALuGdqkvxf4PPBW4NGqemx1fxRJ0kr5jOEZ5nCQpGE+Y1iStCKGgCQ1zBCQpIYZ\nApLUMENAF8Xc3BaSnH3NzW2ZdEmSzsOzg2bYNJ8dtNzaJK0ezw6SJK2IISBJDTMEptTwmLrj6ZIu\nFo8JTKnVHHf3mIDUBo8JSJJWxBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJDTMEJKlhhoAkNcwQ\nkKSGGQKS1DBDQJIaZghIUsPGCoEk/yTJt5O8kOQLSS5NckWSJ5K8lOTxJJcPzb87yZEkh5PcNH75\nkqRxjBwCSd4F/BZwQ1W9F9gAfAjYBTxZVdcBTwG7u/mvB+4AtgG3AvdncL9hSdKEjDsc9BbgR5Ns\nAN4GnABuA/Z1n+8Dbu+mdwAPV9XpqjoKHAG2j7l+SdIYRg6Bqvoz4F8DrzD44/96VT0JbKyqhW6e\nk8CV3Vc2AceHFnGia5MkTciGUb+Y5K8z2Oq/Cngd+FKSX+PNj5PiPO+XZX5+/ux0r9ej1+uNVKck\nrVf9fp9+vz/WMkZ+vGSSXwZurqqPde9/HXg/8AtAr6oWkswBT1fVtiS7gKqqvd38jwF7quqZ8yzb\nx0v6eElJK7TWj5d8BXh/krd2B3g/CBwCDgB3dfPcCTzSTR8AdnZnEG0FrgEOjrF+SdKYRh4OqqqD\nSb4MfAs41f3394DLgP1J7gaOMTgjiKo6lGQ/g6A4BdzT/Oa+JE3YyMNBF5PDQQ4HSVq5tR4OkiTN\nOENAkhpmCEhSwwwBSWqYISBJDTMEJKlhhoAkNcwQ0ETNzW0hCUmYm9sy6XKk5nix2JRq5WKx5a5T\n0tK8WEyStCKGgCQ1zBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJDTMEJKlhhoAkNcwQkKSGGQJr\nzBumSZom3kBujU3zTdqmuTZJS/MGcpKkFRkrBJJcnuRLSQ4n+U6Sv53kiiRPJHkpyeNJLh+af3eS\nI938N41fviRpHOPuCXwWeLSqtgE/C3wX2AU8WVXXAU8BuwGSXA/cAWwDbgXuz2AsQJI0ISOHQJIf\nB/5uVT0IUFWnq+p14DZgXzfbPuD2bnoH8HA331HgCLB91PVLksY3zp7AVuB/JHkwyTeT/F6StwMb\nq2oBoKpOAld2828Cjg99/0TXJkmakA1jfvcG4N6q+u9JPs1gKOjc0ztGOt1jfn7+7HSv16PX641W\npSStU/1+n36/P9YyRj5FNMlG4L9V1dXd+xsZhMBPAb2qWkgyBzxdVduS7AKqqvZ28z8G7KmqZ86z\nbE8R9RRRSSu0pqeIdkM+x5Nc2zV9EPgOcAC4q2u7E3ikmz4A7ExyaZKtwDXAwVHXL0ka3zjDQQAf\nB76Q5K8Bfwp8BHgLsD/J3cAxBmcEUVWHkuwHDgGngHvW7ea+JM0IrxheY9M85DLNtUlamlcMS5JW\nxBDQTPDGe9LF4XDQGpvmIZf1UJvUMoeDJEkrYghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEJCkhhkC\nktQwQ0CSGmYISFLDDAFJapghIEkNMwQkqWGGgCQ1zBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJ\nDRs7BJJckuSbSQ50769I8
kSSl5I8nuTyoXl3JzmS5HCSm8ZdtyRpPKuxJ/AJ4NDQ+13Ak1V1HfAU\nsBsgyfXAHcA24Fbg/iRZhfVLkkY0Vggk2Qz8EvD7Q823Afu66X3A7d30DuDhqjpdVUeBI8D2cdYv\nSRrPuHsCnwY+CdRQ28aqWgCoqpPAlV37JuD40HwnujZJ0oRsGPWLSf4+sFBVzyfpXWDWusBni5qf\nnz873ev16PUutApJak+/36ff74+1jFSN9DeaJP8K+EfAaeBtwGXAHwF/E+hV1UKSOeDpqtqWZBdQ\nVbW3+/5jwJ6qeuY8y65R65p2g8MgZ362sNjPuZz53jzPcucbb53TXpvUsiRU1YqOtY48HFRV91XV\nu6vqamAn8FRV/TrwFeCubrY7gUe66QPAziSXJtkKXAMcHHX9kqTxjTwcdAGfAvYnuRs4xuCMIKrq\nUJL9DM4kOgXcs2439yVpRow8HHQxORw03UMu01yb1LI1HQ7Sm83NbSEJSZib2zLpciRpWdwTWCUt\nbW1Pc21Sy9wTUPPcI5NWxj2BVdLS1vas1yatV+4JSJJWxBCQpIYZApLUMENAkhpmCEhSwwwBSWqY\nISBJDTMEJKlhhoAkNcwQkKSGGQKS1DBDQJIaZghIUsMMAUlqmCEgSQ0zBCSpYYaAJDXMEFBzhh9B\n6WMo1TofL7lKfITj+qtNmjVr+njJJJuTPJXkO0leTPLxrv2KJE8keSnJ40kuH/rO7iRHkhxOctOo\n65YkrY5xhoNOA/+0qt4D/B3g3iQ/DewCnqyq64CngN0ASa4H7gC2AbcC92ewSSZJmpCRQ6CqTlbV\n893094DDwGbgNmBfN9s+4PZuegfwcFWdrqqjwBFg+6jrlySNb1UODCfZArwP+AawsaoWYBAUwJXd\nbJuA40NfO9G1SZImZMO4C0jyY8CXgU9U1feSnHuEbaQjbvPz82ene70evV5v1BIlaV3q9/v0+/2x\nljHW2UFJNgBfBf64qj7btR0GelW1kGQOeLqqtiXZBVRV7e3mewzYU1XPnGe5nh3UyBk401ybNGvW\n9Oygzh8Ah84EQOcAcFc3fSfwyFD7ziSXJtkKXAMcHHP9kqQxjDwclOQDwK8BLyb5FoNNq/uAvcD+\nJHcDxxicEURVHUqyHzgEnALumbnNfUlaZ7xYbJVM87CGtY1WmzRrJjEcJEmaYYaAJDXMEFiCNxuT\ntJ55TGAJ62Fs29pGq02aNR4TkCStiCEgSQ0zBKRFDB8P8liQ1qux7x0krVcLC8c4c+xgYcG7nmt9\nck9AkhpmCEhSwwwBSWqYISBJDWs6BDz7Q1Lrmj47yLM/JLWu6T0BSWqdISCNyWFFzbKmh4Ok1eCw\nomaZewKS1DBDQJIaZghIUsMMAUlqmCEgSQ1b8xBIckuS7yZ5Ock/X+v1S2vF51NrFqxpCCS5BPi3\nwM3Ae4APJfnp1V6P522Poz/pAtaNN04dHbwG73/Y8L/Xd7xjbi1LXHX9fn/SJYxl1usfxVrvCWwH\njlTVsao6BTwM3LbaKxn+5VvsF0+L6U+6gOYM/3t97bWFSZczlln/Izrr9Y9irS8W2wQcH3r/KoNg\nWJGq4qtf/Srf//73AXjXu97FjTfeuDoVSlJDZvKK4eeee44dO3a8qe3UqVNs2DCTP460LHNzW87u\n2W7ceBUnTx5dlWWt9vJ+93c/P9ayVttq9tt6lKpau5Ul7wfmq+qW7v0uoKpq7znzrV1RkrSOVNWK\n7l2y1iHwFuAl4IPAnwMHgQ9V1eE1K0KSdNaajp9U1f9L8pvAEwwOSj9gAEjS5KzpnoAkabpM1RXD\ns34hWZKjSf4kybeSHJx0PUtJ8kCShSQvDLVdkeSJJC8leTzJ5ZOs8UIWqX9PkleTfLN73TLJGheT\nZHOSp5J8J8mLST7etc9E/5+n/t/q2mel/38kyTPd7+qLSfZ07bPS/4vVv+L+n5o9ge5Cspc
ZHC/4\nM+BZYGdVfXeiha1Akj8F/kZVvTbpWpYjyY3A94CHquq9Xdte4H9W1e90QXxFVe2aZJ2LWaT+PcD/\nqap/M9HilpBkDpirqueT/BjwHINrZj7CDPT/Ber/FWag/wGSvL2q/qo7VvlfgI8D/5AZ6H9YtP5b\nWWH/T9OewJpcSHaRhenq0wuqqq8D5wbWbcC+bnofcPuaFrUCi9QPg/8PU62qTlbV893094DDwGZm\npP8XqX9T9/HU9z9AVf1VN/kjDI6PFjPS/7Bo/bDC/p+mP1jnu5Bs0yLzTqsCvpbk2SQfm3QxI7qy\nqhZg8IsOXDnhekbxm0meT/L707o7PyzJFuB9wDeAjbPW/0P1P9M1zUT/J7kkybeAk8DXqupZZqj/\nF6kfVtj/0xQC68EHquoG4JeAe7vhilk3HeOFy3c/cHVVvY/BL8dUD0t0QylfBj7RbVGf299T3f/n\nqX9m+r+qflBVP8dgD2x7kvcwQ/1/nvqvZ4T+n6YQOAG8e+j95q5tZlTVn3f//UvgjxjhlhhTYCHJ\nRjg77vsXE65nRarqL+uNA13/Dvhbk6znQpJsYPAH9A+r6pGueWb6/3z1z1L/n1FV/5vBTbNuYYb6\n/4zh+kfp/2kKgWeBa5JcleRSYCdwYMI1LVuSt3dbRST5UeAm4NuTrWpZwpvHEA8Ad3XTdwKPnPuF\nKfOm+rtf3DP+AdP9/+APgENV9dmhtlnq/x+qf1b6P8lPnBkqSfI24BcZHNeYif5fpP7vjtL/U3N2\nEAxOEQU+yxsXkn1qwiUtW5KtDLb+i8FBmi9Me/1Jvgj0gHcCC8Ae4D8BXwJ+EjgG3FFV/2tSNV7I\nIvX/PQbj0z8AjgL/+MwY7zRJ8gHgPwMv8sb9pu9jcBX9fqa8/y9Q/68yG/3/MwwO/F7Svf5DVf3L\nJO9gNvp/sfofYoX9P1UhIElaW9M0HCRJWmOGgCQ1zBCQpIYZApLUMENAkhpmCEhSwwwBSWqYISBJ\nDfv/d11AEyefjAAAAAAASUVORK5CYII=\n", 104 | "text/plain": [ 105 | "" 106 | ] 107 | }, 108 | "metadata": {}, 109 | "output_type": "display_data" 110 | } 111 | ], 112 | "source": [ 113 | "hist(data.T[0],100);" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Used Anomaly Detector\n", 121 | "Create an anomaly detector using as first argument the P_PoissonOneside statistical model, that is, we use a Poisson distribution for modelling data, but we only care of anomalous large frequencies. 
The second argument is an array containing the column index used as input to the statistical model where 0 is the column index of frequencies and 1 is the column index of the period in the data object:" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 13, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "anomaly_detector = pyisc.AnomalyDetector(\n", 133 | " pyisc.P_PoissonOnesided(frequency_column=0,period_column=1)\n", 134 | ")" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "Train the anomaly detector:" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 14, 147 | "metadata": { 148 | "collapsed": false 149 | }, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "1000 loops, best of 3: 1.54 ms per loop\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "%timeit anomaly_detector.fit(data);" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "Compute the anomaly scores for each data point:" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 17, 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "scores = anomaly_detector.anomaly_score(data)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "## Anomaly Scores\n", 186 | "Now we can print some example of normal frequencies vs. 
anomaly scores:" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 18, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "(12, 1.1745941638946533)\n", 201 | "(7, 0.0)\n", 202 | "(16, 2.9855577945709229)\n", 203 | "(9, 0.0)\n", 204 | "(17, 3.570124626159668)\n", 205 | "(11, 0.28747570514678955)\n", 206 | "(10, 0.0)\n", 207 | "(9, 0.0)\n", 208 | "(15, 2.4521820545196533)\n", 209 | "(11, 0.28747570514678955)\n", 210 | "(18, 4.2041616439819336)\n", 211 | "(11, 0.28747570514678955)\n", 212 | "(8, 0.0)\n", 213 | "(9, 0.0)\n", 214 | "(13, 1.5453963279724121)\n" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "for s in zip(freq_normal[:15], scores[:15]):\n", 220 | " print s" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "The anomalous frequencies vs. anomaly scores:" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 19, 233 | "metadata": { 234 | "collapsed": false, 235 | "scrolled": true 236 | }, 237 | "outputs": [ 238 | { 239 | "name": "stdout", 240 | "output_type": "stream", 241 | "text": [ 242 | "(32, 17.405155181884766)\n", 243 | "(25, 9.8875513076782227)\n", 244 | "(23, 8.0570564270019531)\n", 245 | "(25, 9.8875513076782227)\n", 246 | "(16, 2.9855577945709229)\n", 247 | "(34, 19.836088180541992)\n", 248 | "(17, 3.570124626159668)\n", 249 | "(23, 8.0570564270019531)\n", 250 | "(25, 9.8875513076782227)\n", 251 | "(27, 11.866734504699707)\n", 252 | "(29, 13.985079765319824)\n", 253 | "(35, 21.094324111938477)\n", 254 | "(28, 12.909066200256348)\n", 255 | "(29, 13.985079765319824)\n", 256 | "(29, 13.985079765319824)\n" 257 | ] 258 | } 259 | ], 260 | "source": [ 261 | "for s in zip(freq_anomaly, scores[-15:]):\n", 262 | " print s" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "As can be seen above, the 
anomalous frequencies also have higher anomaly scores than the normal frequencies as it should be.

This becomes even more visible if we plot the frequency (x-axis) against anomaly scores (y-axis): " 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 20, 275 | "metadata": { 276 | "collapsed": false 277 | }, 278 | "outputs": [ 279 | { 280 | "data": { 281 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEACAYAAACj0I2EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEB9JREFUeJzt3W+MHPV9x/HPh9hNSnCQS2usxBRDKtf1GeQGHFWitjag\nxi6qBEolkphICQ8iHpSA+sguT+4ipUkhEhJSxYMEYzmVrSZBavij/DGNu0qIDBiME/vO5qpQu6XG\nF0ShXSuKVJdvH8wcPh+3e7uzuzczv32/pNXMzu2fr2Z9H//uO/PbcUQIAFB/l5RdAABgMAh0AEgE\ngQ4AiSDQASARBDoAJIJAB4BELBrottfYPmh70vYx21/Kt4/bfs32kfy2ffjlAgDa8WLnodteLWl1\nRBy1fZmklyTdJunTkloR8dDwywQALGbZYg+IiLOSzubr52yfkPSR/MceYm0AgB701EO3vVbSJknP\n55vusX3U9qO2Lx9wbQCAHnQd6Hm75XFJ90XEOUmPSLo2IjYpG8HTegGAEi3aQ5ck28skPS3pBxHx\n8AI/v1rSUxFx/QI/48tiAKCAiOiprd3tCP0xSVNzwzw/WDrrU5KOdyiqtrfx8fHSaxjV+utcO/WX\nf6t7/UUselDU9k2S7pR0zPbLkkLS/ZJ22N4k6R1JpyTdXagCAMBAdHOWy88kvW+BH/1w8OUAAIpi\npugiGo1G2SX0pc7117l2ifrLVvf6i+jqoGhfb2DHsN8DAFJjWzGkg6IAgIoj0AEgEQQ6ACSCQAeA\nRBDoAJAIAh0AEkGgA0AiCHQASASBDgCJINABIBEEOgAkgkAHgEQQ6ACQCAIdABJBoANAIgh0AEgE\ngQ4AiSDQASARBDoAJIJAB4BEEOgAkAgCHQASQaADQCIIdABIBIEOAIkg0AEgEQQ6AFRMq1XseQQ6\nAFRIqyVt2VLsuQQ6AFTI8ePS5GSx5xLoAFAhGzdKY2PFnuuIGGw189/AjmG/BwCkpNWSPvQhKyLc\ny/MIdACoILv3QKflAgCJINABIBGLBrrtNbYP2p60fcz2vfn2lbYP2H7F9o9sXz78cgEA7SzaQ7e9\nWtLqiDhq+zJJL0m6TdJdkt6MiAdt75S0MiJ2LfB8eugA0KOh9NAj4mxEHM3Xz0k6IWmNslDfmz9s\nr6TbeysXADBIPfXQba+VtEnSc5KujIgZKQt9SasGXRwAoHvLun1g3m55XNJ9EXHO9vw+Stu+ysTE\nxLvrjUZDjUajtyoBIAGtVjYTdONGacWKi3/WbDbVbDb7ev2uzkO3vUzS05J+EBEP59tOSGpExEze\nZ/+XiPijBZ5LDx3AyJv9jpbJyWwm6E9/+t5Qn2uY56E/JmlqNsxzT0r6Qr7+eUlP9PLGADBKZr+j\n5fx5aWqq+Pe1dNLNWS43SfqJpGPK2ioh6X5JL0j6jqSrJJ2WdEdEvL3A8xmhAxh5syP0qSlpw4bh\njNCZ+g8AS6TVutBy6RTmEoEOAMngu1wAYIQR6ACQCAIdABJBoANAIgh0AEgEgQ4AiSDQASARBDoA\nJIJAB4BEEOgAkAgCHQASQaADQCIIdABIBIEOAIkg0AEgEQQ6AHTQakmHDmXLqiPQAaCN2cvGbd2a\nLase6gQ6ALSxFBd2HiQCHQDa2Lgxu/7n8uXZhZ3HxsquqDOuKQoAHfRyYedB4iLR
AJAILhINACOM\nQAeARBDoAJAIAh0AEkGgA0AiCHQASASBDgCJINABIBEEOgAkgkAHgEQQ6ACQCAIdABJBoANAIhYN\ndNu7bc/Y/sWcbeO2X7N9JL9tH26ZAIDFdDNC3yNp2wLbH4qIj+W3Hw64LgBAjxYN9Ih4VtJbC/yo\np+/pBQAMVz899HtsH7X9qO3LB1YRAKCQooH+iKRrI2KTpLOSHhpcSQCAIpYVeVJEvDHn7jclPdXp\n8RMTE++uNxoNNRqNIm8LAMlqNptqNpt9vUZX1xS1vVbSUxFxXX5/dUSczdf/WtLmiNjR5rlcUxQA\nelTkmqKLjtBt75fUkHSF7X+XNC7pE7Y3SXpH0ilJd/dcLQBgoLoaoff1BozQAaBnRUbozBQFgEQQ\n6ACQCAIdABJBoAOojelpaefObIn34qAogFqYnpbWr5ciJFs6eVJat67sqoaHg6IAkrV7dxbmUrbc\ns6fceqqIETqAWmCEvjhG6ABqYd26LMR37Uo/zItihA4AFcQIHQBGGIEOAIkg0AEgEQQ6ACSCQAdQ\nCa2WdOhQtkQxBDqA0rVa0pYt0tat2ZJQL4ZAB1C648elyUnp/HlpaipbR+8IdACl27hRGhuTli+X\nNmzI1tE7JhYBqIRWKxuZj41JK1aUXU35ikwsItABoIKYKQoAI4xAB4BEEOgAkAgCHcBQMWFo6RDo\nAIaGCUNLi0AHMDRMGFpaBDqAoWHC0NLiPHQAQ8WEoWKYWAQAiWBiEQCMMAIdABJBoANAIgh0AIUx\naahaCHQAhTBpqHoIdACFMGmoegh0AIUwaah6OA8dQGFMGhqeoUwssr1b0l9ImomI6/NtKyV9W9LV\nkk5JuiMi/rvN8wl0AOjRsCYW7ZG0bd62XZL+OSL+UNJBSX/Ty5sCAAZv0UCPiGclvTVv822S9ubr\neyXdPuC6AAA9KnpQdFVEzEhSRJyVtGpwJQEAihjUWS40yYGETE9LO3dmS9THsoLPm7F9ZUTM2F4t\n6VedHjwxMfHueqPRUKPRKPi2AIZtelpav16KkL7+denkSWndurKrSl+z2VSz2ezrNbo6bdH2WklP\nRcR1+f0HJP1XRDxge6eklRGxq81zOcsFqJGdO6UHH7xwf9cu6WtfK6+eUTWs0xb3S2pIukLSjKRx\nSd+T9F1JV0k6rey0xbfbPJ9AB2pk7gjdZoReFi5wAWAgpqelPXuku+4izMtCoANAIrhiEQCMMAId\nABJBoANAIgh0AEgEgQ4AiSDQASARBDoAJIJAB4BEEOgAkAgCHQASQaADiThzRvrGN7IlRlPR70MH\nUCFnzkgf/aj0m99IH/iA9MtfSh/+cNlVYakxQgcS8PTTWZhL2fL73y+3HpSDb1sEEsAIPT18fS4w\nws6cyUbmt95KmKeAQAeARPB96AAwwgh0AEgEgQ5UXKslHTqULYFOCHSgwlotacsWaevWbEmooxMC\nHaiw48elyUnp/HlpaipbB9oh0IEK27hRGhuTli+XNmzI1oF2OG0RqLhWKxuZj41JK1aUXQ2WCueh\nA0AiOA8dAEYYgQ6UaP9+ae3abAn0i6/PBUqyf790553Z+uxyx47y6kH90UMHSrJ2rXT69IX711wj\nvfpqaeWgYuihAzXy1a9efP8rXymnDqSDQAdKsmOHtG9fNjLft492C/pHywUAKoiWCwCMMAIdGIJb\nbpHsbAksFQIdGLBbbpEOHszWDx4k1LF06KEDA+YFup78CqBXS95Dt33K9s9tv2z7hX5eC0jFzTd3\nvg8MS18jdNuvSrohIt7q8BhG6Bg5s22Xm2+WfvzjsqtBHS35ty3a/jdJN0bEmx0eQ6ADQI/KOG0x\nJD1j+7DtL/b5WkAtbN6c9ck3by67EuBi/X45100R8brt31MW7Cci4tn5D5qYmHh3vdFoqNFo9Pm2\nQDk2b5ZefDFbf/HF7P7hw+XWhDQ0m001m82+
XmNgZ7nYHpfUioiH5m2n5YJkcAYLlsqStlxsX2r7\nsnz9g5I+Kel40dcD6uDGGzvfB8rUTw/9SknP2n5Z0nOSnoqIA4MpC6imw4cvhPiNN9JuQbUwsQgA\nKqhIy4UrFgG5uf1xxiCoI77LBdB7D3YudPATqDoCHQASQaADQCIIdEDv7ZnTQ0cdcVAUyBHiqDsC\nHSOBM1gwCmi5IHmcwYJRQaADQCIIdABIBIGO5HEGC0YFB0VRa90e7CTEMQoYoaO2ONgJXIxAB4BE\nEOgAkAgCHbXFwU7gYhwURSVxsBPoHSN0VA4HO4FiCHQASASBDgCJINCx5OwLt4VwsBMohoOiWFIL\n9ccXCmxCHOgdI3QASASBDgCJINAxMIv1xiX648Aw0UPHQHTbG5cIcWBYGKEDQCIYoaMrXGQZqD5G\n6FhUN1Px6Y0D5WOEjoEhxIFyMUIHgEQQ6FgU7RSgHmi5oCuEOFB9jNABIBEEOgAkoq9At73d9knb\n07Z3DqooLJ1upusDqIfCgW77Ekl/L2mbpDFJn7W9flCFVUWz2Sy7hL50qr/ql3pLed/XAfXXTz8j\n9I9L+teIOB0R/yvpHyXdNpiyqqPu/yjqXH+da5eov2x1r7+IfgL9I5L+Y8791/JtPTtyRPrc57Jl\nO9u2ZSPIbds6v1Y3LYRu2wy29OUvD+61BlnXoOoHkI7SD4oeOSLdcIO0b1+2XCjUt22TDhzI1g8c\naB/q3bQQum0z1Pm1un0c55cDaXEU/C22/SeSJiJie35/l6SIiAfmPY6YAIACIqKnv6/7CfT3SXpF\n0i2SXpf0gqTPRsSJQi8IAOhL4ZmiEfF/tu+RdEBZ62Y3YQ4A5Sk8QgcAVMvQDorWfdKR7VO2f277\nZdsvlF3PYmzvtj1j+xdztq20fcD2K7Z/ZPvyMmvspE3947Zfs30kv20vs8ZObK+xfdD2pO1jtu/N\nt1f+M1ig9i/l22ux/22/3/bz+e/qMdvj+fbK73upY/097/+hjNDzSUfTyvrrZyQdlvSZiDg58Dcb\nEtuvSrohIt4qu5Zu2P5TSeckfSsirs+3PSDpzYh4MP9PdWVE7Cqzznba1D8uqRURD5VaXBdsr5a0\nOiKO2r5M0kvK5mXcpYp/Bh1q/7Tqs/8vjYhf58f2fibpXkl/qYrv+1lt6v9z9bj/hzVCT2HSkVWB\n0zq7FRHPSpr/n89tkvbm63sl3b6kRfWgTf1S9jlUXkScjYij+fo5SSckrVENPoM2tc/OKanL/v91\nvvp+ZccGQzXY97Pa1C/1uP+HFVgDm3RUopD0jO3Dtr9YdjEFrYqIGSn7pZW0quR6irjH9lHbj1b1\nT+b5bK+VtEnSc5KurNNnMKf25/NNtdj/ti+x/bKks5KeiYjDqtG+b1O/1OP+r80ItAQ3RcTHJN0q\n6a/ylkDd1e0I+COSro2ITcr+odfhT//LJD0u6b58tDt/n1f2M1ig9trs/4h4JyL+WNlfRR+3PaYa\n7fsF6t+gAvt/WIH+n5J+f879Nfm22oiI1/PlG5L+SVkbqW5mbF8pvdsn/VXJ9fQkIt6ICwd5vilp\nc5n1LMb2MmWB+A8R8US+uRafwUK1123/S1JE/I+kpqTtqsm+n2tu/UX2/7AC/bCkP7B9te3fkvQZ\nSU8O6b0Gzval+WhFtj8o6ZOSjpdbVVesi3tuT0r6Qr7+eUlPzH9CxVxUf/5LOOtTqv5n8JikqYh4\neM62unwG76m9Lvvf9u/OtiNs/7akP1N2HKAW+75N/SeL7P+hnYeen2LzsC5MOvq7obzRENi+Rtmo\nPJQdoNhX9fpt75fUkHSFpBlJ45K+J+m7kq6SdFrSHRHxdlk1dtKm/k8o6+e+I+mUpLtne6JVY/sm\nST+RdEzZv5uQdL+yGdTfUYU/gw6171AN9r/t65Qd9Lwkv307Iv7W9u+o4vte6lj/t9Tj/mdiEQAk\ngoOiAJAI
Ah0AEkGgA0AiCHQASASBDgCJINABIBEEOgAkgkAHgET8P6LeLy4RSm+kAAAAAElFTkSu\nQmCC\n", 282 | "text/plain": [ 283 | "" 284 | ] 285 | }, 286 | "metadata": {}, 287 | "output_type": "display_data" 288 | } 289 | ], 290 | "source": [ 291 | "plot(data.T[0], scores, '.');" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "So, depending on at what level we would consider a frequency an anomaly, we can set a threshold to decide if a frequency is anomalous." 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "We can also \"confuse\" the anomaly detector by adding more normal training data closer to the anomalous data:" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 21, 311 | "metadata": { 312 | "collapsed": false 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "data2 = np.column_stack([\n", 317 | " poisson(15).rvs(15), \n", 318 | " [1.0]*15\n", 319 | " ])\n", 320 | "anomaly_detector.fit_incrementally(data2);" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 22, 326 | "metadata": { 327 | "collapsed": false 328 | }, 329 | "outputs": [], 330 | "source": [ 331 | "scores_ = anomaly_detector.anomaly_score(data)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 23, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [ 341 | { 342 | "data": { 343 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXQAAAEACAYAAACj0I2EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFfFJREFUeJzt3X1sHPWdx/HPN4SH4ymhPUiupcFpUI7iFExJ6ElOYKm5\nFkokqh7QNq1UIrUiPBREJZRQqnNyqhsgEhXSxUVq0xROja4k0rXFKS004DaxaBNSQsgD+KB5OC6N\ni7iaGqGUQr73x+zaXnvX3h3vemZ++35J1u6OZ2a/jO0Pv3xnfjvm7gIAZN+UpAsAANQGgQ4AgSDQ\nASAQBDoABIJAB4BAEOgAEIhxA93MzjWzp81sr5m9aGZfyy9vN7PXzOz3+a+r618uAKAcG+86dDOb\nKWmmu+8ys9Ml7ZR0naTPSRpw9wfrXyYAYDxTx1vB3Y9KOpp//paZ7Zf0wfy3rY61AQCqUFUP3cya\nJLVI+l1+0e1mtsvMvm9m02pcGwCgChUHer7dsknSne7+lqROSR929xZFI3haLwCQoHF76JJkZlMl\ndUl6wt0fKvH98yQ97u4XlfgeHxYDADG4e1Vt7UpH6D+QtG94mOdPlhZ8VtKeMYrK7Fd7e3viNTRq\n/VmunfqT/8p6/XGMe1LUzFolfVHSi2b2vCSX9A1JS8ysRdJxSQcl3RyrAgBATVRylUuPpBNKfOsX\ntS8HABAXM0XHkcvlki5hQrJcf5Zrl6g/aVmvP46KTopO6A3MvN7vAQChMTN5nU6KAgBSjkAHgEAQ\n6AAQCAIdAAJBoANAIAh0AAgEgQ4AgSDQASAQBDoABIJAB4BAEOgAEAgCHQACQaADQCAIdAAIBIEO\nAIEg0AEgEAQ6AASCQAeAQBDoABAIAh0AAkGgA0AgCHQACASBDgCBINABIBAEOgAEgkAHgEAQ6ACQ\nMgMD8bYj0AEgRQYGpEWL4m1LoANAiuzZI+3dG29bAh0AUmTePKm5Od625u61rWbkG5h5vd8DAEIy\nMCCdeabJ3a2a7Qh0AEghs+oDnZYLAASCQAeAQIwb6GZ2rpk9bWZ7zexFM7sjv/wsM3vSzF42s1+a\n2bT6lwsAKGfcHrqZzZQ00913mdnpknZKuk7SUklvuPsDZrZc0lnuvqLE9vTQAaBKdemhu/tRd9+V\nf/6WpP2SzlUU6o/kV3tE0meqKxcAUEtV9dDNrElSi6TfSprh7n1SFPqSzql1cQCAyk2tdMV8u2WT\npDvd/S0zG9lHKdtXWbly5eDzXC6nXC5XXZUAEICBgWgm6Lx50hlnFH+vu7tb3d3dE9p/Rdehm9lU\nSV2SnnD3h/LL9kvKuXtfvs/+jLt/pMS29NABNLzCZ7Ts3RvNBN26dXSoD1fP69B/IGlfIczzfibp\npvzzL0v6aTVvDACNpPAZLe++K+3bF//zWsZSyVUurZJ+I+lFRW0Vl/QNSdslPSbpQ5IOSbrR3ftL\nbM8IHUDDK4zQ9+2TLrywPiN0pv4DwCQZGBhquYwV5hKBDgDB4LNcAKCBEegAEAgCHQACQaADQCAI\ndAAIBIEOAIEg0AEgEAQ6AASCQAeAQBDoABAIAh0AAkGgA0AgCHQACASBDgCBINABIBAEOgCMYWBA\nevbZ6DHtCHQAKKNw27jLL48e0x7qBDoAlDEZN3auJQIdAMqYNy+6/+eJJ0Y3dm5uTrqisXFPUQAY\nQzU3dq4lbhINAIHgJtEA0MAIdAAIBIEOAIEg0AEgEAQ6AASCQAeAQBDoABAIAh0AAkGgA0AgCHQA\nCASBDgCBINABIBAEOgAEYtxAN7N1ZtZnZruHLWs3s9fM7Pf5r6vrWyYAYDyVjNDXS/pUieUPuvvH\n8l+/qHFdAIAqjRvo7r5N0p9LfKuqz+kFANTXRHrot5vZLjP7vplNq1lFAIBY4gZ6p6QPu3uLpKOS\nHqxdSQCAOKbG2cjdXx/28nuSHh9r/ZUrVw4+z+VyyuVycd4WA
ILV3d2t7u7uCe2jonuKmlmTpMfd\n/aP51zPd/Wj++V2SFrj7kjLbck9RAKhSnHuKjjtCN7MNknKS3m9mhyW1S7rSzFokHZd0UNLNVVcL\nAKipikboE3oDRugAULU4I3RmigJAIAh0AAgEgQ4AgSDQAWRGb6+0fHn0iNE4KQogE3p7pQsukNwl\nM+mll6S5c5Ouqn44KQogWOvWRWEuRY/r1ydbTxoxQgeQCYzQx8cIHUAmzJ0bhfiKFeGHeVyM0AEg\nhRihA0ADI9ABIBAEOgAEgkAHgEAQ6ABSYWBAevbZ6BHxEOgAEjcwIC1aJF1+efRIqMdDoANI3J49\n0t690rvvSvv2Rc9RPQIdQOLmzZOam6UTT5QuvDB6juoxsQhAKgwMRCPz5mbpjDOSriZ5cSYWEegA\nkELMFAWABkagA0AgCHQACASBDqCumDA0eQh0AHXDhKHJRaADqBsmDE0uAh1A3TBhaHJxHTqAumLC\nUDxMLAKAQDCxCAAaGIEOAIEg0AEgEAQ6gNiYNJQuBDqAWJg0lD4EOoBYmDRUe5t7N6v/WH/s7Ql0\nALEwaaj2Wme16t4t98YOda5DBxAbk4Zqr/9Yv+7dcq86F3fWfmKRma2TtFhSn7tflF92lqQfSzpP\n0kFJN7r7m2W2J9ABoAoH+w9q9lmz6zKxaL2kT41YtkLSr9z9HyU9Lemeat4UAFBa/7F+relZE2vb\ncQPd3bdJ+vOIxddJeiT//BFJn4n17gCAQYV2S0dbR6zt454UPcfd+yTJ3Y9KOifmfgAAeT2He9TR\n1qHpp0yPtX2trnKhSQ4EpLdXWr48esTElbocsf9Yvzb3bi5adu3ca2OHuSRNjbldn5nNcPc+M5sp\n6U9jrbxy5crB57lcTrlcLubbAqi33l7pggskd2nNGumll6S5c5OuKtsKlyMWRt+lWivd3d3q7u6e\n0PtUdNmimTVJetzdP5p/fb+k/3P3+81suaSz3H1FmW25ygXIkOXLpQceGHq9YoW0enVy9YSiEOJ3\nt96tNT1rxm2t1OXz0M1sg6ScpPdL6pPULuknkjZK+pCkQ4ouWyx5JTyBDmTL8BG6GSP0WjrYf1Cz\nH5qtA3ceUNP0pjHX5QYXAGqit1dav15aupQwr5VUjNAnikAH0OiG98xH9tDLhTqBDgAptLl3s1pn\ntRaFd/+xfvUc7tG1c68tuQ2BDgCB4J6iANDACHQACASBDgCBINABIBAEOgAEgkAHgBIq/UCtNCHQ\nAaCEkff3LEwGap3VmnBl5XEdOgCUUe10/VpiYhHQwI4ckbq6pMWLpQ98IOlqwlHNB2rVEhOLgAZ1\n5Ig0Z450883R45EjSVcUhsL9PQ/ceUBretaM6qmnDYEOBKCrSzp2LHp+7Jj0858nW08Ihn+AVtP0\nJnW0dRT11NOIlgsQgMII/dgx6ZRTpFdfpe0yUXE+UKuW6KEDDezIkWhk/ulPE+YhINABIBCcFAWA\nBkagA0AgCHQg5QYGpGefjR6BsRDoQIoNDEiLFkmXXx49EuoYC4EOpNiePdLevdK770r79kXPgXII\ndCDF5s2TmpulE0+ULrwweg6Uw2WLQMoNDEQj8+Zm6Ywzkq4Gk4Xr0AEgEFyHDgANjEAHErRhg9TU\nFD0CE0WgAwnZsEH64helQ4eiR0K9vCWblmh33+6iZbv7dmvJpiUJVZRO9NCBhDQ1RWFeMHu29Ic/\nJFZOqu3u260rf3ilnrnpGV0046JRr0NEDx3IkG9/u/j1t76VTB1ZcNGMi/TMTc/oyh9eqY17NwYf\n5nExQgcStGGD9M1vRmG+hO7BuDbu3agbN92ox65/TDc035B0OXXFZYsAglVoszy8+GEt61oW/Aid\nlguAIA3vmd/QfMNg+2XkidJGR6ADddDWJplFjxjb5t7No+7T2X+sX5t7Nw++vm/rfUUj8kJP/b6t\n901qrWlHywWosbY26emnh
15/4hPSli3J1ZN2w2/GPP2U6aNeNyp66EAKWIk/Qf4ExlYI8btb79aa\nnjUNH+ZSAoFuZgclvSnpuKS/uftlJdYh0NFQGKHHc7D/oGY/NFsH7jygpulNSZeTuCROih6XlHP3\nS0qFOdCItmyJQlwizCvVf6xfa3rW6MCdB7SmZ82onjoqM9FAtxrsAwjOli1Rm6WRw7ySk52FZYWe\nedP0JnW0dejeLfcS6jFMNIxd0lNmtsPMvlqLgoC0W7Ag6pMvWJB0JenWOqu1KJgLwd06q7VovZ7D\nPUU98+mnTFdHW4d6DvdMes1ZN9Ee+j+4+x/N7GxJT0m63d23jVjH29vbB1/ncjnlcrnY7wkkacEC\n6bnnhl7Pny/t2JFcPWnHyc7KdXd3q7u7e/D1qlWrkrvKxczaJQ24+4MjlnNSFMHgCpbqcbIznkk9\nKWpmp5rZ6fnnp0n6pKQ9cfcHZMH8+WO/RjFOdk6uifTQZ0jaZmbPS/qtpMfd/cnalAWk044dQyHe\nyO2WSk54crJz8jGxCEDVKpndubl3s1pntRb1zPuP9avncI+unXttUqVnBjNFgQkY3h/nV3Z8nPCs\nLwIdiImTnfFwwrN++PhcAJOGE57pQ6ADGFTpzZg54ZlOtFyAPHrold+MmROe9UcPHcCENdqt3tKK\nQAfKYPRdnUa6GXNacVIUKGHkFSylrmjBkN19u7Wsa5keu/4xLetaxn07M4RABzCImzFnG4EOYBA3\nY842euhoCPTQkTVxeuhT61UMMBkqDWpCHI2Algsyi5OdkZXPrNSh/kNFyw71H9LKZ1YmUxASQ6AD\nGbf0kqVavGHxYKgf6j+kxRsWa+klSxOuDJONHjoyiw/UGlII8e8u/q5u6bpFXUu6dN7085IuCxPA\nxCI0HE52Dtl2eJsWrV+krUu3auGshUmXgwliYhGCYTb0NRb3oa/QVHJXoIJD/Yd0S9ct2rp0q27p\numVUTx2NgUBH6nCyM9I6q7XoEwwLn3DYOqu1aL1Cu6VrSZcWzlqoriVdRT11NA4CHUip6adMH/xY\n2oP9B0fd4q1g/fPri3rm500/T11LurT++fVJlI0E0UNH6nCysxh3BWpM9NCRCeP1x0eGd4hhPm31\nNHVu7yxa1rm9U9NWTytaxl2BUA0CHZOq0v54yCc7JWl122rd9sRtg6Heub1Ttz1xm1a3rR5ch7sC\noVq0XDCpaKcMKYT49R+5Xpv2b9Laa9bq1stuHfw+dwVqbFyHjtQj0Ivd8NgN2rR/k67/yPXaeOPG\npMtBitBDR6IquXY89P64rTK1dLYULWvpbJGtGn1QOrd3Dob5pv2bRvXUgWoR6KiJaq4dD7k/fvHZ\nF+uF118YDPWWzha98PoLuvjsi4vWK7Rb1l6zVhtv3Ki116wt6qkDcdByQU3QShlSCPGT7CS94+/o\n4rMv1q5bdxWtM231NK1uW13UM+/c3ql7ttyjN+95c7JLRgrRQ0fdjPeZKY0Q6LbKNOfMOXrlrlcG\nl53/nfP16l9elbcX/8ee/G8n6x1/RyfZSfrrv/51sktFAOihoy4qaaeE3huXpDlnztGrf3lV53/n\nfElDYT7nzDlF67V0tgyG+Tv+zqieOlAvjNAxrkYYfVeqEOJTNEXHdXzUiH14z3zXrbtGvQYqRcsF\ndRF6oBeuQBneNim1rOCEVSfouI5riqbovfb3Ru1rZHgXQr3UvoByCHTUTcifOz78kkJv91Gvhxtv\nhA7UCj101E1WLzW0VTbqGvCRy0qNzEcul4p75u+1vzeqpw4kjUBHQygEdakJPtLo8C7VHimEeWFE\n/spdrwyGOpAGtFyQSdX0vUuFeJx1gMk06S0XM7vazF4ys14zWz6RfSEZld7qLa3GG3lL44++K2m/\nAFkQO9DNbIqkf5f0KUnNkr5gZhfUqrC06O7uTrqECRmr/rTf6m2s2ivte4/8fqnXI7et1cg
85N+d\nLMh6/XFMZIR+maT/dvdD7v43Sf8p6bpqdnDF+iu07fC2omXbDm/TFeuvGHxtq0zzH55ftM78h+eP\ne6Kr1LJK1hm5rPBLUYt91bKuWtSvu0YE213J119J7QWV9L0rPflZal8TDfasBwr1Z89EAv2Dkv5n\n2OvX8ssq1tHWoasevUrrntqmL31JWvfUNl316FXqaOsYXOfSGZdqZ99OnXbHfJlJp90xXzv7durS\nGZeW3Kd93aIWwtfLDzcrWaew3qpVtdtXLeuqSf1naijU77Lodbl9VdDaqGSdWu6r0pG3VPvRN5BG\niV7lsnDWQq39+K/0lV9fpR+9vFZf+fVVWvvxX2nhrIWD6zy37Dmd+salevt9O6Xlp+jt9+3UqW9c\nqueWPVe0L2936U1FoXRPPpzeLP4DrmSdUestXFW7fdWyrlrUbyP2ZWOPesuNdittf1S7r1Xdq8ru\nq9K+d71G30Aaxb7Kxcz+SdJKd786/3qFJHf3+0esx18OAMQwaTNFzewESS9LapP0R0nbJX3B3ffH\n2iEAYEKmxt3Q3d8zs9slPamodbOOMAeA5NR9YhEAYHLU7aRo1icdmdlBM3vBzJ43s+1J1zMeM1tn\nZn1mtnvYsrPM7Ekze9nMfmlm05KscSxl6m83s9fM7Pf5r6uTrHEsZnaumT1tZnvN7EUzuyO/PPU/\ngxK1fy2/PBPH38xONrPf5f9WXzSz9vzy1B97acz6qz7+dRmh5ycd9Srqrx+RtEPS5939pZq/WZ2Y\n2R8kXeruf066lkqY2UJJb0l61N0vyi+7X9Ib7v5A/n+qZ7n7iiTrLKdM/e2SBtz9wUSLq4CZzZQ0\n0913mdnpknYqmpexVCn/GYxR++eUneN/qru/nT+31yPpDkn/opQf+4Iy9V+jKo9/vUboE550lAKm\nDH14mbtvkzTyfz7XSXok//wRSZ+Z1KKqUKZ+Kfo5pJ67H3X3Xfnnb0naL+lcZeBnUKb2wpySrBz/\nt/NPT1Z0btCVgWNfUKZ+qcrjX6/AmvCkoxRwSU+Z2Q4z+2rSxcR0jrv3SdEfraRzEq4njtvNbJeZ\nfT+t/2QeycyaJLVI+q2kGVn6GQyr/Xf5RZk4/mY2xcyel3RU0lPuvkMZOvZl6peqPP6ZGYEmoNXd\nPybp05Juy7cEsi5rZ8A7JX3Y3VsU/aJn4Z/+p0vaJOnO/Gh35DFP7c+gRO2ZOf7uftzdL1H0r6LL\nzKxZGTr2Jeq/UDGOf70C/X8lzRr2+tz8ssxw9z/mH1+X9F+K2khZ02dmM6TBPumfEq6nKu7++rDP\nXv6epAVJ1jMeM5uqKBD/w91/ml+ciZ9Bqdqzdvwlyd3/Iqlb0tXKyLEfbnj9cY5/vQJ9h6Tzzew8\nMztJ0ucl/axO71VzZnZqfrQiMztN0icl7Um2qoqYintuP5N0U/75lyX9dOQGKVNUf/6PsOCzSv/P\n4AeS9rn7Q8OWZeVnMKr2rBx/M/v7QjvCzP5O0j8rOg+QiWNfpv6X4hz/ul2Hnr/E5iENTTq6ry5v\nVAdmNlvRqNwVnaD4UdrrN7MNknKS3i+pT1K7pJ9I2ijpQ5IOSbrR3fuTqnEsZeq/UlE/97ikg5Ju\nLvRE08bMWiX9RtKLin5vXNI3FM2gfkwp/hmMUfsSZeD4m9lHFZ30nJL/+rG7d5jZ+5TyYy+NWf+j\nqvL4M7EIAALBSVEACASBDgCBINABIBAEOgAEgkAHgEAQ6AAQCAIdAAJBoANAIP4fvctOtUsuGfQA\nAAAASUVORK5CYII=\n", 344 | "text/plain": [ 345 | "" 346 | ] 347 | }, 348 | "metadata": {}, 349 | "output_type": "display_data" 350 | } 351 | ], 352 | "source": [ 353 | "figure(1);plot(data.T[0], 
scores, 'b.');plot(data.T[0], scores_, 'gx');" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "Above, if we compare with previous plot, we can see that the updated anomaly scores end at below 12 (green crosses) while in previous plot, the anomaly scores end at below 20 (blue dots). Thus, the anomalous data got less anomalous given the new observed data set (data_object_2)." 361 | ] 362 | } 363 | ], 364 | "metadata": { 365 | "kernelspec": { 366 | "display_name": "Python 2", 367 | "language": "python", 368 | "name": "python2" 369 | }, 370 | "language_info": { 371 | "codemirror_mode": { 372 | "name": "ipython", 373 | "version": 2 374 | }, 375 | "file_extension": ".py", 376 | "mimetype": "text/x-python", 377 | "name": "python", 378 | "nbconvert_exporter": "python", 379 | "pygments_lexer": "ipython2", 380 | "version": "2.7.11" 381 | }, 382 | "widgets": { 383 | "state": {}, 384 | "version": "1.1.1" 385 | } 386 | }, 387 | "nbformat": 4, 388 | "nbformat_minor": 0 389 | } 390 | -------------------------------------------------------------------------------- /docs/pyISC_sklearn_anomaly_detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Anomaly Detection Overview" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "An anomaly detector for computing anomaly scores is constructed by providing a set of component distributions that defines the models used by the anomaly detector. Then, in order to train the anomaly detector, the $fit$ method can be called with some training data, and then compute the anomaly scores with the $anomaly\\_score$ method.\n", 15 | "Below, we show how to create and train a bivariate Gaussian distribution and how to compute anomaly scores." 
16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "data": { 25 | "text/plain": [ 26 | "array([ 0.09595115, 1.07745075, 0.0999642 , 0.05291047, 0.67480946,\n", 27 | " 0.77318013])" 28 | ] 29 | }, 30 | "execution_count": 1, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "import numpy as np\n", 37 | "import pyisc\n", 38 | "\n", 39 | "# Get some data:\n", 40 | "X = np.array([[20, 4], [1200, 130], [12, 8], [27, 8], [-9, 13], [2, -6]])\n", 41 | "\n", 42 | "# Create an anomaly detector where the numbers are column indices of the data:\n", 43 | "anomaly_detector = pyisc.AnomalyDetector(\n", 44 | " pyisc.P_Gaussian([0,1])\n", 45 | ")\n", 46 | "\n", 47 | "# The anomaly detector is trained\n", 48 | "anomaly_detector.fit(X) \n", 49 | "\n", 50 | "# Then, we can compute the anomaly scores for the data:\n", 51 | "anomaly_detector.anomaly_score(X)\n", 52 | "\n", 53 | "# The result is anomaly scores (with two decimal precision):\n", 54 | "#array([ 0.10, 1.08, 0.10, 0.05, 0.67, 0.77])" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "By comparing the number pairs in the list, the second element easily stands out as the \"most anomalous\". \n", 62 | "Similarly, we can create a anomaly detector with the Gamma or Poisson distributions where the numbers are the column indices into the input data:\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 11, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "pyisc.P_Gamma(frequency_column=0,period_column=1)\n", 72 | "\n", 73 | "pyisc.P_Poisson(frequency_column=0,period_column=1);" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "In case we have more than one known class of data points, it is also possible to train the detector to make a separate model for each class. 
\n", 81 | "In this case, if $y$ is an array with two or more class labels, the anomaly detector can still be similarly trained and likewise compute the anomaly scores:" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 28, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "array([ 0.09595115, 1.07745081, 0.0999642 , 0.05291047, 0.67480948,\n", 93 | " 0.77318014])" 94 | ] 95 | }, 96 | "execution_count": 28, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "#Create classes (only one class)\n", 103 | "y = np.zeros(len(X))\n", 104 | "\n", 105 | "#Fit classes\n", 106 | "anomaly_detector.fit(X,y)\n", 107 | "\n", 108 | "anomaly_detector.anomaly_score(X,y)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 2", 124 | "language": "python", 125 | "name": "python2" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 2 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython2", 137 | "version": "2.7.13" 138 | }, 139 | "widgets": { 140 | "state": {}, 141 | "version": "1.1.1" 142 | } 143 | }, 144 | "nbformat": 4, 145 | "nbformat_minor": 1 146 | } 147 | -------------------------------------------------------------------------------- /docs/pyISC_sklearn_outlier_detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Outlier Detection Overview" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "In a similar fashion as when 
we create an anomaly detector, we can create an outlier detector. \n", 15 | "The outlier detector differs from the anomaly detector since a fraction of outliers (contamination) is known beforehand and the output is a prediction of whether a data point is an outlier or not.\n", 16 | "Consequently, the outlier detector can dynamically select a threshold for deciding when a data point is an outlier or inlier from the training data. \n", 17 | "Below, we use the same data set as in previous section but we now know that there is one anomalous data point - an outlier - and five inliers in the data set." 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 4, 23 | "metadata": { 24 | "collapsed": false 25 | }, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "array([ 1, -1, 1, 1, 1, 1])" 31 | ] 32 | }, 33 | "execution_count": 4, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "import numpy as np\n", 40 | "import pyisc\n", 41 | "\n", 42 | "# Data with an outlier in element 1:\n", 43 | "X = [[20, 4], [1200, 130], [12, 8], [27, 8], [-9, 13], [2, -6]] \n", 44 | "\n", 45 | "# Create an outlier detector with the known fraction of outliers: 1 of 6:\n", 46 | "outlier_detector = pyisc.SklearnOutlierDetector(\n", 47 | " contamination=1.0/len(X),\n", 48 | " component_models=pyisc.P_Gaussian([0,1])\n", 49 | ")\n", 50 | "\n", 51 | "# The outlier detector is trained\n", 52 | "outlier_detector.fit(np.array(X)) \n", 53 | "\n", 54 | "# Then, the data is classified into being outliers or not:\n", 55 | "outlier_detector.predict(np.array(X))\n", 56 | "\n", 57 | "# The result is classification of outliers (-1) and inliers (1):\n", 58 | "#array([ 1, -1, 1, 1, 1, 1, 1])" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "Thus, we are able to detect the second element as an outlier. 
The outlier detector follows the API used in scikit-learn for outlier detection with known contamination (see http://scikit-learn.org/stable/modules/outlier_detection.html) " 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [] 76 | } 77 | ], 78 | "metadata": { 79 | "kernelspec": { 80 | "display_name": "Python 2", 81 | "language": "python", 82 | "name": "python2" 83 | }, 84 | "language_info": { 85 | "codemirror_mode": { 86 | "name": "ipython", 87 | "version": 2 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python", 93 | "pygments_lexer": "ipython2", 94 | "version": "2.7.11" 95 | }, 96 | "widgets": { 97 | "state": {}, 98 | "version": "1.1.1" 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 0 103 | } 104 | -------------------------------------------------------------------------------- /docs/pyISC_tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# The pyISC Interactive Tutorial" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Anomaly Detection\n", 15 | "(with unknown fraction of outliers)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "Anomaly Detection Overview " 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "Simple Anomaly Detection with Frequency Data" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "Multivariate Anomaly Detection" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "Anomaly Detection with Classes" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | 
"source": [ 50 | "## Outlier Detection \n", 51 | "(with known fraction of outliers)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "Outlier Detection Overview " 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "Comparison of outlier detectors" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "Outlier detection using real-world data" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "## Classification" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Using pyISC as Classifier" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "collapsed": true 94 | }, 95 | "outputs": [], 96 | "source": [] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 2", 102 | "language": "python", 103 | "name": "python2" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 2 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython2", 115 | "version": "2.7.13" 116 | }, 117 | "widgets": { 118 | "state": {}, 119 | "version": "1.1.1" 120 | } 121 | }, 122 | "nbformat": 4, 123 | "nbformat_minor": 1 124 | } 125 | -------------------------------------------------------------------------------- /meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: pyisc 3 | version: "develop" 4 | 5 | source: 6 | path: . 
7 | 8 | build: 9 | detect_binary_files_with_prefix: True 10 | 11 | 12 | requirements: 13 | build: 14 | - swig 15 | - python {{python}} 16 | - numpy 17 | - requests 18 | - setuptools 19 | - mingw [win] 20 | - libpython 1.0 [win] 21 | - mkl 2017.0.3 [win] 22 | 23 | run: 24 | - libcxx [not win] 25 | - python 26 | - numpy 27 | - scipy 28 | - scikit-learn 29 | - swig 30 | - libpython 1.0 [win] 31 | - mkl 2017.0.3 [win] 32 | 33 | 34 | test: 35 | imports: 36 | - pyisc 37 | 38 | about: 39 | home: https//www.sics.se 40 | license: LGPLv3 41 | license_file: LICENSE 42 | -------------------------------------------------------------------------------- /pyisc.i: -------------------------------------------------------------------------------- 1 |  2 | %module pyisc 3 | 4 | %{ 5 | #define SWIG_FILE_WITH_INIT 6 | 7 | /* Includes the header in the wrapper code */ 8 | #include "dataformat/format.hh" 9 | #include "dataformat/formatbinary.hh" 10 | #include "dataformat/formatcont.hh" 11 | #include "dataformat/formatdiscr.hh" 12 | #include "dataformat/formatsymbol.hh" 13 | #include "dataformat/dynindvector.hh" 14 | #include "dataformat/data.hh" 15 | #include "isc2/isc_exportimport.hh" 16 | #include "isc2/isc_micromodel.hh" 17 | #include "isc2/hmatrix.hh" 18 | #include "isc2/isc_micromodel_gaussian.hh" 19 | #include "isc2/isc_component.hh" 20 | #include "isc2/isc_mixture.hh" 21 | #include "isc2/gamma.hh" 22 | #include "isc2/isc_micromodel_multigaussian.hh" 23 | #include "isc2/hgf.hh" 24 | #include "isc2/isc_micromodel_poissongamma.hh" 25 | #include "isc2/isc_micromodel_markovgaussian.hh" 26 | //#include "isc2/isc_micromodel_multidirichlet.hh" 27 | #include "isc2/anomalydetector.hh" 28 | #include "src/_Format.hh" 29 | #include "src/_DataObject.hh" 30 | #include "src/_AnomalyDetector.hh" 31 | #include "src/_JSonExporter.hh" 32 | #include "src/_JSonImporter.hh" 33 | 34 | %} 35 | %include 36 | %include "numpy.i" 37 | %include 38 | %include 39 | %init %{ 40 | import_array(); 41 | %} 42 | 43 | 
44 | 45 | 46 | 47 | %inline %{ 48 | /* Create any sort of [size] array */ 49 | 50 | int *_int_array(int size) { 51 | return (int *) new int[size]; 52 | } 53 | 54 | int **_int_pointer_array(int size) { 55 | return (int **) new int*[size]; 56 | } 57 | 58 | void _free_array_int_pointer(int** array, int length) { 59 | delete [] array; 60 | } 61 | 62 | void _set_int_array(int** array2D, int index, int*array1D) { 63 | array2D[index] = array1D; 64 | } 65 | 66 | void _set_array_value(int *array1, int index, int val) { 67 | array1[index] = val; 68 | } 69 | 70 | int _get_array_value(int *array1, int index) { 71 | return array1[index]; 72 | } 73 | 74 | double* _double_array(int size) { 75 | return (double*) new double[size]; 76 | } 77 | 78 | int _get_int_value(int *array1, int index) { 79 | return array1[index]; 80 | } 81 | 82 | 83 | intfloat* _intfloat_array(int size) { 84 | return (intfloat*) new intfloat[size]; 85 | } 86 | 87 | void _free_array_intfloat(intfloat* array) { 88 | delete [] array; 89 | } 90 | 91 | float _get_intfloat_value(intfloat *array1, int index) { 92 | return (float) array1[index]; 93 | } 94 | 95 | 96 | double _get_double_value(double* array1, int index) { 97 | 98 | return array1[index]; 99 | } 100 | 101 | double _set_double_value(double* array1, int index, double value) { 102 | array1[index] = value; 103 | } 104 | 105 | double* _to_cpp_array(double* IN_ARRAY1, int DIM1) { 106 | double* out_array = new double[DIM1]; 107 | for(int i=0; i < DIM1; i++) { 108 | out_array[i] = IN_ARRAY1[i]; 109 | } 110 | 111 | return out_array; 112 | } 113 | 114 | void _free_array_double(double* array) { 115 | delete [] array; 116 | } 117 | 118 | int* _to_cpp_array_int(int* IN_ARRAY1, int DIM1) { 119 | int* out_array = new int[DIM1]; 120 | for(int i=0; i < DIM1; i++) { 121 | out_array[i] = IN_ARRAY1[i]; 122 | } 123 | 124 | return out_array; 125 | } 126 | 127 | void _free_array_int(int* array) { 128 | delete [] array; 129 | } 130 | 131 | void _to_numpy_array_double(double* 
inarray, double* ARGOUT_ARRAY1, int DIM1) { 132 | for(int i=0; i < DIM1; i++) { 133 | ARGOUT_ARRAY1[i] = inarray[i]; 134 | } 135 | } 136 | 137 | void _to_numpy_array_int(int* inarray, int* ARGOUT_ARRAY1, int DIM1) { 138 | for(int i=0; i < DIM1; i++) { 139 | ARGOUT_ARRAY1[i] = inarray[i]; 140 | } 141 | } 142 | 143 | 144 | char* _get_string_value(char** strings, int i) { 145 | return strings[i]; 146 | } 147 | 148 | 149 | IscMarkovGaussMicroModel** _to_pointer(std::vector vec) { 150 | IscMarkovGaussMicroModel** new_vec = new IscMarkovGaussMicroModel*[vec.size()]; 151 | for(int i=0; i < vec.size(); i++) { 152 | new_vec[i] = vec[i]; 153 | } 154 | return new_vec; 155 | } 156 | 157 | void _free_pointer(IscMarkovGaussMicroModel** new_vec) { 158 | delete [] new_vec; 159 | } 160 | 161 | 162 | %} 163 | 164 | %apply (double* IN_ARRAY1, int DIM1) {(double* in_array1D, int num_of_columns)} 165 | %apply (double* IN_ARRAY2, int DIM1, int DIM2) {(double* in_array2D, int num_of_rows, int num_of_columns)} 166 | %apply (double* ARGOUT_ARRAY1, int DIM1) {(double* deviations, int deviations_length)} 167 | %apply (int* ARGOUT_ARRAY1, int DIM1) {(int* class_ids, int class_ids_length)} 168 | %apply (int* ARGOUT_ARRAY1, int DIM1) {(int* cluster_ids, int cluster_ids_length)} 169 | %apply (double* ARGOUT_ARRAY1, int DIM1) {(double* out_1DArray, int num_of_elements)} 170 | %apply (double* ARGOUT_ARRAY1, int DIM1) {(double* logp, int size)} 171 | 172 | /* Parse the header file to generate wrappers */ 173 | 174 | enum IscCombinationRule {IscMin, IscMax, IscPlus}; 175 | 176 | 177 | %ignore IscCombinationRule; 178 | %ignore IscMin; 179 | %ignore IscMax; 180 | %ignore IscPlus; 181 | 182 | %rename ("_%s", regexmatch$name="^Isc") ""; 183 | 184 | %include "isc2/isc_exportimport.hh" 185 | %include "src/_Format.hh" 186 | %include "src/_DataObject.hh" 187 | %include "src/_AnomalyDetector.hh" 188 | %include "src/_JSonExporter.hh" 189 | %include "src/_JSonImporter.hh" 190 | 191 | %include 
"isc2/isc_component.hh" 192 | %include "isc2/isc_micromodel.hh" 193 | %include "isc2/isc_micromodel_multigaussian.hh" 194 | %include "isc2/isc_micromodel_poissongamma.hh" 195 | %include "isc2/isc_micromodel_markovgaussian.hh" 196 | 197 | 198 | #%include "isc2/isc_micromodel_multidirichlet.hh" 199 | %template(_IscMicroModelVector) std::vector; 200 | %template(_IscMarkovGaussMicroModelVector) std::vector; 201 | 202 | 203 | %pythoncode %{ 204 | from _pyisc_modules.BaseISC import * 205 | from _pyisc_modules.AnomalyDetector import * 206 | from _pyisc_modules.DataObject import * 207 | from _pyisc_modules.SklearnClassifier import * 208 | from _pyisc_modules.SklearnOutlierDetector import * 209 | from _pyisc_modules.AnomalyClustering import * 210 | from _pyisc_modules.OutlierClustering import * 211 | from numpy import array, dtype, double 212 | 213 | 214 | def _to_numpy_array(inarray, n, type=double): 215 | if type == double: 216 | return _to_numpy_array_double(inarray,n); 217 | elif type == int: 218 | return _to_numpy_array_int(inarray,n); 219 | print ("Unknown type ", type) 220 | 221 | %} 222 | 223 | 224 | 225 | %extend pyisc::_DataObject { 226 | intfloat* _DataObject::__getitem__(int i) { 227 | return _get_intfloat(i); 228 | } 229 | } 230 | 231 | 232 | 233 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from setuptools import setup,Extension 4 | 5 | from numpy.distutils.misc_util import get_numpy_include_dirs 6 | from distutils.sysconfig import get_python_lib; 7 | 8 | 9 | ''' 10 | In order to create a source distribution run setup build_ext sdist, otherwise the pyisc.py will not be generated from 11 | pyisc.i, which is not distributed in the source distribution, only the generated sources are distributed. 12 | ''' 13 | 14 | ######## import numpy.i ########## 15 | # Import numpy.i from current version. 
16 | # See http://stackoverflow.com/questions/21855775/numpy-i-is-missing-what-is-the-recommended-way-to-install-it 17 | 18 | 19 | np_file_name = 'numpy.i' 20 | 21 | if not os.path.exists(np_file_name): 22 | import re 23 | import requests 24 | import numpy 25 | 26 | np_version = re.compile(r'(?P[0-9]+)\.' 27 | '(?P[0-9]+)') \ 28 | .search(numpy.__version__) 29 | np_version_string = np_version.group() 30 | np_version_info = {key: int(value) 31 | for key, value in list(np_version.groupdict().items())} 32 | 33 | 34 | np_file_url = 'https://raw.githubusercontent.com/numpy/numpy/maintenance/' + \ 35 | np_version_string + '.x/tools/swig/' + np_file_name 36 | if(np_version_info['MAJOR'] == 1 and np_version_info['MINOR'] < 9): 37 | np_file_url = np_file_url.replace('tools', 'doc') 38 | 39 | chunk_size = 8196 40 | with open(np_file_name, 'wb') as file: 41 | for chunk in requests.get(np_file_url, 42 | stream=True).iter_content(chunk_size): 43 | file.write(chunk) 44 | 45 | ###### END numpy.i import ####### 46 | 47 | extra_flags = [] 48 | 49 | disc_dir = "." 
50 | 51 | arduinojson_dir = os.path.join("ArduinoJson","src") 52 | dataframe_src_dir = os.path.join(disc_dir,'dataformat') 53 | isc_src_dir = os.path.join(disc_dir, 'isc2') 54 | pyisc_src_dir = "src" 55 | pyisc_module_dir = "_pyisc_modules" 56 | isclibraries = ["-Wall", "-O"] 57 | 58 | numpyincdir = get_numpy_include_dirs() 59 | 60 | py_modules = [ 61 | os.path.join(pyisc_module_dir, src) for src in 62 | ["__init__", 63 | "BaseISC", 64 | "AnomalyDetector", 65 | "DataObject", 66 | "SklearnOutlierDetector", 67 | "SklearnClassifier", 68 | "AnomalyClustering", 69 | "OutlierClustering", 70 | ] 71 | ]\ 72 | +["pyisc"] 73 | 74 | 75 | pylib = get_python_lib() 76 | 77 | # Must be updated if file structure has changed 78 | if "uninstall" in sys.argv: 79 | 80 | from glob import glob 81 | files = [os.path.join(pylib, mod)+".py" for mod in py_modules] + \ 82 | [os.path.join(pylib, mod)+".pyc" for mod in py_modules] + \ 83 | [os.path.join(pylib,pyisc_module_dir)] + \ 84 | [os.path.join(pylib, "pyisc-1.0-py2.7.egg-info")] + \ 85 | glob(os.path.os.path.join(pylib, "_pyisc.*")) 86 | 87 | 88 | for file in files: 89 | if os.path.exists(file): 90 | if os.path.isdir(file): 91 | os.removedirs(file) 92 | else: 93 | os.remove(file) 94 | print("removing "+file) 95 | 96 | sys.exit() 97 | 98 | #add extra flags as needed, look in file our-g++ 99 | 100 | if sys.platform == 'darwin': 101 | isclibraries += ["z"] 102 | extra_flags = ["-DPLATFORM_MAC"] 103 | elif sys.platform == "win32": 104 | extra_flags = ["-DPLATFORM_MSW"] 105 | else: # Default, works for Linux 106 | isclibraries += ["z"] 107 | extra_flags = ["-Wmissing-declarations","-DUSE_WCHAR -DPLATFORM_GTK"] 108 | 109 | #extra_flags += ['-std=c++11'] 110 | 111 | dataframe_sources = [os.path.join(dataframe_src_dir, src) 112 | for src in "readtokens.o table.o format.o formatdispatch.o formatbinary.o " \ 113 | "formatdiscr.o formatcont.o formatsymbol.o formattime.o formatunknown.o " \ 114 | "data.o datafile.o datadispatch.o".replace(".o", 
".cc").split()] 115 | 116 | isc_sources = [os.path.join(isc_src_dir, src) 117 | for src in "anomalydetector.o isc_mixture.o isc_component.o isc_micromodel_poissongamma.o " \ 118 | "isc_micromodel_gaussian.o isc_micromodel_multigaussian.o " \ 119 | "isc_micromodel_markovgaussian.o " \ 120 | "hmatrix.o gamma.o hgf.o" 121 | .replace(".o", ".cc").split()] 122 | 123 | pyisc_sources = [os.path.join(pyisc_src_dir, src) for src in ["_Format.cc", "_DataObject.cc", "_AnomalyDetector.cc", "_JSonExporter.cc", "_JSonImporter.cc", "mystring.cc"]] 124 | pyisc_headers = [s.replace(".cc", ".hh") for s in pyisc_sources] 125 | 126 | # Only run when creating the distribution, not when installing it on someone else computer. Removes dependency on Swig 127 | if os.path.exists('pyisc.i'): 128 | setup(name="pyisc", 129 | author="Tomas Olsson", 130 | author_email="tomas.olsson@ri.se", 131 | url="http://www.sics.se", 132 | version="1.0", 133 | ext_modules=[ 134 | Extension("_pyisc", 135 | language='c++', 136 | sources=["pyisc.i"]+dataframe_sources+isc_sources+pyisc_sources, 137 | include_dirs=[disc_dir, isc_src_dir, dataframe_src_dir, pyisc_src_dir, arduinojson_dir]+numpyincdir, 138 | extra_compile_args=extra_flags, 139 | swig_opts=['-c++','-I'+str(disc_dir)]) 140 | ], 141 | license="LGPLv3", 142 | classifiers=[ 143 | 'License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)' 144 | ] 145 | ) 146 | 147 | # The following overlapping setup is only run in order to inlcude pyisc.py when all *.py files are copied to the same folder. 
148 | setup(name="pyisc", 149 | author="Tomas Olsson", 150 | author_email="tomas.olsson@ri.se", 151 | url="http://www.sics.se", 152 | version="1.0", 153 | ext_modules=[ 154 | Extension("_pyisc", 155 | language='c++', 156 | sources=["pyisc.i"]+dataframe_sources+isc_sources+pyisc_sources, 157 | include_dirs=[disc_dir, isc_src_dir,dataframe_src_dir,pyisc_src_dir, arduinojson_dir]+numpyincdir, 158 | extra_compile_args=extra_flags, 159 | swig_opts=['-c++', '-I'+str(disc_dir)]) 160 | ], 161 | py_modules=py_modules, 162 | license="LGPLv3+", 163 | classifiers=[ 164 | 'License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)' 165 | ] 166 | ) 167 | 168 | 169 | -------------------------------------------------------------------------------- /setup2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from distutils.sysconfig import get_python_lib; 4 | pylib = get_python_lib() 5 | 6 | for file in os.listdir('.'): 7 | if not file.startswith('setup'): 8 | print("copy", file, "to", os.path.join(pylib, file)) 9 | if os.path.isdir(file): 10 | dst = os.path.join(pylib, file) 11 | if os.path.exists(dst): 12 | shutil.rmtree(dst) 13 | shutil.copytree(file,dst) 14 | else: 15 | shutil.copy(file,pylib) 16 | -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STREAM3/pyISC/b5615fe5d6b3e474f7afcdf3f3e44b3dded2e889/src/.DS_Store -------------------------------------------------------------------------------- /src/_AnomalyDetector.cc: -------------------------------------------------------------------------------- 1 | /* 2 | -------------------------------------------------------------------------- 3 | Copyright (C) 2014, 2015, 2016 SICS Swedish ICT AB 4 | 5 | Main author: Tomas Olsson 6 | 7 | This code is free software: you can redistribute 
it and/or modify it 8 | under the terms of the GNU Lesser General Public License as published 9 | by the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This code is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 16 | 17 | You should have received a copy of the GNU Lesser General Public License 18 | along with this code. If not, see . 19 | -------------------------------------------------------------------------- 20 | */ 21 | 22 | #include "_AnomalyDetector.hh" 23 | #include 24 | #ifdef WIN32 25 | #define _USE_MATH_DEFINES 26 | #include 27 | #endif 28 | 29 | 30 | /** 31 | * This is a function used to create a micro model for a given mixture component 32 | * 33 | * co is the creating object, that is, the inner anomaly detector. 34 | */ 35 | ::IscMicroModel *inner_create_micro_model(const void* co, int mixtureCompIndex) 36 | { 37 | return ((pyisc::_AnomalyDetector*)co)->_CreateMixtureComponet(mixtureCompIndex); 38 | } 39 | 40 | namespace pyisc { 41 | 42 | ::IscMicroModel *_AnomalyDetector::_CreateMixtureComponet(int mixtureComponentIndex) { 43 | return this->component_distribution_creators[mixtureComponentIndex]->create(); 44 | } 45 | 46 | 47 | _AnomalyDetector::_AnomalyDetector( 48 | int off, int splt, double th, 49 | int cl, ::IscCombinationRule cr, 50 | std::vector component_distribution_creators) : 51 | ::AnomalyDetector(component_distribution_creators.size(),off,splt,th,cl,cr, inner_create_micro_model) { 52 | 53 | for(int i=0; i < component_distribution_creators.size(); i++) { 54 | this->component_distribution_creators.push_back(component_distribution_creators[i]->create()); 55 | } 56 | if(DEBUG) 57 | printf("_AnomalyDetector created\n"); 58 | } 59 | 60 | 61 | void 
_AnomalyDetector::importModel(IscAbstractModelImporter *importer) { 62 | if(DEBUG) 63 | printf("_AnomalyDetector calling importer\n"); 64 | 65 | IscAbstractModelImporter *innerImporter = importer->getModelImporter("AnomalyDetector"); 66 | 67 | if(DEBUG) 68 | printf("_AnomalyDetector importer cannot reach this far \n"); 69 | 70 | ::AnomalyDetector::importModel(innerImporter); 71 | 72 | delete innerImporter; 73 | 74 | if(DEBUG) 75 | printf("_AnomalyDetector imported\n"); 76 | } 77 | 78 | 79 | void _AnomalyDetector::exportModel(IscAbstractModelExporter *exporter) { 80 | IscAbstractModelExporter *innerExporter = exporter->createModelExporter("AnomalyDetector"); 81 | ::AnomalyDetector::exportModel(innerExporter); 82 | delete innerExporter; 83 | 84 | if(DEBUG) 85 | printf("_AnomalyDetector exported\n"); 86 | 87 | } 88 | 89 | _AnomalyDetector::~_AnomalyDetector() { 90 | if(DEBUG) 91 | printf("_AnomalyDetector deletion started\n"); 92 | 93 | for(int i=0; i < this->component_distribution_creators.size(); i++) { 94 | delete this->component_distribution_creators[i]; 95 | } 96 | 97 | if(DEBUG) 98 | printf("_AnomalyDetector deleted\n"); 99 | } 100 | 101 | 102 | void _AnomalyDetector::_SetParams(int off, int splt, double th, int cl) { 103 | ::AnomalyDetector::SetParams(off,splt,th,cl); 104 | } 105 | 106 | void _AnomalyDetector::_Reset() { 107 | ::AnomalyDetector::Reset(); 108 | } 109 | 110 | void _AnomalyDetector::_TrainOne(Format* format, double* in_array1D, int num_of_columns) { 111 | intfloat* vec = new intfloat[num_of_columns]; 112 | for (int j = 0; j < num_of_columns; j++) { 113 | if (format->get_isc_format()->nth(j)->type() == FORMATSPEC_DISCR) { 114 | vec[j].i = (int) in_array1D[j]; 115 | } else if (format->get_isc_format()->nth(j)->type() 116 | == FORMATSPEC_CONT) { 117 | vec[j].f = (float) in_array1D[j]; 118 | } 119 | } 120 | ::AnomalyDetector::TrainOne(vec); 121 | 122 | delete [] vec; 123 | } 124 | 125 | void _AnomalyDetector::_UntrainOne(Format* format, double* 
in_array1D, int num_of_columns) { 126 | intfloat* vec = new intfloat[num_of_columns]; 127 | for (int j = 0; j < num_of_columns; j++) { 128 | if (format->get_isc_format()->nth(j)->type() == FORMATSPEC_DISCR) { 129 | vec[j].i = (int) in_array1D[j]; 130 | } else if (format->get_isc_format()->nth(j)->type() 131 | == FORMATSPEC_CONT) { 132 | vec[j].f = (float) in_array1D[j]; 133 | } 134 | } 135 | ::AnomalyDetector::UntrainOne(vec); 136 | 137 | delete [] vec; 138 | } 139 | 140 | void _AnomalyDetector::_TrainDataIncrementally(pyisc::_DataObject* d) { 141 | for(int i=0; i < d->size(); i++) { 142 | ::AnomalyDetector::TrainOne((*d->get_isc_data_object())[i]); 143 | } 144 | 145 | } 146 | 147 | void _AnomalyDetector::_UntrainDataIncrementally(pyisc::_DataObject* d) { 148 | for(int i=0; i < d->size(); i++) { 149 | ::AnomalyDetector::UntrainOne((*d->get_isc_data_object())[i]); 150 | } 151 | 152 | } 153 | 154 | void _AnomalyDetector::_TrainData(_DataObject* d) { 155 | ::AnomalyDetector::TrainData(d->get_isc_data_object()); 156 | } 157 | 158 | void _AnomalyDetector::_CalcAnomaly(class _DataObject* d, double* deviations, int deviantions_length) { 159 | if( deviantions_length != d->size()) { 160 | printf("Wrong deviations lengths"); 161 | } 162 | ::AnomalyDetector::CalcAnomaly(d->get_isc_data_object(), deviations); 163 | } 164 | 165 | void _AnomalyDetector::_ClassifyData(class _DataObject* d, int* class_ids, int class_ids_length, 166 | int* cluster_ids, int cluster_ids_length) { 167 | if( class_ids_length != d->size() && cluster_ids_length != d->size()) { 168 | printf("Wrong number of classes or clusters"); 169 | } 170 | 171 | ::AnomalyDetector::ClassifyData(d->get_isc_data_object(), class_ids, cluster_ids); 172 | 173 | } 174 | 175 | int _AnomalyDetector::_CalcAnomalyDetails(union intfloat* vec, 176 | double* anom, int* cla, int* clu, double* devs, union intfloat* peak, 177 | union intfloat* min, union intfloat* max, double* expect, double* var) { 178 | return 
::AnomalyDetector::CalcAnomalyDetails(vec, *anom, *cla, *clu, devs, peak, min, max, expect, var); 179 | } 180 | 181 | void _AnomalyDetector::_LogProbabilityOfData(class _DataObject* data, double* logp, int size) { 182 | ::DataObject *d = data->get_isc_data_object(); 183 | int i, id = -1; 184 | intfloat* vec; 185 | int n = d->size(); 186 | double min_logp=HUGE_VALF; 187 | for (i=0; ilogp(vec+offset, id); 192 | if(logp[i] < min_logp) { 193 | min_logp = logp[i]; 194 | } 195 | } 196 | } 197 | 198 | /* 199 | int AnomalyDetector::CalcAnomalyDetailsSingle(union intfloat* vec, 200 | int mmind, int cla, int clu, double* devs, union intfloat* peak, 201 | union intfloat* min, union intfloat* max, double* expect, double* var) { 202 | }*/ 203 | 204 | 205 | 206 | 207 | 208 | void _AnomalyDetector::_CalcAnomalyDetailPerformanceTest(pyisc::_DataObject* d) { 209 | ::DataObject* data = d->get_isc_data_object(); 210 | double* expect2 = new double[d->length()]; 211 | double dum3; 212 | int dum1, dum2; 213 | 214 | double *devs = new double[::AnomalyDetector::len]; 215 | 216 | for(int i=0; i < d->size(); i++) { 217 | ::AnomalyDetector::CalcAnomalyDetails((*data)[i], dum3, dum1, dum2, devs, 218 | 0,0,0,expect2,0); 219 | } 220 | 221 | delete [] devs; 222 | delete [] expect2; 223 | 224 | } 225 | 226 | 227 | 228 | } /* namespace pyisc */ 229 | 230 | 231 | -------------------------------------------------------------------------------- /src/_AnomalyDetector.hh: -------------------------------------------------------------------------------- 1 | /* 2 | -------------------------------------------------------------------------- 3 | Copyright (C) 2014, 2015, 2016 SICS Swedish ICT AB 4 | 5 | Main author: Tomas Olsson 6 | 7 | This code is free software: you can redistribute it and/or modify it 8 | under the terms of the GNU Lesser General Public License as published 9 | by the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 
11 | 12 | This code is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 16 | 17 | You should have received a copy of the GNU Lesser General Public License 18 | along with this code. If not, see . 19 | -------------------------------------------------------------------------- 20 | */ 21 | 22 | #ifndef ANOMALYDETECTOR2_HH_ 23 | #define ANOMALYDETECTOR2_HH_ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "_DataObject.hh" 31 | #include 32 | #include 33 | #include "isc_exportimport.hh" 34 | 35 | 36 | namespace pyisc { 37 | 38 | 39 | class _AnomalyDetector : ::AnomalyDetector { 40 | public: 41 | 42 | _AnomalyDetector( 43 | int off, 44 | int splt, 45 | double th, 46 | int cl, 47 | ::IscCombinationRule cr, 48 | std::vector vector); 49 | /** 50 | * n is number of isc mixture models 51 | * off is the first column containing features used by the detector 52 | * splt is a the column containing a known class 53 | * th is a threshold on when to consider a vector of data as anomalous 54 | * cl is a variable if zero indicate no clustering else indicates that clustering should be done 55 | * cr is variable indicating how the anomaly scores for the different isc mixture components should be combined 56 | * cf is a function that creates a isc micro component for each of the n isc mixture component. 57 | * 58 | * 59 | * An isc micro model uses or more columns as input. 
60 | * 61 | * Pattern of input data vector: (ignored columns(header), distribution components, #distribution input values per component) 62 | * 63 | */ 64 | // _AnomalyDetector(int n, int off, int splt, double th, int cl, 65 | // ::IscCombinationRule cr, ::IscCreateFunc cf); // Or a creation function for the appropriate micromodels can be used 66 | virtual ~_AnomalyDetector(); 67 | 68 | virtual void importModel(IscAbstractModelImporter *importer); 69 | virtual void exportModel(IscAbstractModelExporter *exporter); 70 | virtual void _SetParams(int off, int splt, double th, int cl); 71 | virtual void _Reset(); 72 | virtual void _TrainOne(Format* format, double* in_array1D, int num_of_columns); 73 | virtual void _UntrainOne(Format* format, double* in_array1D, int num_of_columns); 74 | virtual void _TrainData(_DataObject* d); 75 | virtual void _TrainDataIncrementally(_DataObject* d); 76 | virtual void _UntrainDataIncrementally(_DataObject* d); 77 | 78 | virtual void _CalcAnomaly(class _DataObject* d, double* deviations, int deviations_length); 79 | virtual void _ClassifyData(class _DataObject* d, int* class_ids, int class_ids_length, int* cluster_ids, int cluster_ids_length); 80 | 81 | virtual int _CalcAnomalyDetails(union intfloat* vec, double* anom, int* cla, 82 | int* clu, double* devs = 0, union intfloat* peak = 0, 83 | union intfloat* min = 0, union intfloat* max = 0, 84 | double* expect = 0, double* var = 0); 85 | /*virtual int CalcAnomalyDetailsSingle(union intfloat* vec, int mmind, 86 | int cla, int clu, double* devs = 0, union intfloat* peak = 0, 87 | union intfloat* min = 0, union intfloat* max = 0, 88 | double* expect = 0, double* var = 0);*/ 89 | 90 | virtual ::IscMicroModel *_CreateMixtureComponet(int mixtureComponentIndex); 91 | 92 | virtual ::AnomalyDetector* get_isc_anomaly_detector() {return this;}; 93 | 94 | virtual void _CalcAnomalyDetailPerformanceTest(pyisc::_DataObject* obj); 95 | 96 | virtual void _LogProbabilityOfData(class _DataObject* d, 
double* logp, int size); 97 | 98 | private: 99 | std::vector component_distribution_creators; 100 | }; 101 | 102 | 103 | } /* namespace pyisc */ 104 | 105 | #endif /* ANOMALYDETECTOR2_HH_ */ 106 | -------------------------------------------------------------------------------- /src/_DataObject.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * DataObject.cc 3 | * 4 | * Created on: Mar 6, 2015 5 | * Author: tol 6 | */ 7 | 8 | #include "_DataObject.hh" 9 | #include 10 | 11 | 12 | namespace pyisc { 13 | 14 | //double* _to_cpp_array(double* in_array1D, int num_of_columns) { 15 | // return in_array1D; 16 | //} 17 | 18 | void _DataObject::init(pyisc::Format* format) { 19 | is_data_obj_created = 1; 20 | is_data_format_created = 0; 21 | 22 | isc_data_obj = new ::DataObject(format->get_isc_format()); 23 | data_format = format; 24 | 25 | if(DEBUG) 26 | printf("Create _DataObject\n"); 27 | } 28 | 29 | _DataObject::_DataObject(pyisc::Format *format) { 30 | init(format); 31 | } 32 | 33 | _DataObject::_DataObject(pyisc::Format *format, double* in_array2D, int num_of_rows, 34 | int num_of_columns) { 35 | init(format); 36 | add2DArray(in_array2D, num_of_rows, num_of_columns); 37 | } 38 | 39 | _DataObject::_DataObject(const char* formatfile, const char* datafile) { 40 | isc_data_obj = new ::DataObject(formatfile,datafile); 41 | data_format = new Format(isc_data_obj->format()); 42 | is_data_obj_created = 1; 43 | is_data_format_created = 1; 44 | } 45 | _DataObject::_DataObject(::DataObject* data_object) { 46 | isc_data_obj = data_object; 47 | data_format = new Format(isc_data_obj->format()); 48 | is_data_obj_created = 0; 49 | is_data_format_created = 1; 50 | } 51 | 52 | _DataObject::~_DataObject() { 53 | if(DEBUG) { 54 | printf("Delete object"); 55 | } 56 | if (is_data_obj_created && isc_data_obj) { 57 | delete isc_data_obj; 58 | } 59 | if( is_data_format_created && data_format) { 60 | delete data_format; 61 | } 62 | } 63 | 64 | void 
_DataObject::add2DArray(double* in_array2D, int num_of_rows, int num_of_columns) { 65 | intfloat* vec; 66 | for (int i = 0; i < num_of_rows; i++) { 67 | vec = isc_data_obj->newentry(); 68 | _convert_to_intfloat((in_array2D+i*num_of_columns), num_of_columns, vec); 69 | } 70 | } 71 | 72 | 73 | void _DataObject::_convert_to_intfloat(double* in_array1D, int num_of_columns, intfloat* vec) { 74 | for (int j = 0; j < num_of_columns; j++) { 75 | switch(data_format->get_isc_format()->nth(j)->type()) { 76 | case FORMATSPEC_DISCR: 77 | case FORMATSPEC_SYMBOL: 78 | case FORMATSPEC_BINARY: 79 | case FORMATSPEC_UNKNOWN: 80 | case FormatSpecDatetimeType: 81 | vec[j].i = (int) in_array1D[j]; 82 | break; 83 | case FORMATSPEC_CONT: 84 | vec[j].f = (float) in_array1D[j]; 85 | break; 86 | default: 87 | printf("An unhandled isc format %i for value %f\n",data_format->get_isc_format()->nth(j)->type(), in_array1D[j]); 88 | } 89 | } 90 | } 91 | 92 | void _DataObject::add1DArray(double* in_array1D, int num_of_columns) { 93 | add2DArray(in_array1D, 1, num_of_columns); 94 | } 95 | 96 | 97 | int _DataObject::size() { 98 | return isc_data_obj->size(); 99 | } 100 | 101 | 102 | int _DataObject::length() { 103 | return isc_data_obj->length(); 104 | } 105 | 106 | Format* _DataObject::getFormat() { 107 | return data_format; 108 | } 109 | 110 | void pyisc::_DataObject::_as1DArray(double* out_1DArray, int num_of_elements) { 111 | int num_of_rows = isc_data_obj->size(); 112 | int num_of_columns = isc_data_obj->length(); 113 | if(num_of_elements != num_of_rows*num_of_columns) { 114 | printf("Wrong number of elements"); 115 | } 116 | 117 | intfloat* vec; 118 | for (int i = 0; i < num_of_rows; i++) { 119 | vec = (*isc_data_obj)[i]; 120 | _convert_to_numpyarray(vec, (out_1DArray+num_of_columns*i), num_of_columns); 121 | } 122 | } 123 | 124 | void pyisc::_DataObject::_convert_to_numpyarray(intfloat* vec, double* out_1DArray, int num_of_elements) { 125 | for (int j = 0; j < num_of_elements; j++) { 126 | 
switch(data_format->get_isc_format()->nth(j)->type()) { 127 | case FORMATSPEC_DISCR: 128 | case FORMATSPEC_SYMBOL: 129 | case FORMATSPEC_BINARY: 130 | case FORMATSPEC_UNKNOWN: 131 | case FormatSpecDatetimeType: 132 | out_1DArray[j] = (double) vec[j].i; 133 | break; 134 | case FORMATSPEC_CONT: 135 | out_1DArray[j] = (double) vec[j].f; 136 | break; 137 | default: 138 | printf("An unhandled isc format %i for value %i or %f\n",data_format->get_isc_format()->nth(j)->type(), vec[j].i, vec[j].f); 139 | } 140 | } 141 | } 142 | 143 | ::DataObject* _DataObject::get_isc_data_object() { 144 | return isc_data_obj; 145 | } 146 | 147 | void _DataObject::set_column_values(int column_index, double* in_array1D, int num_of_columns) { 148 | if(isc_data_obj->size() != num_of_columns) { 149 | printf("Array is not of same size as column array"); 150 | return; 151 | } 152 | for(int index=0; index < isc_data_obj->size(); index++) { 153 | switch(data_format->get_isc_format()->nth(column_index)->type()) { 154 | case FORMATSPEC_DISCR: 155 | case FORMATSPEC_SYMBOL: 156 | case FORMATSPEC_BINARY: 157 | case FORMATSPEC_UNKNOWN: 158 | case FormatSpecDatetimeType: 159 | (*isc_data_obj)[index][column_index].i = (int) in_array1D[index]; 160 | break; 161 | case FORMATSPEC_CONT: 162 | (*isc_data_obj)[index][column_index].f = (float) in_array1D[index]; 163 | break; 164 | default: 165 | printf("An unhandled isc format %i \n",data_format->get_isc_format()->nth(column_index)->type()); 166 | } 167 | } 168 | } 169 | 170 | } /* namespace pyisc */ 171 | 172 | void pyisc::_DataObject::_getRow(int row_index, double* out_1DArray, 173 | int num_of_elements) { 174 | int num_of_columns = length(); 175 | if(num_of_elements != num_of_columns) { 176 | printf("Wrong number of elements specified"); 177 | } 178 | intfloat* vec = (*isc_data_obj)[row_index]; 179 | _convert_to_numpyarray(vec, out_1DArray, num_of_columns); 180 | } 181 | 182 | intfloat* pyisc::_DataObject::_get_intfloat(int index) { 183 | return 
(*isc_data_obj)[index]; 184 | } 185 | 186 | 187 | -------------------------------------------------------------------------------- /src/_DataObject.hh: -------------------------------------------------------------------------------- 1 | /* 2 | -------------------------------------------------------------------------- 3 | Copyright (C) 2014, 2015, 2016 SICS Swedish ICT AB 4 | 5 | Main author: Tomas Olsson 6 | 7 | This code is free software: you can redistribute it and/or modify it 8 | under the terms of the GNU Lesser General Public License as published 9 | by the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This code is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 16 | 17 | You should have received a copy of the GNU Lesser General Public License 18 | along with this code. If not, see . 
19 | -------------------------------------------------------------------------- 20 | */ 21 | 22 | #ifndef DATAOBJECT_HH_ 23 | #define DATAOBJECT_HH_ 24 | 25 | #include 26 | #include 27 | #include "_Format.hh" 28 | 29 | namespace pyisc { 30 | 31 | //extern double* _to_cpp_array(double* in_array1D, int num_of_columns); 32 | class _DataObject { 33 | int is_data_obj_created = 0; 34 | int is_data_format_created = 0; 35 | 36 | protected: 37 | pyisc::Format* data_format; 38 | ::DataObject *isc_data_obj; 39 | 40 | public: 41 | /** 42 | * Create an empty DataObject with a Format that specifies the data types of the columns in a row 43 | */ 44 | _DataObject(pyisc::Format *f); 45 | /** 46 | * Create an DataObject for the double array with a Format that specifies the data types of the columns in a row 47 | */ 48 | _DataObject(pyisc::Format *format, double* in_array2D, int num_of_rows, int num_of_columns); 49 | virtual ~_DataObject(); 50 | 51 | /** 52 | * Read isc original data object from file 53 | */ 54 | _DataObject(const char* formatfile, const char* datafile = 0); 55 | 56 | /** 57 | * Convert isc original data object to pyisc 58 | */ 59 | _DataObject(::DataObject* data_object0); 60 | /** 61 | * Add a 1D numpy array as a row to the data object 62 | */ 63 | virtual void add1DArray(double* in_array1D, int num_of_columns); 64 | 65 | 66 | /** 67 | * Add a 2D numpy array to the data object 68 | */ 69 | virtual void add2DArray(double* in_array2D, int num_of_rows, int num_of_columns); 70 | 71 | /** 72 | * Returns number of rows. 73 | */ 74 | virtual int size(); 75 | 76 | /** 77 | * Returns number of columns. 78 | */ 79 | virtual int length(); 80 | 81 | virtual Format* getFormat(); 82 | 83 | /** 84 | * Returns a 1D array representation of the data rows*cols. 85 | */ 86 | virtual void _as1DArray(double* out_1DArray, int num_of_elements); 87 | 88 | /** 89 | * Returns a single array at the given row. 
90 | */ 91 | virtual void _getRow(int row_index, double* out_1DArray, int num_of_elements); 92 | 93 | virtual ::DataObject* get_isc_data_object(); 94 | 95 | /** 96 | * Takes an numpy array from swig and convert it to a provided intfloat pointer 97 | */ 98 | virtual void _convert_to_intfloat(double* in_array1D, int num_of_columns, intfloat* vec); 99 | /** 100 | * Takes an intfloat pointer from swig and convert it to a provided numpy array. 101 | */ 102 | virtual void _convert_to_numpyarray(intfloat* vec, double* ARGOUT_ARRAY1, int DIM1); 103 | 104 | virtual intfloat* _get_intfloat(int index); 105 | 106 | /** 107 | * Takes a numpy array and sets it values as the given column values. 108 | */ 109 | virtual void set_column_values(int column_index, double* in_array1D, int num_of_columns); 110 | 111 | protected: 112 | void init(pyisc::Format* format); 113 | 114 | }; 115 | 116 | } 117 | 118 | 119 | 120 | #endif /* DATAOBJECT_HH_ */ 121 | -------------------------------------------------------------------------------- /src/_Format.cc: -------------------------------------------------------------------------------- 1 | /* 2 | -------------------------------------------------------------------------- 3 | Copyright (C) 2014, 2015, 2016 SICS Swedish ICT AB 4 | 5 | Main author: Tomas Olsson 6 | 7 | This code is free software: you can redistribute it and/or modify it 8 | under the terms of the GNU Lesser General Public License as published 9 | by the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This code is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 16 | 17 | You should have received a copy of the GNU Lesser General Public License 18 | along with this code. If not, see . 
19 | -------------------------------------------------------------------------- 20 | */ 21 | #include "_Format.hh" 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | 28 | namespace pyisc { 29 | 30 | Format::Format() { 31 | isc_format = new ::Format(); 32 | is_format_created = 1; 33 | if(DEBUG) 34 | printf("Create isc format\n"); 35 | 36 | } 37 | 38 | Format::Format(::Format *isc_format0) { 39 | isc_format = isc_format0; 40 | 41 | } 42 | 43 | Format::~Format() { 44 | if(is_format_created && isc_format) { 45 | if(DEBUG) 46 | printf("Delete isc format\n"); 47 | delete isc_format; 48 | isc_format=0; 49 | } 50 | } 51 | 52 | void Format::addColumn(const char* name, ColumnType type) { 53 | switch(type) { 54 | case Continuous: 55 | isc_format->add(new ::FormatSpecCont(name)); 56 | break; 57 | case Discrete: 58 | isc_format->add(new ::FormatSpecDiscr(name)); 59 | break; 60 | case TIME: 61 | isc_format->add(new ::FormatSpecDatetime(name)); 62 | break; 63 | case Symbol: 64 | isc_format->add(new ::FormatSpecSymbol(name)); 65 | break; 66 | default: 67 | printf("Unknown column type %i", type); 68 | }; 69 | } 70 | 71 | void Format::add(FormatSpec* format_spec) { 72 | isc_format->add(format_spec->_isc_format->copy()); 73 | } 74 | 75 | 76 | } /* namespace pyisc */ 77 | 78 | ::Format* pyisc::Format::get_isc_format() { 79 | return isc_format; 80 | } 81 | 82 | void pyisc::Format::printColumnNames() { 83 | printf("Column names:\n"); 84 | for(int j=0; j < size(); j++) { 85 | printf(" %s Type %i\n",isc_format->nth(j)->name, isc_format->nth(j)->type()); 86 | } 87 | } 88 | 89 | int pyisc::Format::size() { 90 | return isc_format->length(); 91 | } 92 | 93 | -------------------------------------------------------------------------------- /src/_Format.hh: -------------------------------------------------------------------------------- 1 | /* 2 | -------------------------------------------------------------------------- 3 | Copyright (C) 2014, 2015, 2016 SICS Swedish ICT AB 4 | 5 
| Main author: Tomas Olsson 6 | 7 | This code is free software: you can redistribute it and/or modify it 8 | under the terms of the GNU Lesser General Public License as published 9 | by the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This code is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU Lesser General Public License for more details. 16 | 17 | You should have received a copy of the GNU Lesser General Public License 18 | along with this code. If not, see . 19 | -------------------------------------------------------------------------- 20 | */ 21 | 22 | 23 | #ifndef FORMAT_HH_ 24 | #define FORMAT_HH_ 25 | 26 | #include 27 | #include 28 | 29 | #ifndef DEBUG 30 | #define DEBUG 0 31 | #endif 32 | 33 | namespace pyisc { 34 | class FormatSpec{ 35 | 36 | public: 37 | FormatSpec(::FormatSpec *isc_format_spec) {_isc_format = isc_format_spec;}; 38 | const char* get_name() {return _isc_format->name;}; 39 | const char* represent(int v) { return _isc_format->represent(intfloat(v)); }; 40 | const char* represent(float v) { return _isc_format->represent(intfloat(v)); }; 41 | int getnum() { return _isc_format->getnum(); }; 42 | void add(const char* str) {_isc_format->add(str);}; 43 | ::FormatSpec *_isc_format; 44 | }; 45 | 46 | class Format { 47 | int is_format_created = 0; 48 | 49 | protected: 50 | ::Format* isc_format; 51 | 52 | public: 53 | enum ColumnType { 54 | Discrete, 55 | Continuous, 56 | Symbol, 57 | TIME 58 | }; 59 | 60 | Format(); 61 | /** 62 | * Convert isc orginial format to pyisc format 63 | */ 64 | Format(::Format*); 65 | virtual ~Format(); 66 | 67 | /** 68 | * Add a new column to the format with name and type. 69 | */ 70 | virtual void addColumn(const char *name, ColumnType type); 71 | 72 | /** 73 | * TODO memory leak! 
74 | */ 75 | virtual FormatSpec* get_nth_column(int n) {return new FormatSpec(isc_format->nth(n));}; 76 | virtual FormatSpec* nth(int n) {return get_nth_column(n);}; 77 | virtual void add(FormatSpec*); 78 | virtual int size(); 79 | 80 | virtual void printColumnNames(); 81 | 82 | virtual ::Format* get_isc_format(); 83 | }; 84 | } 85 | 86 | 87 | 88 | #endif /* FORMAT_HH_ */ 89 | -------------------------------------------------------------------------------- /src/_JSonExporter.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * _JSonExporter.cc 3 | * 4 | * Created on: Feb 9, 2018 5 | * Author: tol 6 | */ 7 | 8 | #include "_JSonExporter.hh" 9 | 10 | 11 | namespace pyisc { 12 | 13 | 14 | void _JSonExporter::notImplemented(){ 15 | root[std::string("EXPORT_NOT_IMPLEMENTED")] = true; 16 | } 17 | 18 | void _JSonExporter::addParameter(const char* parameter_name, const char* value){ 19 | root[std::string(parameter_name)] = std::string(value); 20 | } 21 | 22 | void _JSonExporter::addParameter(const char* parameter_name, int value){ 23 | root[std::string(parameter_name)] = value; 24 | } 25 | void _JSonExporter::addParameter(const char* parameter_name, float value){ 26 | root[std::string(parameter_name)] = value; 27 | } 28 | void _JSonExporter::addParameter(const char* parameter_name, double value){ 29 | root[std::string(parameter_name)] = value; 30 | } 31 | void _JSonExporter::addParameter(const char* parameter_name, int *values, int length){ 32 | ArduinoJson::JsonArray& array = root.createNestedArray(std::string(parameter_name)); 33 | for(int i=0; i < length;i++) { 34 | array.add(values[i]); 35 | } 36 | } 37 | void _JSonExporter::addParameter(const char* parameter_name, float *values, int length){ 38 | ArduinoJson::JsonArray& array = root.createNestedArray(std::string(parameter_name)); 39 | for(int i=0; i < length;i++) { 40 | array.add(values[i]); 41 | } 42 | } 43 | void _JSonExporter::addParameter(const char* parameter_name, 
double *values, int length){ 44 | ArduinoJson::JsonArray& array = root.createNestedArray(std::string(parameter_name)); 45 | for(int i=0; i < length;i++) { 46 | array.add(values[i]); 47 | } 48 | } 49 | 50 | IscAbstractModelExporter* _JSonExporter::createModelExporter(const char * parameter_name) { 51 | return ( IscAbstractModelExporter*) new _JSonExporter(root.createNestedObject(std::string(parameter_name))); 52 | } 53 | IscAbstractModelExporter* _JSonExporter::createModelExporter(int parameter_id){ 54 | return ( IscAbstractModelExporter*) new _JSonExporter(root.createNestedObject(to_string(parameter_id))); 55 | } 56 | 57 | std::string _JSonExporter::getJSonString() { 58 | std::string str; 59 | root.prettyPrintTo(str); 60 | return str; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/_JSonExporter.hh: -------------------------------------------------------------------------------- 1 | /* 2 | * _JSonExporter.hh 3 | * 4 | * Created on: Feb 9, 2018 5 | * Author: tol 6 | */ 7 | 8 | #ifndef JSONEXPORTER_HH_ 9 | #define JSONEXPORTER_HH_ 10 | 11 | #include "isc_exportimport.hh" 12 | #include "ArduinoJson.hpp" 13 | #include "mystring.hh" 14 | 15 | 16 | namespace pyisc { 17 | 18 | 19 | // for convenience 20 | 21 | class _JSonExporter : ::IscAbstractModelExporter { 22 | public: 23 | _JSonExporter():root(jsonBuffer.createObject()){}; 24 | virtual ~_JSonExporter(){}; 25 | 26 | virtual void notImplemented(); 27 | 28 | virtual void addParameter(const char* parameter_name, const char* value); 29 | virtual void addParameter(const char* parameter_name, int value); 30 | virtual void addParameter(const char* parameter_name, float value); 31 | virtual void addParameter(const char* parameter_name, double value); 32 | virtual void addParameter(const char* parameter_name, int *value, int length); 33 | virtual void addParameter(const char* parameter_name, float *value, int length); 34 | virtual void addParameter(const char* 
parameter_name, double *value, int length); 35 | 36 | virtual IscAbstractModelExporter* createModelExporter(const char * parameter_name); 37 | virtual IscAbstractModelExporter* createModelExporter(int parameter_id); 38 | 39 | virtual std::string getJSonString(); 40 | 41 | protected: 42 | _JSonExporter(ArduinoJson::JsonObject& root):root(root){ 43 | }; 44 | 45 | private: 46 | ArduinoJson::DynamicJsonBuffer jsonBuffer; 47 | ArduinoJson::JsonObject& root; 48 | }; 49 | 50 | } 51 | 52 | 53 | #endif /* JSONEXPORTER_HH_ */ 54 | 55 | -------------------------------------------------------------------------------- /src/_JSonImporter.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * _JSonImporter.cc 3 | * 4 | * Created on: 19 Feb 2018 5 | * Author: tol 6 | */ 7 | 8 | 9 | #include "_JSonImporter.hh" 10 | 11 | namespace pyisc { 12 | 13 | void _JSonImporter::notImplemented(){ 14 | printf("JSon importer not implemented\n"); 15 | } 16 | 17 | void _JSonImporter::fillParameter(const char* parameter_name, int& value){ 18 | if(DEBUG) 19 | printf("Import %s as int:%i", parameter_name, value); 20 | value = (*root)[std::string(parameter_name)]; 21 | } 22 | void _JSonImporter::fillParameter(const char* parameter_name, float& value){ 23 | if(DEBUG) 24 | printf("Import %s as float:%f", parameter_name, value); 25 | value = (*root)[std::string(parameter_name)]; 26 | } 27 | void _JSonImporter::fillParameter(const char* parameter_name, double& value){ 28 | if(DEBUG) 29 | printf("Import %s as double:%d", parameter_name, value); 30 | value = (*root)[std::string(parameter_name)]; 31 | } 32 | void _JSonImporter::fillParameter(const char* parameter_name, int *values, int length){ 33 | if(DEBUG) 34 | printf("Import %s as int array", parameter_name); 35 | 36 | ArduinoJson::JsonArray& array = (*root)[std::string(parameter_name)]; 37 | for(int i=0; i < length;i++) { 38 | values[i] = array[i]; 39 | } 40 | } 41 | void _JSonImporter::fillParameter(const 
char* parameter_name, float *values, int length){ 42 | if(DEBUG) 43 | printf("Import %s as float array", parameter_name); 44 | 45 | ArduinoJson::JsonArray& array = (*root)[std::string(parameter_name)]; 46 | for(int i=0; i < length;i++) { 47 | values[i] = array[i]; 48 | } 49 | } 50 | void _JSonImporter::fillParameter(const char* parameter_name, double *values, int length){ 51 | if(DEBUG) 52 | printf("Import %s as double array", parameter_name); 53 | 54 | ArduinoJson::JsonArray& array = (*root)[std::string(parameter_name)]; 55 | for(int i=0; i < length;i++) { 56 | values[i] = array[i]; 57 | } 58 | } 59 | 60 | IscAbstractModelImporter* _JSonImporter::getModelImporter(const char * parameter_name) { 61 | if(DEBUG) 62 | printf("Import %s as json object", parameter_name); 63 | 64 | ArduinoJson::JsonObject& object = (*root)[std::string(parameter_name)]; 65 | 66 | 67 | return ( IscAbstractModelImporter*) new _JSonImporter(&object); 68 | } 69 | IscAbstractModelImporter* _JSonImporter::getModelImporter(int parameter_id){ 70 | if(DEBUG) 71 | printf("Import %i as json object", parameter_id); 72 | 73 | ArduinoJson::JsonObject& object = (*root)[to_string(parameter_id)]; 74 | 75 | 76 | return ( IscAbstractModelImporter*) new _JSonImporter(&object); 77 | } 78 | 79 | } // namespace pyisc 80 | 81 | -------------------------------------------------------------------------------- /src/_JSonImporter.hh: -------------------------------------------------------------------------------- 1 | /* 2 | * _JSonImporter.hh 3 | * 4 | * Created on: 19 Feb 2018 5 | * Author: tol 6 | */ 7 | 8 | #ifndef SRC__JSONIMPORTER_HH_ 9 | #define SRC__JSONIMPORTER_HH_ 10 | 11 | #include "isc_exportimport.hh" 12 | #include "ArduinoJson.hpp" 13 | #include "mystring.hh" 14 | 15 | #ifndef DEBUG 16 | #define DEBUG 0 17 | #endif 18 | 19 | namespace pyisc { 20 | 21 | class _JSonImporter : IscAbstractModelImporter { 22 | public: 23 | _JSonImporter(){ 24 | }; 25 | virtual ~_JSonImporter(){}; 26 | virtual void 
notImplemented(); 27 | 28 | // Methods that sets the values to the provided data structure 29 | virtual void fillParameter(const char* parameter_name, int &value); 30 | virtual void fillParameter(const char* parameter_name, float &value); 31 | virtual void fillParameter(const char* parameter_name, double &value); 32 | 33 | virtual void fillParameter(const char* parameter_name, int *value, int length); 34 | virtual void fillParameter(const char* parameter_name, float *value, int length); 35 | virtual void fillParameter(const char* parameter_name, double *value, int length); 36 | virtual IscAbstractModelImporter* getModelImporter(const char * parameter_name); 37 | virtual IscAbstractModelImporter* getModelImporter(int parameter_id); 38 | 39 | // Return True if succeeds 40 | bool parseJSon(std::string json) { 41 | root = &jsonBuffer.parseObject(json); 42 | return root->success(); 43 | } 44 | protected: 45 | _JSonImporter(ArduinoJson::JsonObject* root):root(root){ 46 | }; 47 | 48 | private: 49 | ArduinoJson::DynamicJsonBuffer jsonBuffer; 50 | ArduinoJson::JsonObject* root; 51 | 52 | }; 53 | 54 | } // namespace pyisc 55 | 56 | 57 | 58 | 59 | 60 | #endif /* SRC__JSONIMPORTER_HH_ */ 61 | -------------------------------------------------------------------------------- /src/mystring.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * mystring.cc 3 | * 4 | * Created on: 23 Feb 2018 5 | * Author: tol 6 | */ 7 | #include "mystring.hh" 8 | 9 | std::string to_string(int i) { 10 | std::ostringstream stm ; 11 | stm << i; 12 | return stm.str(); 13 | } 14 | -------------------------------------------------------------------------------- /src/mystring.hh: -------------------------------------------------------------------------------- 1 | /* 2 | * mystring.hh 3 | * 4 | * Created on: 23 Feb 2018 5 | * Author: tol 6 | */ 7 | 8 | #ifndef SRC_MYSTRING_HH_ 9 | #define SRC_MYSTRING_HH_ 10 | 11 | #include 12 | #include 13 | 14 | // Replace the 
import unittest

import numpy as np

import pyisc


class MyTestCase(unittest.TestCase):
    """Regression test: a probability model referencing a column index
    beyond the data's last column must be rejected at fit time."""

    def test_something(self):
        X = np.array([0.1, -0.1, 0.05, -0.01, 0.0, 0.11]).reshape((-1, 1))

        # P_Gaussian(1) points at column 1, but X only has column 0, so
        # fitting must trip the detector's max-index assertion.
        # BUGFIX: the original signalled failure with assertFalse(True, ...)
        # inside the same `try/except AssertionError: pass`, so its own
        # AssertionError was swallowed and the test could never fail.
        with self.assertRaises(AssertionError):
            pyisc.AnomalyDetector(pyisc.P_Gaussian(1)).fit(X)

        # A valid column index fits fine and scoring is deterministic.
        ad = pyisc.AnomalyDetector(pyisc.P_Gaussian(0)).fit(X)
        self.assertTrue(np.array_equal(ad.anomaly_score(X), ad.anomaly_score(X)))


if __name__ == '__main__':
    unittest.main()
import unittest

import numpy as np
from numpy.testing import assert_allclose  # BUGFIX: numpy.testing.utils was removed
from scipy.stats import norm

import pyisc


class MyTestCase(unittest.TestCase):
    """JSON round-trip tests: export a fitted detector, import the JSON
    into a fresh detector with the same structure, then check that both
    the anomaly scores and the re-exported JSON agree."""

    @staticmethod
    def _make_data(normal_len=100, anomaly_len=15):
        """Build a 3-column data set, each column mixing draws from a
        'normal' and an 'anomalous' Gaussian."""
        distributions = [
            (norm(1.1, 5), norm(1.5, 7)),
            (norm(2.2, 10), norm(3, 12)),
            (norm(1, 12), norm(2, 30)),
        ]
        return np.column_stack([
            list(normal.rvs(normal_len)) + list(anomaly.rvs(anomaly_len))
            for normal, anomaly in distributions
        ])

    def test_multivariate_gaussian(self):
        data = self._make_data()

        def make_detector():
            # One independent Gaussian per column, combined with cr_max.
            return pyisc.AnomalyDetector(
                component_models=[
                    pyisc.P_Gaussian(0),
                    pyisc.P_Gaussian(1),
                    pyisc.P_Gaussian(2),
                ],
                output_combination_rule=pyisc.cr_max,
            )

        detector = make_detector()
        detector.fit(data)
        exported = detector.exportJSon()
        print(exported)

        detector2 = make_detector()
        detector2.importJSon(exported)
        exported2 = detector2.exportJSon()
        print(exported2)

        assert_allclose(detector.anomaly_score(data), detector2.anomaly_score(data))
        self.assertEqual(exported, exported2)

    def test_conditional_gaussian(self):
        data = self._make_data()

        def make_detector():
            # A chain of conditional Gaussians joined by a combiner.
            return pyisc.AnomalyDetector(
                component_models=[
                    pyisc.P_ConditionalGaussianCombiner([
                        pyisc.P_ConditionalGaussian([0], [1]),
                        pyisc.P_ConditionalGaussian([1], [2]),
                    ])
                ],
                output_combination_rule=pyisc.cr_max,
            )

        detector = make_detector()
        detector.fit(data)
        exported = detector.exportJSon()
        print(exported)

        detector2 = make_detector()
        detector2.importJSon(exported)
        exported2 = detector2.exportJSon()
        print(exported2)

        self.assertEqual(exported, exported2)
        assert_allclose(detector.anomaly_score(data), detector2.anomaly_score(data))


if __name__ == '__main__':
    unittest.main()
po_normal = poisson(10) 14 | po_anomaly = poisson(25) 15 | 16 | po_normal2 = poisson(2) 17 | po_anomaly2 = poisson(3) 18 | 19 | gs_normal = norm(1, 12) 20 | gs_anomaly = norm(2, 30) 21 | 22 | normal_len = 10000 23 | anomaly_len = 15 24 | 25 | data = np.column_stack( 26 | [ 27 | [1] * (normal_len + anomaly_len), 28 | list(po_normal.rvs(normal_len)) + list(po_anomaly.rvs(anomaly_len)), 29 | list(po_normal2.rvs(normal_len)) + list(po_anomaly2.rvs(anomaly_len)), 30 | list(gs_normal.rvs(normal_len)) + list(gs_anomaly.rvs(anomaly_len)), 31 | ] 32 | ) 33 | anomaly_detector = pyisc.AnomalyDetector( 34 | component_models=[ 35 | pyisc.P_PoissonOnesided(1, 0), # columns 1 and 0 36 | pyisc.P_Poisson(2, 0), # columns 2 and 0 37 | pyisc.P_Gaussian(3) # column 3 38 | ], 39 | output_combination_rule=pyisc.cr_max 40 | ) 41 | 42 | anomaly_detector.fit(data); 43 | # This above should fail this test if the problem still occurs: 44 | ''' 45 | --------------------------------------------------------------------------- 46 | AssertionError Traceback (most recent call last) 47 | in () 48 | ----> 1 anomaly_detector.fit(data); 49 | 50 | C:\ProgramData\Anaconda3\envs\pyISC_py27\lib\site-packages\_pyisc_modules\BaseISC.pyc in fit(self, X, y) 51 | 313 52 | 314 53 | --> 315 return self._fit(X,y) 54 | 316 55 | 317 def _fit(self,X,y=None): 56 | 57 | C:\ProgramData\Anaconda3\envs\pyISC_py27\lib\site-packages\_pyisc_modules\BaseISC.pyc in _fit(self, X, y) 58 | 352 59 | 353 if data_object is not None: 60 | --> 354 assert self._max_index < data_object.length() # ensure that data distribution has not to large index into the data 61 | 355 62 | 356 return self._fit(data_object) 63 | 64 | AssertionError: 65 | ''' 66 | 67 | assert True; 68 | 69 | 70 | 71 | if __name__ == '__main__': 72 | unittest.main() 73 | -------------------------------------------------------------------------------- /unittests/test_p_ConditionalGaussian.py: 
import unittest
from unittest import TestCase

from numpy import array, c_
from numpy.testing import assert_allclose, assert_equal  # BUGFIX: numpy.testing.utils was removed
from scipy.stats import norm, pearsonr  # public paths (scipy.stats.stats is private/deprecated)

from pyisc import (
    AnomalyDetector,
    P_Gaussian,
    P_ConditionalGaussian,
    P_ConditionalGaussianCombiner,
    cr_plus,
)


class TestPConditionalGaussian(TestCase):
    """Compare P_ConditionalGaussian (alone and via combiners) against the
    plain P_Gaussian model on data where they should agree or at least
    correlate strongly.

    BUGFIX: the original compared the full ``pearsonr`` return value -- an
    ``(r, p-value)`` tuple -- against a float.  On Python 2 that comparison
    is vacuously True (tuples sort after numbers); on Python 3 it raises
    TypeError.  All correlation checks now index ``[0]`` to test the
    correlation coefficient itself.
    """

    def test_conditional_gaussian(self):
        # 1-D: an unconditional conditional-Gaussian must match P_Gaussian.
        x = array([[v] for v in norm(0, 1).rvs(1000)])

        gauss_scores = AnomalyDetector(P_Gaussian(0)).fit(x).anomaly_score(x)
        condgauss_scores = (
            AnomalyDetector(P_ConditionalGaussian([0], []))
            .fit(x)
            .anomaly_score(x)
        )
        assert_allclose(gauss_scores, condgauss_scores, atol=0.01, rtol=0.01)

        # Two independent columns: conditioning on an unrelated column
        # should change the scores only slightly.
        X = array([[a, b] for a, b in zip(norm(0, 1).rvs(1000), norm(0, 1).rvs(1000))])

        gauss_scores_X = AnomalyDetector(P_Gaussian([0])).fit(X).anomaly_score(X)
        condgauss_scores_X = (
            AnomalyDetector(P_ConditionalGaussian([0], [1]))
            .fit(X)
            .anomaly_score(X)
        )
        assert_allclose(gauss_scores_X, condgauss_scores_X, atol=0.3)

        # Correlated columns: the two models are highly correlated but not
        # numerically equal.
        X = array([[a, a + 0.1 * b]
                   for a, b in zip(norm(0, 1).rvs(1000), norm(0, 1).rvs(1000))])

        gauss_scores_X = AnomalyDetector(P_Gaussian([0, 1])).fit(X).anomaly_score(X)
        condgauss_scores_X = (
            AnomalyDetector(P_ConditionalGaussian([0, 1], []))
            .fit(X)
            .anomaly_score(X)
        )
        assert_equal(pearsonr(gauss_scores_X, condgauss_scores_X)[0] > 0.994, True)
        assert_allclose(gauss_scores_X, condgauss_scores_X, atol=2)  # deliberately loose

        # Same comparison with an extra, independent third column present.
        X = array([[a, a + 0.1 * b, c] for a, b, c in
                   c_[norm(0, 1).rvs(1000), norm(0, 1).rvs(1000), norm(0, 1).rvs(1000)]])

        gauss_scores_X = AnomalyDetector(P_Gaussian([0, 1])).fit(X).anomaly_score(X)
        condgauss_scores_X = (
            AnomalyDetector(P_ConditionalGaussian([0, 1], []))
            .fit(X)
            .anomaly_score(X)
        )
        assert_equal(pearsonr(gauss_scores_X, condgauss_scores_X)[0] > 0.994, True)
        assert_allclose(gauss_scores_X, condgauss_scores_X, atol=2)  # deliberately loose

        # Chain-rule factorization of the joint via a combiner vs the joint
        # Gaussian: correlated, loosely close.
        X = array([[a, a + 0.1 * b, c] for a, b, c in
                   c_[norm(0, 1).rvs(1000), norm(0, 1).rvs(1000), norm(0, 1).rvs(1000)]])

        gauss_scores_X = AnomalyDetector(P_Gaussian([0, 1, 2])).fit(X).anomaly_score(X)
        condgauss_scores_X = (
            AnomalyDetector(
                P_ConditionalGaussianCombiner([
                    P_ConditionalGaussian([0], [1, 2]),
                    P_ConditionalGaussian([1], [2]),
                    P_ConditionalGaussian([2], []),
                ]))
            .fit(X)
            .anomaly_score(X)
        )
        assert_equal(pearsonr(gauss_scores_X, condgauss_scores_X)[0] > 0.98, True)
        assert_allclose(gauss_scores_X, condgauss_scores_X, atol=5)  # deliberately loose

        # The full joint conditional Gaussian matches its own chain-rule
        # combiner factorization very closely.
        gauss_scores_X = (
            AnomalyDetector(P_ConditionalGaussian([0, 1, 2], []))
            .fit(X)
            .anomaly_score(X)
        )
        condgauss_scores_X = (
            AnomalyDetector(
                P_ConditionalGaussianCombiner([
                    P_ConditionalGaussian([0], [1, 2]),
                    P_ConditionalGaussian([1], [2]),
                    P_ConditionalGaussian([2], []),
                ]))
            .fit(X)
            .anomaly_score(X)
        )
        assert_allclose(gauss_scores_X, condgauss_scores_X, atol=0.001)

        # Combining the same factors with cr_plus instead of the combiner:
        # strongly correlated, but only loosely equal.
        condgauss_scores_X2 = (
            AnomalyDetector(
                [
                    P_ConditionalGaussian([0], [1, 2]),
                    P_ConditionalGaussian([1], [2]),
                    P_ConditionalGaussian([2], []),
                ], cr_plus)
            .fit(X)
            .anomaly_score(X)
        )
        assert_equal(pearsonr(condgauss_scores_X, condgauss_scores_X2)[0] > 0.99, True)
        assert_allclose(condgauss_scores_X2, condgauss_scores_X, atol=2)  # deliberately loose

        # Per-column models combined with cr_plus: plain Gaussians vs
        # unconditional conditional Gaussians should agree tightly.
        ad1 = AnomalyDetector(
            [P_Gaussian([i]) for i in range(len(X[0]))],
            cr_plus,
        ).fit(X)
        s1 = ad1.anomaly_score(X)

        ad2 = AnomalyDetector(
            [P_ConditionalGaussian([i], []) for i in range(len(X[0]))],
            cr_plus,
        ).fit(X)
        s2 = ad2.anomaly_score(X)

        print("r:", pearsonr(s1, s2))
        assert_allclose(s1, s2, rtol=0.01)


if __name__ == '__main__':
    unittest.main()
noise=0.1): 25 | global norm_dist 26 | sample = [] 27 | sample.append(norm_dist.rvs(1)[0]) 28 | for i in range(1,length): 29 | sample.append(norm_dist.rvs(1)[0]*noise+sample[i-1]) 30 | 31 | return sample 32 | 33 | class TestPConditionalGaussianDependencyMatrix(TestCase): 34 | 35 | 36 | def test_conditional_gaussian_dependency_matrix(self): 37 | length = 100 38 | n_samples = 1000 39 | X = array([sample_markov_chain(length) for _ in range(n_samples)]) 40 | 41 | 42 | # Next two should be equal 43 | s0 = AnomalyDetector( 44 | P_ConditionalGaussianDependencyMatrix(list(range(length)),length) 45 | ).fit(X).anomaly_score(X) 46 | 47 | ad1=AnomalyDetector( 48 | P_ConditionalGaussianCombiner([P_ConditionalGaussian([i + 1], [i]) for i in range(length - 1)]+[P_ConditionalGaussian([0], [])]), 49 | cr_plus 50 | ).fit(X) 51 | s1 = ad1.anomaly_score(X) 52 | 53 | assert_allclose(s0, s1, rtol=0.0001) # OK 54 | 55 | # Most likely, these two are not equal but highly correlated 56 | ad2=AnomalyDetector( 57 | [P_ConditionalGaussian([i], []) for i in range(length)], 58 | cr_plus 59 | ).fit(X) 60 | s2 = ad2.anomaly_score(X) 61 | 62 | ad3=AnomalyDetector( 63 | P_ConditionalGaussianCombiner([P_ConditionalGaussian([i], []) for i in range(length)]), 64 | cr_plus 65 | ).fit(X) 66 | s3 = ad3.anomaly_score(X) 67 | 68 | assert_equal(pearsonr(s2,s3)> 0.985, True) 69 | 70 | 71 | # Test classification 72 | Y = array([sample_markov_chain(length,0.2) for _ in range(n_samples)]) 73 | Z = array([sample_markov_chain(length,0.3) for _ in range(n_samples)]) 74 | 75 | 76 | data = r_[X,Y,Z] 77 | labels = r_[['X']*len(X), ['Y']*len(Y), ['Z']*len(Z)] 78 | 79 | data_index = shuffle(list(range(len(data)))) 80 | training_set = data_index[:n_samples*2] 81 | test_set = data_index[n_samples*2:] 82 | 83 | models = { 84 | 'independent gaussian': 85 | AnomalyDetector([P_Gaussian([i]) for i in range(length)],cr_plus), 86 | 'independent conditional gaussian': 87 | AnomalyDetector([P_ConditionalGaussian([i], []) for i in 
import pickle
import unittest

import numpy as np
from numpy.testing import assert_allclose  # BUGFIX: numpy.testing.utils was removed
from scipy.stats import norm

import pyisc


class MyTestCase(unittest.TestCase):
    """Pickle round-trip tests: a fitted detector and its unpickled clone
    must produce identical JSON exports and anomaly scores."""

    @staticmethod
    def _make_data(normal_len=100, anomaly_len=15):
        """Build a 3-column data set, each column mixing draws from a
        'normal' and an 'anomalous' Gaussian."""
        distributions = [
            (norm(1.1, 5), norm(1.5, 7)),
            (norm(2.2, 10), norm(3, 12)),
            (norm(1, 12), norm(2, 30)),
        ]
        return np.column_stack([
            list(normal.rvs(normal_len)) + list(anomaly.rvs(anomaly_len))
            for normal, anomaly in distributions
        ])

    def _roundtrip(self, detector, data):
        """Fit, export, pickle/unpickle; return (json, clone, clone json)."""
        detector.fit(data)
        exported = detector.exportJSon()
        # Trusted, self-produced bytes only -- pickle is unsafe on untrusted
        # input, but this test round-trips its own detector.
        blob = pickle.dumps(detector)
        print(blob)
        clone = pickle.loads(blob)
        exported2 = clone.exportJSon()
        print(exported2)
        return exported, clone, exported2

    def test_multivariate_gaussian(self):
        data = self._make_data()
        detector = pyisc.AnomalyDetector(
            component_models=[
                pyisc.P_Gaussian(0),
                pyisc.P_Gaussian(1),
                pyisc.P_Gaussian(2),
            ],
            output_combination_rule=pyisc.cr_max,
        )
        exported, clone, exported2 = self._roundtrip(detector, data)

        assert_allclose(detector.anomaly_score(data), clone.anomaly_score(data))
        self.assertEqual(exported, exported2)

    def test_conditional_gaussian(self):
        data = self._make_data()
        detector = pyisc.AnomalyDetector(
            component_models=[
                pyisc.P_ConditionalGaussianCombiner([
                    pyisc.P_ConditionalGaussian([0], [1]),
                    pyisc.P_ConditionalGaussian([1], [2]),
                ])
            ],
            output_combination_rule=pyisc.cr_max,
        )
        exported, clone, exported2 = self._roundtrip(detector, data)

        self.assertEqual(exported, exported2)
        assert_allclose(detector.anomaly_score(data), clone.anomaly_score(data))


if __name__ == '__main__':
    unittest.main()