├── README.md ├── build.properties ├── build.xml ├── lib ├── Jama-1.0.3.jar ├── commons-math-2.2.jar ├── copyYourLibsHere └── encog-mod-3.2.0.jar ├── licenses ├── LICENSE └── short_license.txt ├── resources ├── ABOUT.NFO ├── META-INF │ └── icon.png └── com │ └── rapidminer │ └── resources │ ├── OperatorsAnomalyDetection.xml │ ├── groupsAnomalyDetection.properties │ ├── i18n │ ├── ErrorsAnomalyDetection.properties │ ├── GUIAnomalyDetection.properties │ ├── OperatorsDocAnomalyDetection.xml │ └── UserErrorMessagesAnomalyDetection.properties │ ├── ioobjectsAnomalyDetection.xml │ └── parserulesAnomalyDetection.xml └── src ├── anomalydetection_libsvm ├── COPYRIGHT ├── Kernel.java ├── Svm.java ├── svm_model.java ├── svm_node.java ├── svm_parameter.java └── svm_problem.java └── de └── dfki └── madm └── anomalydetection ├── PlugInitAnomalyDetection.java ├── evaluator ├── Evaluator.java ├── cluster_based │ ├── CBLOFEvaluator.java │ ├── CMGOSEvaluator.java │ ├── ClusterOrder.java │ ├── CovarianceMatrix.java │ └── LDCOFEvaluator.java ├── evaluation │ └── ROCEvaluator.java ├── kernel_based │ ├── AnomalyDetectionLibSVMEvaluator.java │ └── RBF_Kernel.java ├── nearest_neighbor_based │ ├── ALOCIEvaluator.java │ ├── COFEvaluator.java │ ├── INFLOEvaluator.java │ ├── KNNCollection.java │ ├── KNNCollectionModel.java │ ├── KNNEvaluator.java │ ├── LOCIEvaluator.java │ ├── LOFEvaluator.java │ └── LoOPEvaluator.java └── statistical_based │ ├── HistogramBin.java │ └── HistogramEvaluator.java └── operator ├── AbstractAnomalyDetectionOperator.java ├── cluster_based ├── AbstractClusteringAnomalyDetectionOperator.java ├── CBLOFAnomalyDetectionOperator.java ├── CMGOSAnomalyDetectionOperator.java └── LDCOFAnomalyDetectionOperator.java ├── evaluation ├── ROCOperator.java └── ROCPerformanceVector.java ├── kernel_based ├── AnomalyDetectionLibSVMOperator.java └── NumberOfSupportVectorsValue.java ├── model_based ├── ALSOOperator.java └── RNNOperator.java ├── nearest_neighbor_based ├── 
ALOCIAnomalyDetectionOperator.java ├── AbstractNearestNeighborBasedAnomalyDetectionOperator.java ├── COFAnomalyDetectionOperator.java ├── INFLOAnomalyDetectionOperator.java ├── KNNAnomalyDetectionOperator.java ├── LOCIAnomalyDetectionOperator.java ├── LOFAnomalyDetectionOperator.java ├── LoOPAnomalyDetectionOperator.java └── Point.java └── statistical_based ├── HistogramOperator.java ├── OutlierCellColorProvider.java ├── OutlierColorJoin.java ├── OutlierDataViewer.java ├── OutlierExampleSet.java ├── OutlierExampleSetDataRenderer.java ├── OutlierJTable.java └── RobustPCAOperator.java /README.md: -------------------------------------------------------------------------------- 1 | ![http://madm.dfki.de/_media/rapidminer/ad-logo.png](http://madm.dfki.de/_media/rapidminer/ad-logo.png) 2 | 3 | RapidMiner Anomaly Detection Extension 4 | ====================================== 5 | 6 | The Anomaly Detection Extension for RapidMiner comprises the most well-known unsupervised anomaly detection algorithms, assigning individual anomaly scores to data rows of example sets. It allows you to find data that is significantly different from the norm, without requiring the data to be labeled. 
7 | 8 | Some of the algorithms are: 9 | 10 | * Local Outlier Factor (LOF) 11 | * k-NN Global Anomaly Score 12 | * Connectivity-based Outlier Factor (COF) 13 | * Local Correlation Integral (LOCI) 14 | * Local Outlier Probability (LoOP) 15 | * Cluster-based Local Outlier Factor (CBLOF) 16 | 17 | More information and usage examples can be found on the author's homepage 18 | 19 | Installation 20 | ------------ 21 | 22 | * In RapidMiner, go to Help->Updates and Extensions (Marketplace) and search for “anomaly detection” and click on “Install”, or 23 | * Copy the jar file to the “lib/plugins” directory of RapidMiner 24 | 25 | Copyright/ License/ Credits 26 | --------------------------- 27 | 28 | Copyright 2008-2013 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz 29 | Copyright 2008-2019 Markus Goldstein 30 | 31 | This is free software. Licensed under the [GNU AGPL, Version 3](licenses/LICENSE). 32 | There is NO WARRANTY, to the extent permitted by law. 33 | 34 | Authors 35 | ------- 36 | 37 | Markus Goldstein 38 | Mennatallah Amer 39 | Johann Gebhardt 40 | Patrick Kalka 41 | Ahmed Elsawy 42 | 43 | This Software is supported by ... 
44 | [![http://madm.dfki.de/lib/tpl/dfki/images/logo.jpg](http://madm.dfki.de/lib/tpl/dfki/images/logo.jpg)](http://www.madm.eu/)        45 |        46 |        47 | [![https://www.goldiges.de/assets/images/logo-full.png](https://www.goldiges.de/assets/images/logo-full.png)](https://www.goldiges.de/) 48 | -------------------------------------------------------------------------------- /build.properties: -------------------------------------------------------------------------------- 1 | extension.version=2 2 | extension.revision=4 3 | extension.update=001 4 | -------------------------------------------------------------------------------- /build.xml: -------------------------------------------------------------------------------- 1 | 2 | Build file for the RapidMiner Anomaly Detection extension 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /lib/Jama-1.0.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/lib/Jama-1.0.3.jar -------------------------------------------------------------------------------- /lib/commons-math-2.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/lib/commons-math-2.2.jar -------------------------------------------------------------------------------- /lib/copyYourLibsHere: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/lib/copyYourLibsHere 
-------------------------------------------------------------------------------- /lib/encog-mod-3.2.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/lib/encog-mod-3.2.0.jar -------------------------------------------------------------------------------- /licenses/short_license.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner 3 | * 4 | * Copyright (C) 2001-2009 by Rapid-I and the contributors 5 | * 6 | * Complete list of developers available at our web site: 7 | * 8 | * http://rapid-i.com 9 | * 10 | * This program is free software: you can redistribute it and/or modify 11 | * it under the terms of the GNU Affero General Public License as published by 12 | * the Free Software Foundation, either version 3 of the License, or 13 | * (at your option) any later version. 14 | * 15 | * This program is distributed in the hope that it will be useful, 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | * GNU Affero General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Affero General Public License 21 | * along with this program. If not, see http://www.gnu.org/licenses/. 22 | */ 23 | -------------------------------------------------------------------------------- /resources/ABOUT.NFO: -------------------------------------------------------------------------------- 1 | The Anomaly Detection Extension comprises the most well-known unsupervised anomaly detection algorithms, assigning individual anomaly scores to data rows of example sets. 
2 | -------------------------------------------------------------------------------- /resources/META-INF/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/resources/META-INF/icon.png -------------------------------------------------------------------------------- /resources/com/rapidminer/resources/OperatorsAnomalyDetection.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | table_new.png 6 | 7 | 8 | 9 | k-NN Global Anomaly Score 10 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.KNNAnomalyDetectionOperator 11 | 12 | 13 | 14 | 15 | Local Outlier Factor (LOF) 16 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.LOFAnomalyDetectionOperator 17 | 18 | 19 | 20 | Connectivity-Based Outlier Factor (COF) 21 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.COFAnomalyDetectionOperator 22 | 23 | 24 | 25 | 26 | Local Correlation Integeral (LOCI) 27 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.LOCIAnomalyDetectionOperator 28 | 29 | 30 | 31 | 32 | approximate Local Correlation Integral (aLOCI) 33 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.ALOCIAnomalyDetectionOperator 34 | 35 | 36 | 37 | 38 | Local Outlier Probablity (LoOP) 39 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.LoOPAnomalyDetectionOperator 40 | 41 | 42 | 43 | 44 | Influenced Outlierness (INFLO) 45 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.INFLOAnomalyDetectionOperator 46 | 47 | 48 | 49 | 50 | 51 | 52 | Cluster-Based Local Outlier Factor (CBLOF) 53 | de.dfki.madm.anomalydetection.operator.cluster_based.CBLOFAnomalyDetectionOperator 54 | 55 | 56 | 57 | 58 | Local Density Cluster-Based Outlier Factor (LDCOF) 59 | de.dfki.madm.anomalydetection.operator.cluster_based.LDCOFAnomalyDetectionOperator 
60 | 61 | 62 | 63 | 64 | Clustering-based Multivariate Gaussian Outlier Score (CMGOS) 65 | de.dfki.madm.anomalydetection.operator.cluster_based.CMGOSAnomalyDetectionOperator 66 | 67 | 68 | 69 | 70 | 71 | 72 | Histogram-based Outlier Score (HBOS) 73 | de.dfki.madm.anomalydetection.operator.statistical_based.HistogramOperator 74 | 75 | 76 | 77 | Color Coded Join 78 | de.dfki.madm.anomalydetection.operator.statistical_based.OutlierColorJoin 79 | 80 | 81 | 82 | Robust Principal Component Analysis Anomaly Score (rPCA) 83 | de.dfki.madm.anomalydetection.operator.statistical_based.RobustPCAOperator 84 | 85 | 86 | 87 | 88 | 89 | One-Class LIBSVM Anomaly Score 90 | de.dfki.madm.anomalydetection.operator.kernel_based.AnomalyDetectionLibSVMOperator 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | Attribute-wise Learning (ALSO) 99 | de.dfki.madm.anomalydetection.operator.model_based.ALSOOperator 100 | 101 | 102 | Replicator Neural Network (RNN) 103 | de.dfki.madm.anomalydetection.operator.model_based.RNNOperator 104 | 105 | 106 | 107 | 108 | 109 | Generate ROC 110 | de.dfki.madm.anomalydetection.operator.evaluation.ROCOperator 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /resources/com/rapidminer/resources/groupsAnomalyDetection.properties: -------------------------------------------------------------------------------- 1 | # red 2 | group.anomaly_detection.color = #f1d9e8 3 | -------------------------------------------------------------------------------- /resources/com/rapidminer/resources/i18n/ErrorsAnomalyDetection.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/resources/com/rapidminer/resources/i18n/ErrorsAnomalyDetection.properties -------------------------------------------------------------------------------- 
/resources/com/rapidminer/resources/i18n/GUIAnomalyDetection.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/resources/com/rapidminer/resources/i18n/GUIAnomalyDetection.properties -------------------------------------------------------------------------------- /resources/com/rapidminer/resources/i18n/UserErrorMessagesAnomalyDetection.properties: -------------------------------------------------------------------------------- 1 | error.1001.name = Error in sub process 2 | error.1001.short = Error in sub process 3 | error.1001.long = An error has occurred in the subprocess for learning the models. 4 | -------------------------------------------------------------------------------- /resources/com/rapidminer/resources/ioobjectsAnomalyDetection.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | de.dfki.madm.anomalydetection.operator.statistical_based.OutlierExampleSetDataRenderer 8 | com.rapidminer.gui.renderer.data.ExampleSetDataRenderer 9 | com.rapidminer.gui.renderer.data.ExampleSetMetaDataRenderer 10 | com.rapidminer.gui.renderer.data.ExampleSetPlotRenderer 11 | com.rapidminer.gui.new_plotter.integration.ExpertDataTableRenderer 12 | com.rapidminer.gui.renderer.AnnotationsRenderer 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /resources/com/rapidminer/resources/parserulesAnomalyDetection.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/anomalydetection_libsvm/COPYRIGHT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2000-2005 Chih-Chung Chang and Chih-Jen Lin 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | 3. Neither name of copyright holders nor the names of its contributors 16 | may be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 24 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 28 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | -------------------------------------------------------------------------------- /src/anomalydetection_libsvm/Kernel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner 3 | * 4 | * Copyright (C) 2001-2013 by Rapid-I and the contributors 5 | * 6 | * Complete list of developers available at our web site: 7 | * 8 | * http://rapid-i.com 9 | * 10 | * This program is free software: you can redistribute it and/or modify 11 | * it under the terms of the GNU Affero General Public License as published by 12 | * the Free Software Foundation, either version 3 of the License, or 13 | * (at your option) any later version. 14 | * 15 | * This program is distributed in the hope that it will be useful, 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | * GNU Affero General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Affero General Public License 21 | * along with this program. If not, see http://www.gnu.org/licenses/. 
22 | */ 23 | package anomalydetection_libsvm; 24 | 25 | public abstract class Kernel extends QMatrix { 26 | private svm_node[][] x; 27 | private final double[] x_square; 28 | 29 | // svm_parameter 30 | private final int kernel_type; 31 | private final int degree; 32 | private final double gamma; 33 | private final double coef0; 34 | 35 | @Override 36 | abstract float[] get_Q(int column, int len); 37 | @Override 38 | abstract float[] get_QD(); 39 | 40 | @Override 41 | void swap_index(int i, int j) 42 | { 43 | do {svm_node[] _=x[i]; x[i]=x[j]; x[j]=_;} while(false); 44 | if(x_square != null) do {double _=x_square[i]; x_square[i]=x_square[j]; x_square[j]=_;} while(false); 45 | } 46 | 47 | private static double powi(double base, int times) { 48 | double tmp = base, ret = 1.0; 49 | 50 | for(int t=times; t>0; t/=2) 51 | { 52 | if(t%2!=0) ret*=tmp; 53 | tmp = tmp * tmp; 54 | } 55 | return ret; 56 | } 57 | 58 | private static double tanh(double x) { 59 | double e = Math.exp(x); 60 | return 1.0-2.0/(e*e+1); 61 | } 62 | 63 | public double kernel_function(int i, int j) { 64 | switch(kernel_type) { 65 | case svm_parameter.LINEAR: 66 | return dot(x[i],x[j]); 67 | case svm_parameter.POLY: 68 | return powi(gamma*dot(x[i],x[j])+coef0,degree); 69 | case svm_parameter.RBF: 70 | return Math.exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j]))); 71 | case svm_parameter.SIGMOID: 72 | return tanh(gamma*dot(x[i],x[j])+coef0); 73 | case svm_parameter.PRECOMPUTED: 74 | return x[i][(int)(x[j][0].value)].value; 75 | default: 76 | return 0; // java 77 | } 78 | } 79 | 80 | Kernel(int l, svm_node[][] x_, svm_parameter param) { 81 | this.kernel_type = param.kernel_type; 82 | this.degree = param.degree; 83 | this.gamma = param.gamma; 84 | this.coef0 = param.coef0; 85 | 86 | x = x_.clone(); 87 | 88 | if(kernel_type == svm_parameter.RBF) 89 | { 90 | x_square = new double[l]; 91 | for(int i=0;i y[j].index) 111 | ++j; 112 | else 113 | ++i; 114 | } 115 | } 116 | return sum; 117 | } 118 | 119 | static 
double k_function(svm_node[] x, svm_node[] y, 120 | svm_parameter param) 121 | { 122 | switch(param.kernel_type) 123 | { 124 | case svm_parameter.LINEAR: 125 | return dot(x,y); 126 | case svm_parameter.POLY: 127 | return powi(param.gamma*dot(x,y)+param.coef0,param.degree); 128 | case svm_parameter.RBF: 129 | { 130 | double sum = 0; 131 | int xlen = x.length; 132 | int ylen = y.length; 133 | int i = 0; 134 | int j = 0; 135 | while(i < xlen && j < ylen) 136 | { 137 | if(x[i].index == y[j].index) 138 | { 139 | double d = x[i++].value - y[j++].value; 140 | sum += d*d; 141 | } 142 | else if(x[i].index > y[j].index) 143 | { 144 | sum += y[j].value * y[j].value; 145 | ++j; 146 | } 147 | else 148 | { 149 | sum += x[i].value * x[i].value; 150 | ++i; 151 | } 152 | } 153 | 154 | while(i < xlen) 155 | { 156 | sum += x[i].value * x[i].value; 157 | ++i; 158 | } 159 | 160 | while(j < ylen) 161 | { 162 | sum += y[j].value * y[j].value; 163 | ++j; 164 | } 165 | 166 | return Math.exp(-param.gamma*sum); 167 | } 168 | case svm_parameter.SIGMOID: 169 | return tanh(param.gamma*dot(x,y)+param.coef0); 170 | case svm_parameter.PRECOMPUTED: 171 | return x[(int)(y[0].value)].value; 172 | default: 173 | return 0; // java 174 | } 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/anomalydetection_libsvm/svm_model.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner 3 | * 4 | * Copyright (C) 2001-2013 by Rapid-I and the contributors 5 | * 6 | * Complete list of developers available at our web site: 7 | * 8 | * http://rapid-i.com 9 | * 10 | * This program is free software: you can redistribute it and/or modify 11 | * it under the terms of the GNU Affero General Public License as published by 12 | * the Free Software Foundation, either version 3 of the License, or 13 | * (at your option) any later version. 
14 | * 15 | * This program is distributed in the hope that it will be useful, 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | * GNU Affero General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Affero General Public License 21 | * along with this program. If not, see http://www.gnu.org/licenses/. 22 | */ 23 | package anomalydetection_libsvm; 24 | 25 | public class svm_model implements java.io.Serializable { 26 | 27 | private static final long serialVersionUID = 7974831813044169852L; 28 | 29 | public svm_parameter param; // parameter 30 | public int nr_class; // number of classes, = 2 in regression/one class svm 31 | public int l; // total #SV 32 | public int nBSV; // Bound SV(only correct in case of one-class SVM) 33 | public int []nBSVIndicies; // Indicies of bound support vectors relative to the original exampleset 34 | public svm_node[][] SV; // SVs (SV[l]) 35 | public double[][] sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l]) 36 | public double[] rho; // constants in decision functions (rho[k*(k-1)/2]) 37 | public double[] probA; // pariwise probability information 38 | public double[] probB; 39 | 40 | // for classification only 41 | 42 | public int[] label; // label of each class (label[k]) 43 | public int[] nSV; // number of SVs for each class (nSV[k]) 44 | // nSV[0] + nSV[1] + ... 
+ nSV[k-1] = l 45 | public double max_confidence; // confidence used for anomaly detection score 46 | 47 | public double[] labelValues; // actual label values for all support vectors (only used for displaying) 48 | } 49 | -------------------------------------------------------------------------------- /src/anomalydetection_libsvm/svm_node.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner 3 | * 4 | * Copyright (C) 2001-2013 by Rapid-I and the contributors 5 | * 6 | * Complete list of developers available at our web site: 7 | * 8 | * http://rapid-i.com 9 | * 10 | * This program is free software: you can redistribute it and/or modify 11 | * it under the terms of the GNU Affero General Public License as published by 12 | * the Free Software Foundation, either version 3 of the License, or 13 | * (at your option) any later version. 14 | * 15 | * This program is distributed in the hope that it will be useful, 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | * GNU Affero General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Affero General Public License 21 | * along with this program. If not, see http://www.gnu.org/licenses/. 
/**
 * One (index, value) entry of a sparse vector.
 *
 * <p>Both members are public, mutable fields; a freshly constructed node has
 * {@code index == 0} and {@code value == 0.0}.
 */
public class svm_node implements java.io.Serializable {

    private static final long serialVersionUID = -3046511301730620312L;

    /** Position of this entry within its vector. */
    public int index;

    /** Value stored at {@link #index}. */
    public double value;
}
/**
 * Parameter set for SVM training and kernel evaluation.
 *
 * <p>All settings are public, mutable fields. The {@code svm_type} and
 * {@code kernel_type} fields take one of the integer constants declared
 * below.
 */
public class svm_parameter implements Cloneable, java.io.Serializable {

    private static final long serialVersionUID = -2733609912517132812L;

    /* svm_type */
    public static final int C_SVC = 0;
    public static final int NU_SVC = 1;
    public static final int ONE_CLASS = 2;
    public static final int EPSILON_SVR = 3;
    public static final int NU_SVR = 4;
    public static final int ROBUST_ONE_CLASS = 5;
    public static final int ETA_ONE_CLASS = 6;

    /* kernel_type */
    public static final int LINEAR = 0;
    public static final int POLY = 1;
    public static final int RBF = 2;
    public static final int SIGMOID = 3;
    public static final int PRECOMPUTED = 4;

    /** One of the {@code svm_type} constants. */
    public int svm_type;
    /** One of the {@code kernel_type} constants. */
    public int kernel_type;
    /** For poly. */
    public int degree;
    /** For poly/rbf/sigmoid. */
    public double gamma;
    /** For poly/sigmoid. */
    public double coef0;

    // ---- the following are for training only ----

    /** Cache size in MB. */
    public double cache_size;
    /** Stopping criterion. */
    public double eps;
    /** For C_SVC, EPSILON_SVR and NU_SVR. */
    public double C;
    /** For C_SVC. */
    public int nr_weight;
    /** For C_SVC. */
    public int[] weight_label;
    /** For C_SVC. */
    public double[] weight;
    /** For NU_SVC, ONE_CLASS and NU_SVR. */
    public double nu;
    /** For EPSILON_SVR. */
    public double p;
    /** Use the shrinking heuristics. */
    public int shrinking;
    /** Do probability estimates. */
    public int probability;
    /** Lambda. */
    public double lambda;

    /**
     * Returns a field-by-field copy of this parameter set.
     *
     * <p>The copy is shallow: {@link #weight_label} and {@link #weight} refer
     * to the same arrays as in the original.
     *
     * @return a new {@code svm_parameter} with the same field values
     * @throws IllegalStateException if cloning fails; this cannot happen while
     *         the class implements {@link Cloneable}, and is reported loudly
     *         instead of returning {@code null} and failing later at the caller
     */
    @Override
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            throw new IllegalStateException("svm_parameter implements Cloneable, clone() must not fail", e);
        }
    }
}
Rapid-I and the contributors 5 | * 6 | * Complete list of developers available at our web site: 7 | * 8 | * http://rapid-i.com 9 | * 10 | * This program is free software: you can redistribute it and/or modify 11 | * it under the terms of the GNU Affero General Public License as published by 12 | * the Free Software Foundation, either version 3 of the License, or 13 | * (at your option) any later version. 14 | * 15 | * This program is distributed in the hope that it will be useful, 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | * GNU Affero General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Affero General Public License 21 | * along with this program. If not, see http://www.gnu.org/licenses/. 22 | */ 23 | package anomalydetection_libsvm; 24 | 25 | public class svm_problem implements java.io.Serializable { 26 | 27 | private static final long serialVersionUID = -4451389443706847272L; 28 | 29 | public int l; 30 | public double[] y; 31 | public svm_node[][] x; 32 | } 33 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/PlugInitAnomalyDetection.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see = sizeNormal) 90 | numberOfLargeClusters = i; 91 | else { 92 | if (i != numberOfClusters - 1) { 93 | if (clusterOrders[i].getClusterSize() / clusterOrders[i+1].getClusterSize() >= beta) 94 | numberOfLargeClusters = i; 95 | } 96 | } 97 | } 98 | else break; 99 | } 100 | for (int i=0; i< numberOfClusters; i++) 101 | result[clusterOrders[i].getClusterIndex()]= i<=numberOfLargeClusters; 102 | 103 | return result; 104 | } 105 | 106 | /** 107 | * The method the computes CBLOF 108 | * 109 | * @param weighting 110 | * 111 | * 112 | * @return The array containing the cblof scores. 113 | * 114 | */ 115 | public double[] evaluate() { 116 | int n = points.length; 117 | 118 | double[] cblof = new double[n]; 119 | 120 | int numberOfClusters= centroids.length; 121 | 122 | // calculates cblof 123 | for (int i = 0; i < n; i++) { 124 | int clusterIndex = belongsToCluster[i]; 125 | if (largeCluster[clusterIndex]) { 126 | // It is a large cluster 127 | cblof[i] = measure.calculateDistance(centroids[clusterIndex], 128 | points[i]); 129 | if (weighting) 130 | cblof[i] *= clusterSize[clusterIndex]; 131 | } else { 132 | // It is a small cluster 133 | 134 | double MinDistance = Double.MAX_VALUE; 135 | 136 | // search for the nearest large cluster 137 | for (int j = 0; j { 33 | private int clusterIndex; 34 | private int clusterSize; 35 | public ClusterOrder(int index, int size) { 36 | clusterIndex= index; 37 | clusterSize= size; 38 | } 39 | @Override 40 | public int compareTo(ClusterOrder o) { 41 | 42 | return o.clusterSize- clusterSize; 43 | } 44 | public int getClusterIndex() { 45 | return clusterIndex; 46 | } 47 | public int getClusterSize() { 48 | return clusterSize; 49 | } 50 | 51 | public static ClusterOrder[] getOrderedClusters(int [] clusterSize){ 52 | int numberOfClusters= clusterSize.length; 53 | ClusterOrder[]clusterOrders= new ClusterOrder[numberOfClusters]; 54 | for (int i=0; i< numberOfClusters; i++){ 55 | clusterOrders[i]= new ClusterOrder(i, clusterSize[i]); 56 | 
/**
 * Thread that computes a single cell of a shared result matrix.
 *
 * <p>For the column pair {@code (j, k)} it sums {@code points[i][j] * points[i][k]}
 * over all rows {@code i}, scales the sum by {@code 1 / (rows - 1)} and stores
 * it in {@code CovMat[j][k]} while holding the monitor of {@code CovMat}.
 *
 * <p>NOTE(review): no mean is subtracted here, so this is a covariance only
 * if the caller passes mean-centered data - confirm in CovarianceMatrix.
 */
class worker extends Thread {
    double[][] CovMat;
    double[][] points;
    private final int j;
    private final int k;

    /**
     * @param CovMat matrix that receives the result at {@code [j][k]}
     * @param j      first column index (row of the target cell)
     * @param k      second column index (column of the target cell)
     * @param points data rows to read from
     */
    public worker(double[][] CovMat, int j, int k, double[][] points) {
        this.j = j;
        this.k = k;
        this.points = points;
        this.CovMat = CovMat;
    }

    /** Accumulates the products row by row and writes the scaled sum to the target cell. */
    @Override
    public void run() {
        double productSum = 0;
        for (double[] row : points) {
            productSum += row[j] * row[k];
        }

        // Multiply by the reciprocal (not divide) to keep the same rounding as before.
        final double scaled = (1.0 / (points.length - 1)) * productSum;

        synchronized (CovMat) {
            CovMat[j][k] = scaled;
        }
    }
}
If not, see = minimumClusterSize; 108 | } 109 | return result; 110 | 111 | } 112 | 113 | public double[] evaluate() { 114 | int n = points.length; 115 | int numberOfClusters = centroids.length; 116 | double[] result = new double[n]; 117 | double[] distances = new double[n]; 118 | int[] belongsToLargeCluster = new int[n]; 119 | double[] summationDistances = new double[numberOfClusters]; 120 | for (int i = 0; i < n; i++) { 121 | int clusterIndex = belongsToCluster[i]; 122 | if (largeCluster[clusterIndex]) { 123 | // It is a large cluster 124 | distances[i] = measure.calculateDistance( 125 | centroids[clusterIndex], points[i]); 126 | summationDistances[clusterIndex] += distances[i]; 127 | } else { 128 | // It is a small cluster 129 | double MinDistance = Double.MAX_VALUE; 130 | 131 | // search for the nearest large cluster 132 | for (int j = 0; j < numberOfClusters; j++) { 133 | if (!largeCluster[j]) 134 | continue; 135 | double temp = measure.calculateDistance(centroids[j], 136 | points[i]); 137 | if (temp < MinDistance) { 138 | MinDistance = temp; 139 | clusterIndex = j; 140 | } 141 | } 142 | 143 | distances[i] = MinDistance; 144 | 145 | } 146 | 147 | belongsToLargeCluster[i] = clusterIndex; 148 | } 149 | 150 | for (int i = 0; i < numberOfClusters; i++) 151 | summationDistances[i] /= clusterSize[i]; 152 | 153 | for (int i = 0; i < n; i++) { 154 | if(summationDistances[belongsToLargeCluster[i]]== 0.0) { 155 | result[i] = 0; 156 | } 157 | else { 158 | result[i] = distances[i] 159 | / summationDistances[belongsToLargeCluster[i]]; 160 | } 161 | 162 | } 163 | 164 | return result; 165 | } 166 | 167 | } 168 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/evaluator/evaluation/ROCEvaluator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2013 by Deutsches Forschungszentrum fuer 
Kuenstliche 5 | * Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify it under the 8 | * terms of the GNU Affero General Public License as published by the Free 9 | * Software Foundation, either version 3 of the License, or (at your option) any 10 | * later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see { 30 | private int index; 31 | private double outlierScore; 32 | 33 | public OutlierPair(int i, double o) { 34 | index = i; 35 | outlierScore = o; 36 | } 37 | 38 | @Override 39 | public int compareTo(OutlierPair arg0) { 40 | if (outlierScore > arg0.outlierScore) 41 | return -1; 42 | if (outlierScore < arg0.outlierScore) 43 | return 1; 44 | return 0; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return this.outlierScore + " " + this.index; 50 | } 51 | } 52 | 53 | public double auc; 54 | public ArrayList out = new ArrayList(); 55 | private String normal = ""; 56 | 57 | public String getNormalClass() { 58 | return this.normal; 59 | } 60 | public Object[][] pre = null; //prediction / recall 61 | /** 62 | * The returned array has 2 columns denoting: false positive rate,true positive rate 63 | * precision/recall will be stored in pre. 
(true positive rate==recall) 64 | */ 65 | public Object[][] evaluate(String outlierString, Object[] labels, double[] res) throws OperatorException { 66 | int size = res.length; 67 | Object[][] result; 68 | 69 | LinkedList rocPoints = new LinkedList(); 70 | 71 | int count = 0; 72 | int anz_outlier = 0; 73 | 74 | int positive = 0; 75 | int negative = 0; 76 | int truePositive = 0; 77 | int falsePositive = 0; 78 | OutlierPair[] outliers = new OutlierPair[size]; 79 | for (int j = 0; j < size; j++) { 80 | if (labels[j].toString().equals(outlierString)) { 81 | anz_outlier++; 82 | } 83 | outliers[j] = new OutlierPair(j, res[j]); 84 | } 85 | Arrays.sort(outliers); 86 | double Area = 0; 87 | double[] last = new double[] { 0, 0 }; 88 | for (int j = 0; j < size; j++) { 89 | 90 | int x = outliers[j].index; 91 | 92 | if (count < anz_outlier) { 93 | this.out.add(outliers[j].index); 94 | count++; 95 | } 96 | 97 | if (labels[x].toString().equals(outlierString)) { 98 | truePositive++; 99 | positive++; 100 | } else { 101 | if (this.normal.equals("")) { 102 | this.normal = labels[x].toString(); 103 | } 104 | else { 105 | if (!this.normal.equals(labels[x].toString()) && !outlierString.equals("")) { 106 | throw new OperatorException("There should be only two labels (normal & outlier). 
Currently found :" + outlierString + ", " + this.normal + " and " + labels[x].toString()); 107 | } 108 | } 109 | falsePositive++; 110 | negative++; 111 | 112 | } 113 | if (j != size - 1 && outliers[j].outlierScore == outliers[j + 1].outlierScore) 114 | continue; 115 | Area += last[1] * ((double)falsePositive - last[0]) + (double)0.5 * ((double)falsePositive - last[0]) * ((double)truePositive - last[1]); 116 | rocPoints.add(new double[] { falsePositive, truePositive , truePositive*1.0/(truePositive+falsePositive), outliers[j].outlierScore}); 117 | last[0] = falsePositive; 118 | last[1] = truePositive; 119 | 120 | } 121 | if (positive == 0) { 122 | throw new OperatorException("'" + outlierString + "' not found in the labels"); 123 | } 124 | if (negative == 0) { 125 | throw new OperatorException("All the records are '" + outlierString + "'"); 126 | } 127 | double totalArea = (double)positive * (double)negative; 128 | 129 | auc = Area / totalArea; 130 | result = new Object[rocPoints.size()][2]; 131 | int i = 0; 132 | pre = new Object[rocPoints.size()][2]; 133 | for (double[] r : rocPoints) { 134 | result[i][0] = r[0] / negative; 135 | result[i++][1] = r[1] / positive; 136 | } 137 | i=0; 138 | for(double[] r : rocPoints) { 139 | pre[i][0] = r[2]; // precision = tp /(tp+fp) 140 | pre[i++][1] = r[1] / positive; //recall = tp(so far) / all outlier 141 | } 142 | 143 | return result; 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/evaluator/kernel_based/AnomalyDetectionLibSVMEvaluator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2013 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 
6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see maxConfidence) 112 | maxConfidence = prob[0]; 113 | } 114 | double[] result = new double[testSet.length]; 115 | maxConfidence = Math.abs(maxConfidence); 116 | model.max_confidence = maxConfidence; 117 | 118 | for (int i = 0; i < testSet.length; i++) { 119 | Svm.svm_predict_values(model, trainingSet[i], prob); 120 | result[i] = (maxConfidence - prob[0]) / Math.abs(maxConfidence); 121 | 122 | } 123 | return result; 124 | } 125 | 126 | @Override 127 | public double[] evaluate() { 128 | int l = values.length; 129 | double[] results; 130 | svm_problem problem; 131 | double[] labels; 132 | if (params.kernel_type == svm_parameter.RBF && automatic_gamma_learning) { 133 | params.gamma = RBF_Kernel.learnGamma(values); 134 | } 135 | 136 | labels = new double[l]; 137 | problem = new svm_problem(); 138 | problem.l = l; 139 | problem.x = values; 140 | problem.y = labels; 141 | model = Svm.svm_train(problem, params); 142 | results = computeAnomalyScore(model, values, values); 143 | return results; 144 | } 145 | 146 | public svm_model getModel() { 147 | return model; 148 | } 149 | 150 | } 151 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/evaluator/kernel_based/RBF_Kernel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 
6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see y[j].index) { 80 | sum += y[j].value * y[j].value; 81 | ++j; 82 | } else { 83 | sum += x[i].value * x[i].value; 84 | ++i; 85 | } 86 | } 87 | 88 | while (i < xlen) { 89 | sum += x[i].value * x[i].value; 90 | ++i; 91 | } 92 | 93 | while (j < ylen) { 94 | sum += y[j].value * y[j].value; 95 | ++j; 96 | } 97 | 98 | double k_value = Math.exp(-sum/(alpha*alpha)); 99 | return new double[]{k_value, sum*k_value/(alpha*alpha*alpha)}; 100 | } 101 | 102 | public static double learnGamma(svm_node[][] values){ 103 | return learnGamma(values, estimateInitialAlpha(values)); 104 | } 105 | 106 | /** 107 | * Perform gradient ascent to maximize J= s^2 / (k_avg +eps) 108 | * where s^2= sum_i_0_{l-1}(sum_j_{i+1}_{l-1}((k(i,j)-k_avg)^2))/(number-1) 109 | * k_avg = sum_i_0_{l-1}(sum_j_{i+1}_{l-1}(k(i,j))/ number 110 | * dJ/d_alpha = sum_i_0_{l-1}(sum_j_{i+1}_{l-1}( 111 | * 2 * (k(i,j) - k_avg) * (k(i,j)' - k_avg') * (k_avg + eps) 112 | * - 113 | * k_avg' * (k(i,j) - k_avg) ^ 2 114 | * )/ 115 | * (number-1) * (k_avg + eps) ^ 2 116 | * @param values input values 117 | * @param eps 118 | * @return 119 | */ 120 | public static double learnGamma(svm_node[][] values, double initialAlpha){ 121 | int l= values.length; // size of dataset 122 | int number= l*(l-1)/2; // number of non-diagonal kernel elements 123 | double alpha = initialAlpha; // standard deviation of gaussian 124 | double learning_rate= initialAlpha*initialAlpha; // learning rate of gradient ascent 125 | double eps_conv = Math.pow(10,-3); // to test for convergence 126 | double eps = Math.pow(10,-12); 127 | double lastValue = 0; 128 | 
for(int f=0; f<100 ; f++){ 129 | double [] k; 130 | double k_avg = 0; // Average of non-diagonal entries in 131 | double k_d_avg = 0; // differential of k_avg relative to alpha 132 | for(int i=0; i< l ; i++) { 133 | for(int j= i+1; j< l ; j++) { 134 | k = RBF_Kernel.k_function(values[i], values[j], alpha); 135 | k_avg += k[0]; 136 | k_d_avg += k[1]; 137 | } 138 | } 139 | 140 | k_avg /= number; 141 | k_d_avg /= number; 142 | 143 | double diff = 0.0; // gradient of maximization objective 144 | double s_2=0.0; // variance of non-diagonal 145 | for(int i=0; i < l; i++) { 146 | for(int j = i+1; j< l ; j++) { 147 | k = RBF_Kernel.k_function(values[i], values[j], alpha); 148 | s_2+=(k[0]-k_avg)*(k[0]-k_avg); 149 | diff+=2*(k[0]-k_avg)*(k[1]-k_d_avg)*(k_avg+eps)-(k[0]-k_avg)*(k[0]-k_avg)* k_d_avg; 150 | } 151 | } 152 | diff/= (number-1)*(k_avg+ eps)*(k_avg+eps); 153 | s_2/= (number-1); 154 | 155 | 156 | double temp = learning_rate* diff; 157 | if(f==0){ 158 | while(alpha+ temp <0) 159 | { 160 | learning_rate/=3; 161 | temp/=3; 162 | } 163 | } 164 | else { 165 | // reduce the learning rate because it is too large. 
166 | while(temp* lastValue < 0){ 167 | learning_rate/=3; 168 | temp = learning_rate * diff; 169 | } 170 | while(alpha+ temp <0) 171 | { 172 | learning_rate/=3; 173 | temp/=3; 174 | } 175 | } 176 | 177 | lastValue = temp; 178 | alpha += temp; 179 | 180 | if(debug) { 181 | System.out.println("maximized value at iteration "+ f+" "+ s_2/(k_avg+eps)); 182 | System.out.println("After iteration "+f+ " alpha = "+ alpha+ " gamma ="+ (1.0/(alpha*alpha))); 183 | } 184 | 185 | if(Math.abs(temp) < eps_conv){ 186 | break; 187 | } 188 | 189 | } 190 | if(debug) 191 | System.out.println("Returned Gamma "+ (1.0/(alpha*alpha))); 192 | return 1.0/(alpha*alpha); 193 | } 194 | 195 | public static double computeOptimizationObjective(svm_node[][] values, double eps, double gamma){ 196 | double alpha = Math.sqrt(1.0/gamma); 197 | double k_avg=0.0; 198 | double s2=0.0; 199 | int l = values.length; 200 | int number = l*(l-1)/2; 201 | double [] k; 202 | for(int i=0; i< l ; i++) { 203 | for(int j=i+1; j < l; j++) { 204 | k = k_function(values[i], values[j], alpha); 205 | k_avg+=k[0]; 206 | } 207 | } 208 | k_avg/=number; 209 | for(int i=0; i < l; i++) { 210 | for(int j = i + 1; j [] kdist = getKnnCollection().getKdistNeighbors(); 64 | DistanceMeasure measure = getMeasure(); 65 | 66 | // The array that will contain the average chaining distance 67 | double[] acDist = new double[n]; 68 | double[] cof = new double[n]; 69 | 70 | // attributes used for intermediate calculations 71 | 72 | // tempDistances[x] contains the minimum distance to connect the set 73 | // already connected to the element with index indicies[x] 74 | double[] tempDistances = new double[n]; 75 | int[] indicies = new int[n]; 76 | 77 | int j; 78 | int size; 79 | 80 | // calculating average chaining distance 81 | // The average chaining distance has the following formula 82 | // (summation from i=1 to cardinality of 2*(cardinality-i+1) * ei 83 | // )/(cardinality *(cardinality-1)) 84 | for (int i = 0; i < n; i++) { 85 | 86 | int 
cardinality = weight[i] - 1; 87 | 88 | size = k + kdist[i].size(); 89 | 90 | int minIndex = 0; 91 | 92 | for (j = 0; j < k; j++) { 93 | tempDistances[j] = neighborDistances[i][j]; 94 | indicies[j] = neighborIndicies[i][j]; 95 | cardinality += weight[neighborIndicies[i][j]]; 96 | } 97 | 98 | for (int x : kdist[i]) { 99 | tempDistances[j] = neighborDistances[i][k - 1]; 100 | indicies[j] = x; 101 | cardinality += weight[x]; 102 | j++; 103 | } 104 | 105 | 106 | 107 | double summation = 0; 108 | // weighSofar represents (cardinality -i+1) in the above formula 109 | int weightSofar = cardinality - weight[i] + 1; 110 | 111 | double denominator = cardinality * (cardinality + 1); 112 | 113 | for (int l = 0; l < size; l++) { 114 | // in case we have X duplicates of the same point then we will have 115 | // the weight of the current edge equal to 2*(weightSofar + (weightSofar-1)+....+ 116 | // (weightSofar-X+1)) which is equal to the summation of i from 117 | // i= weightSoFar-X+1 to weightSofar which is equal ( 118 | // weightSOFar*(weightSoFar+1) - 119 | // (weighSoFar-X)*(weighSofar-X+1)) let t1 120 | // =weightSOfar*(weightSofar+1) and t2 = 121 | // (weighSofar-X)*(weighSofar-X+1) then the weight of the 122 | // current edge should be equal to t1-t2 123 | 124 | // currentweight = t1 125 | int currentweight = weightSofar * (weightSofar + 1); 126 | 127 | // weighSofar = weightSofar -X 128 | weightSofar -= weight[indicies[minIndex]]; 129 | 130 | // currentweight= currentweight- t2 131 | currentweight -= weightSofar * (weightSofar + 1); 132 | 133 | summation += currentweight * tempDistances[minIndex]; 134 | 135 | // the index of the point just added to the set 136 | int currentIndex = indicies[minIndex]; 137 | 138 | // an index of -1 indicates that the point was already reached 139 | // and thus shouldn't need to be reached again 140 | indicies[minIndex] = -1; 141 | 142 | // This contains the index of the point that is the nearest 143 | // neighbor of the set from the set 
indicies[0..j] 144 | minIndex = -1; 145 | 146 | for (j = 0; j < size; j++) { 147 | if (indicies[j] == -1) 148 | continue; 149 | 150 | double temp = measure.calculateDistance( 151 | points[currentIndex], points[indicies[j]]); 152 | if (temp < tempDistances[j]) 153 | tempDistances[j] = temp; 154 | 155 | if (minIndex == -1 156 | || tempDistances[minIndex] > tempDistances[j] 157 | || (tempDistances[minIndex] == tempDistances[j] && indicies[j] < indicies[minIndex])) { 158 | // assigns the nearest neighbor if non exists or if 159 | // point i is nearer than the current nearest neighbor, 160 | // in case they have the same distance ties are broken 161 | // by taking the earlier index 162 | 163 | minIndex = j; 164 | } 165 | 166 | } 167 | 168 | } 169 | acDist[i] = summation / denominator; 170 | 171 | } 172 | 173 | // calculating cof 174 | for (int i = 0; i < n; i++) { 175 | 176 | int cardinality = weight[i] - 1; 177 | double summation = cardinality * acDist[i]; 178 | for (j = 0; j < k; j++) { 179 | int currentIndex = neighborIndicies[i][j]; 180 | summation += weight[currentIndex] * acDist[currentIndex]; 181 | cardinality += weight[currentIndex]; 182 | } 183 | for (int x : kdist[i]) { 184 | summation += weight[x] * acDist[x]; 185 | cardinality += weight[x]; 186 | } 187 | cof[i] = cardinality * acDist[i] / summation; 188 | 189 | } 190 | 191 | return cof; 192 | 193 | } 194 | 195 | /** 196 | * The method is called to initialize the evaluation process. 
197 | */ 198 | @Override 199 | public double[] evaluate() { 200 | super.evaluate(); 201 | double[] cof = cof(); 202 | return cof; 203 | } 204 | 205 | @Override 206 | public double[] reEvaluate(int step) { 207 | getKnnCollection().shrink(step); 208 | double[] cof = cof(); 209 | return cof; 210 | 211 | } 212 | 213 | /** Method is overridden to avoid doing extra work **/ 214 | @Override 215 | protected void setAnomalyScore(int i, double[] neighBorDistanceSoFar, 216 | int[] neighBorIndiciesSoFar, int numberOfNeighbors) { 217 | 218 | } 219 | 220 | } 221 | 222 | class Node implements Comparable { 223 | 224 | int index; 225 | double distance; 226 | 227 | public Node(int index, double distance) { 228 | this.index = index; 229 | this.distance = distance; 230 | } 231 | 232 | @Override 233 | public int compareTo(Node arg0) { 234 | if (distance < arg0.distance) 235 | return -1; 236 | if (distance > arg0.distance) 237 | return 1; 238 | if (index < arg0.index) 239 | return -1; 240 | if (index > arg0.index) 241 | return 1; 242 | return 0; 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/evaluator/nearest_neighbor_based/INFLOEvaluator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see [] kdistNeighbors = getKnnCollection().getKdistNeighbors(); 67 | int n = getN(); 68 | double[] inflo = new double[n]; 69 | 70 | // for intermediate work 71 | int[] cardinality = new int[n]; 72 | double[] summationDensities = new double[n]; 73 | 74 | for (int i = 0; i < n; i++) { 75 | int end = neighborNumbers[i]; 76 | double kdist = distances[i][end - 1]; 77 | 78 | cardinality[i]+= weight[i]-1; 79 | summationDensities[i]+= (weight[i]-1)* 1/kdist; 80 | for (int j = 0; j < end; j++) { 81 | int currentIndex = neighbors[i][j]; 82 | int currentWeight = weight[currentIndex]; 83 | cardinality[i] += currentWeight; 84 | double currentDistance = distances[i][j]; 85 | double currentKdist = distances[currentIndex][neighborNumbers[currentIndex] - 1]; 86 | 87 | summationDensities[i] += currentWeight * 1.0 / currentKdist; 88 | if (currentDistance > currentKdist) { 89 | cardinality[currentIndex] += weight[i]; 90 | summationDensities[currentIndex] += weight[i] * 1.0 / kdist; 91 | 92 | } 93 | } 94 | 95 | for(int currentIndex: kdistNeighbors[i]) 96 | { 97 | int currentWeight= weight[currentIndex]; 98 | cardinality[i] += currentWeight; 99 | double currentDistance = distances[i][neighborNumbers[i]-1]; 100 | double currentKdist = distances[currentIndex][neighborNumbers[currentIndex] - 1]; 101 | 102 | summationDensities[i] += currentWeight* 1.0 / currentKdist; 103 | if (currentDistance > currentKdist) { 104 | cardinality[currentIndex] += weight[i]; 105 | summationDensities[currentIndex] += weight[i]* 1.0 / kdist; 106 | 107 | } 108 | 109 | } 110 | } 111 | 112 | for (int i = 0; i < n; i++) { 113 | int end = neighborNumbers[i]; 114 | double kdist = distances[i][end - 1]; 115 | inflo[i] = summationDensities[i] * kdist / cardinality[i]; 116 | 117 | } 118 | 119 | return inflo; 120 | 121 | } 122 | 123 | @Override 124 | protected void setAnomalyScore(int i, double[] neighBorDistanceSoFar, 125 | int[] neighBorIndiciesSoFar, int numberOfNeighbors) { 126 | 127 | } 128 | 129 | } 130 | 
-------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/evaluator/nearest_neighbor_based/KNNCollection.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see [] kdistNeighbors; 70 | 71 | /** 72 | * The weight of the points. Which corresponds to the number of elements in 73 | * the example set that have these coordinates. 
74 | **/ 75 | private int[] weight; 76 | 77 | @SuppressWarnings("unchecked") 78 | public KNNCollection(int n, int k, double[][] points, int[] weight) { 79 | this.n = n; 80 | this.k = k; 81 | this.points = points; 82 | this.weight = weight; 83 | 84 | neighborIndicies = new int[n][k]; 85 | neighborDistances = new double[n][k]; 86 | numberOfNeighbors = new int[n]; 87 | kdistNeighbors = new LinkedList[n]; 88 | 89 | for (int i = 0; i < n; i++) 90 | kdistNeighbors[i] = new LinkedList(); 91 | 92 | } 93 | 94 | public int getK() { 95 | return k; 96 | } 97 | 98 | public LinkedList[] getKdistNeighbors() { 99 | return kdistNeighbors; 100 | } 101 | 102 | public int getN() { 103 | return n; 104 | } 105 | 106 | public double[][] getNeighBorDistanceSoFar() { 107 | return neighborDistances; 108 | } 109 | 110 | public int[][] getNeighBorIndiciesSoFar() { 111 | return neighborIndicies; 112 | } 113 | 114 | public int[] getNumberOfNeighborsSoFar() { 115 | return numberOfNeighbors; 116 | } 117 | 118 | public double[][] getPoints() { 119 | return points; 120 | } 121 | 122 | public int[] getWeight() { 123 | return weight; 124 | } 125 | 126 | public void shrink(int shrinkBy){ 127 | for (int i=0; i< shrinkBy; i++) 128 | shrink(); 129 | } 130 | /** 131 | * This method shrinks the kNNCollection to k-1 132 | */ 133 | public void shrink() { 134 | k--; 135 | if (k == 0) 136 | return; 137 | for (int index = 0; index < n; index++) { 138 | // reduce the number of distinct neighbors by 1 139 | numberOfNeighbors[index]--; 140 | // removed index is equal to the old numberofNeighbors -1 which is equal to the new number of neighbors 141 | int removedIndex = numberOfNeighbors[index]; 142 | int newLast = removedIndex - 1; 143 | if (neighborDistances[index][newLast] == neighborDistances[index][removedIndex]) { 144 | kdistNeighbors[index] 145 | .add(neighborIndicies[index][removedIndex]); 146 | } else 147 | kdistNeighbors[index].clear(); 148 | } 149 | 150 | } 151 | 152 | /** 153 | * This method updates the 
KNNcollection by adding the currentDistance and 154 | * point2 to the set of the nearest neighbors of point1 if applicable. 155 | * 156 | * @param point1 157 | * The point we are updating the neighborhood set for. 158 | * @param point2 159 | * @param currentDistance 160 | * The distance between point1 and point2. 161 | */ 162 | public void updateNearestNeighbors(int point1, int point2, 163 | double currentDistance) { 164 | // if this is the first neighbor then add it to the neighborhood set. 165 | if (numberOfNeighbors[point1] == 0) { 166 | neighborIndicies[point1][0] = point2; 167 | neighborDistances[point1][0] = currentDistance; 168 | numberOfNeighbors[point1]++; 169 | return; 170 | } 171 | 172 | int last = numberOfNeighbors[point1] - 1; 173 | 174 | // if the number of neighbors is less than k or the currentDistance is 175 | // less than the max distance in the neighborhood so far then add point2 176 | // to the set 177 | 178 | if (neighborDistances[point1][last] >= currentDistance 179 | || numberOfNeighbors[point1] < k) { 180 | 181 | boolean flag = true; 182 | if (numberOfNeighbors[point1] < k) 183 | numberOfNeighbors[point1]++; 184 | else { 185 | 186 | if (neighborDistances[point1][last] == currentDistance) { 187 | // if the current distance as the maximum distance then the 188 | // point should be added to the nearest neighborhood set 189 | kdistNeighbors[point1].add(point2); 190 | flag = false; 191 | } else { 192 | 193 | if (last > 0 194 | && neighborDistances[point1][last - 1] == neighborDistances[point1][last]) 195 | // if the maximum distance is the same as the second 196 | // maximum distace then last point which is going to 197 | // removed should be added to the list. 198 | kdistNeighbors[point1] 199 | .add(neighborIndicies[point1][last]); 200 | else 201 | // else the kdist neighbors are reset. 
202 | // kdistNeighbors[point1].empty(); 203 | kdistNeighbors[point1].clear(); 204 | } 205 | } 206 | 207 | // Adding point2 to the neighborhood in the appropriate position 208 | // using insertion sort. 209 | 210 | if (flag) { 211 | int i = Math.min(last, k - 2); 212 | for (; i >= 0; i--) 213 | if (neighborDistances[point1][i] > currentDistance) { 214 | neighborDistances[point1][i + 1] = neighborDistances[point1][i]; 215 | neighborIndicies[point1][i + 1] = neighborIndicies[point1][i]; 216 | } else 217 | break; 218 | 219 | neighborDistances[point1][i + 1] = currentDistance; 220 | neighborIndicies[point1][i + 1] = point2; 221 | } 222 | } 223 | } 224 | public static KNNCollection clone(KNNCollection a){ 225 | KNNCollection ret = new KNNCollection(a.n,a.k,a.points,a.weight); 226 | ret.neighborIndicies = a.neighborIndicies.clone(); 227 | ret.neighborDistances = a.neighborDistances.clone(); 228 | ret.numberOfNeighbors = a.numberOfNeighbors.clone(); 229 | ret.kdistNeighbors = a.kdistNeighbors.clone(); 230 | return ret; 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/evaluator/nearest_neighbor_based/KNNCollectionModel.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2013 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see ids; 79 | /** 80 | * HashMap containing the mapping of the example Ids to the index of the 81 | * example which will be used in the further processing 82 | */ 83 | private HashMap idMap; 84 | 85 | public AbstractAnomalyDetectionOperator(OperatorDescription description) { 86 | super(description); 87 | // Adding the outlier attribute to the meta data 88 | getTransformer().addRule( 89 | new PassThroughRule(exampleSetInput, exampleSetOutput, false) { 90 | @Override 91 | public MetaData modifyMetaData(MetaData metaData) { 92 | if (metaData instanceof ExampleSetMetaData) { 93 | ExampleSetMetaData exampleSetMetaData = (ExampleSetMetaData) metaData; 94 | AttributeMetaData amd = new AttributeMetaData( 95 | Attributes.OUTLIER_NAME, Ontology.REAL, 96 | Attributes.OUTLIER_NAME); 97 | exampleSetMetaData.addAttribute(amd); 98 | return exampleSetMetaData; 99 | } else { 100 | return metaData; 101 | } 102 | 103 | } 104 | }); 105 | getTransformer().addPassThroughRule(exampleSetInput, originalOutput); 106 | 107 | } 108 | 109 | /** 110 | * The method performs the common tasks for most anomaly detection operators 111 | * so that doWork(ExampleSet exampleSet, Attributes attributes, double[][] 112 | * points) is enough to be overridden in the subclasses to do the 113 | * functionality of the operator 114 | */ 115 | @Override 116 | public void doWork() throws OperatorException { 117 | ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class); 118 | 119 | int type = DataRowFactory.TYPE_DOUBLE_ARRAY; 120 | if (exampleSet.getExampleTable() instanceof MemoryExampleTable) { 121 | DataRowReader reader = exampleSet.getExampleTable() 122 | .getDataRowReader(); 123 | if (reader.hasNext()) 124 | type = reader.next().getType(); 125 | } 126 | ExampleSet resultSet = null; 127 | if (type >= 0) 128 | resultSet = MaterializeDataInMemory.materializeExampleSet( 129 | exampleSet, type); 130 | else 131 | resultSet = (ExampleSet) exampleSet.clone(); 132 | Attributes attributes = 
resultSet.getAttributes(); 133 | 134 | Attribute anomalyScore = initializeAnomalyScore(resultSet, attributes); 135 | double[][] points = initializePoints(resultSet, attributes); 136 | double[] res = doWork(resultSet, attributes, points); 137 | storeResult(resultSet, res, anomalyScore); 138 | originalOutput.deliver(exampleSet); 139 | exampleSetOutput.deliver(resultSet); 140 | 141 | } 142 | 143 | /** 144 | * The method that should be implemented by the subclasses. 145 | * 146 | * @param exampleSet 147 | * example set 148 | * @param attributes 149 | * the attributes of the example set 150 | * @param points 151 | * the array containing the points in the example set 152 | * @return The result array that contains the anomaly score. 153 | * 154 | * @throws OperatorException 155 | */ 156 | public double[] doWork(ExampleSet exampleSet, Attributes attributes, 157 | double[][] points) throws OperatorException { 158 | return null; 159 | 160 | } 161 | 162 | public InputPort getExampleSetInput() { 163 | return exampleSetInput; 164 | } 165 | 166 | public OutputPort getExampleSetOutput() { 167 | return exampleSetOutput; 168 | } 169 | 170 | public HashMap getIdMap() { 171 | return idMap; 172 | } 173 | 174 | public ArrayList getIds() { 175 | return ids; 176 | } 177 | 178 | public OutputPort getOriginalOutput() { 179 | return originalOutput; 180 | } 181 | 182 | /** 183 | * Initializes the outlier attribute 184 | * 185 | * @param exampleSet 186 | * @param attributes 187 | * @return anomalyScore Attribute 188 | */ 189 | public Attribute initializeAnomalyScore(ExampleSet exampleSet, 190 | Attributes attributes) { 191 | Attribute anomalyScore = AttributeFactory.createAttribute( 192 | Attributes.OUTLIER_NAME, Ontology.REAL); 193 | exampleSet.getExampleTable().addAttribute(anomalyScore); 194 | attributes.setOutlier(anomalyScore); 195 | return anomalyScore; 196 | } 197 | 198 | /** 199 | * Initializes the points from the example set. 
200 | * 201 | * @param exampleSet 202 | * the input example set 203 | * @param attributes 204 | * the attributes of the exampleSet 205 | * @return points the initialized points from the exampleSet 206 | */ 207 | public double[][] initializePoints(ExampleSet exampleSet, 208 | Attributes attributes) { 209 | double[][] points = new double[exampleSet.size()][attributes.size()]; 210 | int currentExample = 0; 211 | Attribute idAttribute= exampleSet.getAttributes().getId(); 212 | 213 | ids = new ArrayList(); 214 | idMap = new HashMap(); 215 | for (Example example : exampleSet) { 216 | int i = 0; 217 | Object id; 218 | if(idAttribute!=null){ 219 | if(idAttribute.isNominal()){ 220 | NominalMapping nominalMapping = idAttribute.getMapping(); 221 | id = nominalMapping.mapIndex((int)(example.getValue(idAttribute))); 222 | 223 | } 224 | else { 225 | id = example.getValue(idAttribute); 226 | } 227 | ids.add(id); 228 | idMap.put(id, currentExample); 229 | } 230 | for (Attribute currentAttribute : attributes) { 231 | 232 | points[currentExample][i++] = example 233 | .getValue(currentAttribute); 234 | } 235 | currentExample++; 236 | } 237 | return points; 238 | } 239 | 240 | public void storeResult(ExampleSet exampleSet, double[] res, 241 | Attribute anomalyScore) { 242 | int current = 0; 243 | for (Example example : exampleSet) { 244 | example.setValue(anomalyScore, res[current++]); 245 | 246 | } 247 | } 248 | 249 | } 250 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/cluster_based/AbstractClusteringAnomalyDetectionOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 
6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see = 0) 132 | resultSet = MaterializeDataInMemory.materializeExampleSet( 133 | exampleSet, type); 134 | else 135 | resultSet = (ExampleSet) exampleSet.clone(); 136 | Attributes attributes = resultSet.getAttributes(); 137 | 138 | Attribute anomalyScore = initializeAnomalyScore(resultSet, attributes); 139 | double[][] points = initializePoints(resultSet, attributes); 140 | preprocessing(exampleSet, attributes, points); 141 | double[] res = doWork(resultSet, attributes, points); 142 | storeResult(resultSet, res, anomalyScore); 143 | getOriginalOutput().deliver(exampleSet); 144 | getExampleSetOutput().deliver(resultSet); 145 | clusterModelOutput.deliver(clusterModelInput.getData(ClusterModel.class)); 146 | 147 | 148 | } 149 | 150 | 151 | 152 | /** 153 | * Initializes the instance variables. 
154 | * 155 | * @param exampleSet 156 | * @param attributes 157 | * @param points 158 | * @throws OperatorException 159 | */ 160 | public void preprocessing(ExampleSet exampleSet, Attributes attributes, 161 | double[][] points) throws OperatorException { 162 | 163 | ClusterModel model = clusterModelInput.getData(ClusterModel.class); 164 | Object[] clusters = model.getClusters().toArray(); 165 | int numberOfClusters = clusters.length; 166 | 167 | clusterSize = new int[numberOfClusters ]; 168 | int n = points.length; 169 | this.logNote("cluster number ="+ numberOfClusters); 170 | int attributeSize = points[0].length; 171 | belongsToCluster = new int[n]; 172 | Arrays.fill(belongsToCluster, -1); 173 | centriods = new double[numberOfClusters][attributeSize]; 174 | 175 | HashMap idMap = getIdMap(); 176 | for (int i = 0; i < numberOfClusters ; i++) { 177 | clusterSize[i] = ((Cluster) clusters[i]).getNumberOfExamples(); 178 | Collection exampleIds = ((Cluster) clusters[i]) 179 | .getExampleIds(); 180 | for (Object id : exampleIds) { 181 | if(!idMap.containsKey(id)){ 182 | // Id present in the cluster model and not in the clustered set 183 | throw new OperatorException("Incompatible Ids between the cluster model and clustered set."); 184 | } 185 | int mapping = idMap.get(id); 186 | belongsToCluster[mapping] = i; 187 | for (int j = 0; j < attributeSize; j++) 188 | centriods[i][j] += points[mapping][j]; 189 | 190 | } 191 | for (int j = 0; j < attributeSize; j++) 192 | centriods[i][j] /= clusterSize[i]; 193 | 194 | 195 | } 196 | if(model instanceof CentroidClusterModel) 197 | { 198 | List cent= ((CentroidClusterModel) model).getCentroids(); 199 | for (int i=0; i< numberOfClusters; i++) 200 | centriods[i]= cent.get(i).getCentroid(); 201 | } 202 | for (int i=0; i< n ; i++){ 203 | if(belongsToCluster[i]==-1){ 204 | double id=exampleSet.getExample(i).getId(); 205 | if(idMap.containsKey(id)){ 206 | int idMapped = idMap.get(id); 207 | for (int l=0; l< points[i].length; l++) 208 | { 
209 | if(points[i][l]!= points[idMapped][l]) 210 | throw new OperatorException("Incompatible Ids between the cluster model and the clustered set. The clustered set might contain duplicate ids."); 211 | } 212 | belongsToCluster[i]= belongsToCluster[idMapped]; 213 | } 214 | else throw new OperatorException("Incompatible Ids between the cluster model and the clustered set. The clustered set might contain duplicate ids."); 215 | 216 | } 217 | } 218 | 219 | 220 | } 221 | @Override 222 | public List getParameterTypes() { 223 | List types= super.getParameterTypes(); 224 | List distancetypes = DistanceMeasures.getParameterTypes(this); 225 | types.addAll(distancetypes); 226 | return types; 227 | } 228 | 229 | } 230 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/cluster_based/CBLOFAnomalyDetectionOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see getParameterTypes() { 96 | List types = super.getParameterTypes(); 97 | types 98 | .add(new ParameterTypeDouble( 99 | PARAMETER_ALPHA, 100 | "This parameter specifies the percentage of the data set that is expected to be normal", 101 | 0, 100, 90, false)); 102 | types 103 | .add(new ParameterTypeDouble( 104 | PARAMETER_BETA, 105 | "This parameter specifies the minimum ratio between the size of a large cluster and a small cluster", 106 | 1, Integer.MAX_VALUE, 5, false)); 107 | types 108 | .add(new ParameterTypeBoolean( 109 | PARAMETER_WEIGHTING, 110 | "Uses the cluster size as a weight factor as proposed by the original publication.", 111 | true)); 112 | 113 | 114 | return types; 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/cluster_based/LDCOFAnomalyDetectionOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see getParameterTypes() { 92 | List types = super.getParameterTypes(); 93 | 94 | /**/ 95 | types 96 | .add(new ParameterTypeBoolean( 97 | PARAMETER_LIKE_CBLOF, 98 | "The division into large and small clusters will be implemented in a manner similar to CBLOF.", 99 | false, false)); 100 | ParameterType type = new ParameterTypeDouble(PARAMETER_ALPHA, 101 | "percentage of normal data", 0, 100, 90); 102 | type.registerDependencyCondition(new BooleanParameterCondition(this, 103 | PARAMETER_LIKE_CBLOF, true, true)); 104 | types.add(type); 105 | type = new ParameterTypeDouble(PARAMETER_BETA, 106 | "the minimum ratio between large and small clusters", 1, 107 | Integer.MAX_VALUE, 5); 108 | type.registerDependencyCondition(new BooleanParameterCondition(this, 109 | PARAMETER_LIKE_CBLOF, true, true)); 110 | types.add(type); 111 | type = new ParameterTypeDouble( 112 | PARAMETER_GAMMA, 113 | "ratio between the maximum size of small clusters and the average cluster size", 114 | 0, 1, 0.1); 115 | type.registerDependencyCondition(new BooleanParameterCondition(this,PARAMETER_LIKE_CBLOF, true, false)); 116 | 117 | types.add(type); 118 | /**/ 119 | return types; 120 | } 121 | 122 | } 123 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/evaluation/ROCOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2013 by Deutsches Forschungszentrum fuer Kuenstliche 5 | * Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify it under the 8 | * terms of the GNU Affero General Public License as published by the Free 9 | * Software Foundation, either version 3 of the License, or (at your option) any 10 | * later version. 
11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see count = new HashMap(); 69 | double exampleSize = exampleSetInput.getData(ExampleSet.class).size(); 70 | 71 | for (Object s : initializeLabels(exampleSetInput.getData(ExampleSet.class))) { 72 | int val = 0; 73 | if (count.containsKey(s)) 74 | val = count.get(s); 75 | val++; 76 | count.put(s, val); 77 | } 78 | 79 | Object less = null; 80 | for (Object k : count.keySet()) { 81 | if (less == null || count.get(less) > count.get(k)) { 82 | less = k; 83 | } 84 | } 85 | if (!less.equals(getParameterAsString(PARAMETER_LABEL))) { 86 | this.logWarning("Specified outlier class " + getParameterAsString(PARAMETER_LABEL) + " is not smallest class (" + (count.get(less) / exampleSize) + "%)"); 87 | } 88 | else { 89 | this.logNote("Outlier class " + less + " is with " + (count.get(less) / exampleSize) + "% ok"); 90 | } 91 | } 92 | /** 93 | * Adds the performance criteria as plottable values 94 | */ 95 | public void addPerformanceValue(final String name, String description) { 96 | addValue(new ValueDouble(name, description) { 97 | @Override 98 | public double getDoubleValue() { 99 | if (currentPerformanceVector == null) 100 | return Double.NaN; 101 | PerformanceCriterion c = currentPerformanceVector.getCriterion(name); 102 | 103 | if (c != null) { 104 | return c.getAverage(); 105 | } else { 106 | return Double.NaN; 107 | } 108 | } 109 | }); 110 | } 111 | private List getCriteria() { 112 | List performanceCriteria = new LinkedList(); 113 | performanceCriteria.add( new MultiClassificationPerformance(MultiClassificationPerformance.ACCURACY)); 114 | performanceCriteria.add(new ROCPerformanceVector("AUC", auc)); 115 | return performanceCriteria; 116 | } 117 | 118 | 119 | public ROCOperator(OperatorDescription description) { 120 | super(description); 121 | performanceInput.addPrecondition(new SimplePrecondition(performanceInput, new 
MetaData(PerformanceVector.class), false)); 122 | getTransformer().addRule(new PassThroughOrGenerateRule(performanceInput, performanceOutput, new MetaData(PerformanceVector.class))); 123 | getTransformer().addRule(new GenerateNewMDRule(performanceOutput, PerformanceVector.class)); 124 | getTransformer().addPassThroughRule(exampleSetInput, exampleSetOutput); 125 | getTransformer().addGenerationRule(rocExampleSet, ExampleSet.class); 126 | List criteria = getCriteria(); 127 | for (PerformanceCriterion criterion : criteria) { 128 | addPerformanceValue(criterion.getName(), criterion.getDescription()); 129 | } 130 | 131 | addValue(new ValueDouble("performance", "The last performance (main criterion).") { 132 | @Override 133 | public double getDoubleValue() { 134 | if (currentPerformanceVector != null) 135 | return currentPerformanceVector.getMainCriterion().getAverage(); 136 | else 137 | return Double.NaN; 138 | } 139 | }); 140 | 141 | } 142 | 143 | @Override 144 | public void doWork() throws OperatorException { 145 | currentPerformanceVector = performanceInput.getDataOrNull(PerformanceVector.class); 146 | if (currentPerformanceVector == null) { 147 | currentPerformanceVector = new PerformanceVector(); 148 | } 149 | 150 | PerformanceCriterion perCrit; 151 | preSetOutlierLable(); 152 | ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class); 153 | Object[] labels = initializeLabels(exampleSet); 154 | 155 | double[] outliers = getOutliers(exampleSet); 156 | 157 | ROCEvaluator evaluator = new ROCEvaluator(); 158 | String outlierString = getParameterAsString(PARAMETER_LABEL); 159 | ExampleSet roc = (ExampleSet) ExampleSetFactory.createExampleSet(evaluator.evaluate(outlierString, labels, outliers)); 160 | ExampleSet pre = (ExampleSet) ExampleSetFactory.createExampleSet(evaluator.pre); 161 | String norm = evaluator.getNormalClass(); 162 | roc.getAttributes().get("att1").setName("false_positive_rate"); 163 | 
roc.getAttributes().get("att2").setName("true_positive_rate"); 164 | pre.getAttributes().get("att1").setName("precision"); 165 | pre.getAttributes().get("att2").setName("recall"); 166 | rocExampleSet.deliver(roc); 167 | preOutput.deliver(pre); 168 | auc = evaluator.auc; 169 | Object[][] auc_ = { { auc } }; 170 | Object[] labels2 = { "AUC" }; 171 | ExampleSet a = (ExampleSet) ExampleSetFactory.createExampleSet(auc_, labels2); 172 | 173 | perCrit = new ROCPerformanceVector("AUC", auc); 174 | currentPerformanceVector.addCriterion(perCrit); 175 | currentPerformanceVector.setMainCriterionName("AUC"); 176 | 177 | Attribute pediction; 178 | int i; 179 | pediction = AttributeFactory.createAttribute("prediction", Ontology.NOMINAL); 180 | exampleSet.getExampleTable().addAttribute(pediction); 181 | exampleSet.getAttributes().setPredictedLabel(pediction); 182 | i = 0; 183 | String s = ""; 184 | for (Example example : exampleSet) { 185 | if (evaluator.out.contains(i)) 186 | s = getParameterAsString(PARAMETER_LABEL); 187 | else 188 | s = norm; 189 | example.setValue(pediction, s); 190 | i++; 191 | } 192 | 193 | MultiClassificationPerformance test = new MultiClassificationPerformance(MultiClassificationPerformance.ACCURACY); 194 | test.startCounting(exampleSet, false); 195 | for (Example e : exampleSet) 196 | test.countExample(e); 197 | currentPerformanceVector.addCriterion(test); 198 | 199 | exampleSetOutput.deliver(exampleSet); 200 | performanceOutput.deliver(currentPerformanceVector); 201 | aucOutput.deliver(a); 202 | } 203 | 204 | public double[] getOutliers(ExampleSet exampleSet) { 205 | double[] outliers = new double[exampleSet.size()]; 206 | int currentExample = 0; 207 | Attribute attribute = exampleSet.getAttributes().getOutlier(); 208 | for (Example example : exampleSet) { 209 | outliers[currentExample++] = example.getValue(attribute); 210 | } 211 | return outliers; 212 | 213 | } 214 | 215 | public Object[] initializeLabels(ExampleSet exampleSet) { 216 | Attribute label 
= exampleSet.getAttributes().getLabel(); 217 | Object[] labels = new Object[exampleSet.size()]; 218 | int currentExample = 0; 219 | if (label.isNumerical()) 220 | for (Example example : exampleSet) { 221 | labels[currentExample++] = example.getValue(label); 222 | } 223 | else { 224 | NominalMapping nominalMapping = label.getMapping(); 225 | for (Example example : exampleSet) { 226 | labels[currentExample++] = nominalMapping.mapIndex((int) (example.getValue(label))); 227 | } 228 | 229 | } 230 | 231 | return labels; 232 | 233 | } 234 | 235 | @Override 236 | public List getParameterTypes() { 237 | List types = super.getParameterTypes(); 238 | types.add(new ParameterTypeString(PARAMETER_LABEL, "The value that define the anomalous class for the attribute with the role \"label\". There should only be one label for the normal class and one label for the outlier class, if multiple labels exist, please rename them first.", "", false)); 239 | return types; 240 | } 241 | 242 | } 243 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/evaluation/ROCPerformanceVector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2012 by Deutsches Forschungszentrum fuer Kuenstliche 5 | * Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify it under the 8 | * terms of the GNU Affero General Public License as published by the Free 9 | * Software Foundation, either version 3 of the License, or (at your option) any 10 | * later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see getParameterTypes() { 78 | LinkedList types = new LinkedList(); 79 | types.add(new ParameterTypeInt( 80 | PARAMETER_LEVEL_DIFFERENCE, 81 | "The difference in number of levels between sampling & counting, &alpha = 2 ^ -L", 82 | 1, Integer.MAX_VALUE,4,false)); 83 | types.add(new ParameterTypeInt( 84 | PARAMETER_TREE_DEPTH, 85 | "Number of levels in the quadtree", 86 | 1, Integer.MAX_VALUE,10,false)); 87 | types.add(new ParameterTypeInt( 88 | PARAMETER_GRIDS_NUM, 89 | "Total number of different grids", 90 | 1,Integer.MAX_VALUE,20,false)); 91 | 92 | types.add(new ParameterTypeInt( 93 | PARAMETER_NMIN, 94 | "The minimum number of neighbors in the sampling neighborhood.", 95 | 1,Integer.MAX_VALUE,20,false)); 96 | 97 | types.addAll(RandomGenerator.getRandomGeneratorParameters(this)); 98 | types.add(new ParameterTypeBoolean( 99 | PARAMETER_PARALLELIZE_EVALUATION_PROCESS, 100 | "Specifies that evaluation process should be performed in parallel.", 101 | false,false)); 102 | 103 | ParameterType numberOfThreadsType = new ParameterTypeInt( 104 | PARAMETER_NUMBER_OF_THREADS, 105 | "Specifies the number of threads for execution.", 106 | 1,Integer.MAX_VALUE,Runtime.getRuntime().availableProcessors(),false); 107 | 108 | numberOfThreadsType.registerDependencyCondition(new BooleanParameterCondition(this, 109 | PARAMETER_PARALLELIZE_EVALUATION_PROCESS, true, true)); 110 | 111 | types.add(numberOfThreadsType); 112 | 113 | types.addAll(DistanceMeasures.getParameterTypes(this)); 114 | return types; 115 | 116 | } 117 | 118 | } 119 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/AbstractNearestNeighborBasedAnomalyDetectionOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or 
its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see [] mapping; 56 | 57 | 58 | /** The number of points assigned to that index **/ 59 | private int[] weight; 60 | 61 | 62 | 63 | public AbstractNearestNeighborBasedAnomalyDetectionOperator( 64 | OperatorDescription description) { 65 | super(description); 66 | } 67 | 68 | @Override 69 | public void doWork() throws OperatorException { 70 | ExampleSet exampleSet = getExampleSetInput().getData(ExampleSet.class); 71 | this.logNote(getName()); 72 | int type = DataRowFactory.TYPE_DOUBLE_ARRAY; 73 | if (exampleSet.getExampleTable() instanceof MemoryExampleTable) { 74 | DataRowReader reader = exampleSet.getExampleTable() 75 | .getDataRowReader(); 76 | if (reader.hasNext()) 77 | type = reader.next().getType(); 78 | } 79 | ExampleSet resultSet = null; 80 | if (type >= 0) 81 | resultSet = MaterializeDataInMemory.materializeExampleSet( 82 | exampleSet, type); 83 | else 84 | resultSet = (ExampleSet) exampleSet.clone(); 85 | Attributes attributes = resultSet.getAttributes(); 86 | 87 | Attribute anomalyScore = initializeAnomalyScore(resultSet, attributes); 88 | double[][] points = initializePoints(resultSet, attributes); 89 | preprocessing(points, exampleSet.size()); 90 | this.logNote("Number of distinct records "+ distinctPoints.length); 91 | double[] res = doWork(resultSet, attributes, distinctPoints, weight); 92 | storeResult(resultSet, res, anomalyScore); 93 | getExampleSetOutput().deliver(resultSet); 94 | getOriginalOutput().deliver(exampleSet); 95 | 96 | 97 | } 98 | 99 | public double[] doWork(ExampleSet exampleSet, Attributes attributes, 100 | 
double[][] points, int[] weight) throws OperatorException { 101 | return null; 102 | } 103 | 104 | @Override 105 | public double[][] initializePoints(ExampleSet exampleSet, 106 | Attributes attributes) { 107 | double[][] points = new double[exampleSet.size()][attributes.size()]; 108 | int currentExample = 0; 109 | 110 | for (Example example : exampleSet) { 111 | int i = 0; 112 | for (Attribute currentAttribute : attributes) { 113 | points[currentExample][i++] = example 114 | .getValue(currentAttribute); 115 | } 116 | currentExample++; 117 | } 118 | return points; 119 | } 120 | 121 | @SuppressWarnings("unchecked") 122 | public double[][] preprocessing(double[][] points, int n) { 123 | 124 | int distinctPointsnumber = 1; 125 | LinkedList[] lists = new LinkedList[n]; 126 | Point[] orderedPoints = new Point[n]; 127 | for (int i = 0; i < n; i++) { 128 | orderedPoints[i] = new Point(i, points[i]); 129 | } 130 | Arrays.sort(orderedPoints); 131 | int lastIndex = 0; 132 | int inserIn = orderedPoints[0].index; 133 | 134 | lists[inserIn] = new LinkedList(); 135 | lists[inserIn].add(new Integer(orderedPoints[0].index)); 136 | 137 | for (int i = 1; i < n; i++) { 138 | if (orderedPoints[lastIndex].compareTo(orderedPoints[i]) != 0) { 139 | lastIndex = i; 140 | inserIn = orderedPoints[i].index; 141 | lists[inserIn] = new LinkedList(); 142 | distinctPointsnumber++; 143 | } 144 | 145 | lists[inserIn].add(new Integer(orderedPoints[i].index)); 146 | 147 | } 148 | 149 | 150 | distinctPoints = new double[distinctPointsnumber][]; 151 | mapping = new LinkedList[distinctPointsnumber]; 152 | weight = new int[distinctPointsnumber]; 153 | int j = 0; 154 | for (int i = 0; i < n; i++) { 155 | if (lists[i] != null) { 156 | mapping[j] = lists[i]; 157 | weight[j] = lists[i].size(); 158 | distinctPoints[j++] = points[i]; 159 | if (j == distinctPointsnumber) 160 | break; 161 | } 162 | } 163 | 164 | return distinctPoints; 165 | } 166 | 167 | @Override 168 | public void storeResult(ExampleSet 
exampleSet, double[] res, 169 | Attribute anomalyScore) { 170 | for (int i = 0; i < res.length; i++) { 171 | for (int id : mapping[i]) 172 | exampleSet.getExample(id).setValue(anomalyScore, res[i]); 173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/COFAnomalyDetectionOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see 1) { 69 | if (k >= n) { 70 | this.logWarning("Setting " + KNNAnomalyDetectionOperator.PARAMETER_K + " to #Datapoints-1."); 71 | k = n-1; 72 | //this.setParameter(KNNAnomalyDetectionOperator.PARAMETER_K, (n-1)+""); 73 | } 74 | 75 | boolean parallel = getParameterAsBoolean(PARAMETER_PARALLELIZE_EVALUATION_PROCESS); 76 | int numberOfThreads = getParameterAsInt(PARAMETER_NUMBER_OF_THREADS); 77 | 78 | readModel(n,k,points,weight,measure); 79 | COFEvaluator evaluator = new COFEvaluator(knnCollection, measure,parallel, numberOfThreads, this,n,k,newCollection); 80 | ret = evaluator.evaluate(); 81 | model = new KNNCollectionModel(exampleSet,knnCollection,measure); 82 | modelOutput.deliver(model); 83 | knnCollection = null; 84 | } 85 | return ret; 86 | 87 | } 88 | 89 | @Override 90 | public List getParameterTypes() { 91 | List types = super.getParameterTypes(); 92 | ParameterType type=types.get(1); 93 | types.remove(type); 94 | return types; 95 | } 96 | 97 | } 98 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/INFLOAnomalyDetectionOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see 1) { 67 | if (k >= n) { 68 | this.logWarning("Setting " + KNNAnomalyDetectionOperator.PARAMETER_K + " to #Datapoints-1."); 69 | k = n-1; 70 | //this.setParameter(KNNAnomalyDetectionOperator.PARAMETER_K, (n-1)+""); 71 | } 72 | 73 | boolean parallel = getParameterAsBoolean(PARAMETER_PARALLELIZE_EVALUATION_PROCESS); 74 | int numberOfThreads = getParameterAsInt(PARAMETER_NUMBER_OF_THREADS); 75 | 76 | readModel(n,k,points,weight,measure); 77 | INFLOEvaluator evaluator = new INFLOEvaluator(knnCollection, 78 | measure,parallel, numberOfThreads, this,n,k,newCollection); 79 | ret = evaluator.evaluate(); 80 | model = new KNNCollectionModel(exampleSet,knnCollection,measure); 81 | modelOutput.deliver(model); 82 | knnCollection = null; 83 | } 84 | 85 | return ret; 86 | } 87 | 88 | @Override 89 | public List getParameterTypes() { 90 | List types = super.getParameterTypes(); 91 | ParameterType type = types.get(1); 92 | types.remove(type); 93 | return types; 94 | } 95 | 96 | } 97 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/KNNAnomalyDetectionOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see 1) { 98 | if (k >= n) { 99 | this.logWarning("Setting " + KNNAnomalyDetectionOperator.PARAMETER_K + " to #Datapoints-1."); 100 | k = n-1; 101 | //this.setParameter(KNNAnomalyDetectionOperator.PARAMETER_K, (n-1) + ""); 102 | } 103 | readModel(n,k,points,weight,measure); 104 | KNNEvaluator evaluator = new KNNEvaluator(knnCollection, kth, measure, parallel, numberOfThreads, this,n,k,newCollection); 105 | ret = evaluator.evaluate(); 106 | model = new KNNCollectionModel(exampleSet,knnCollection,measure); 107 | modelOutput.deliver(model); 108 | knnCollection = null; 109 | } 110 | return ret; 111 | } 112 | 113 | public DistanceMeasureHelper getMeasureHelper() { 114 | return measureHelper; 115 | } 116 | public void readModel(int n, int k, double[][] points,int[] weight,DistanceMeasure measure) throws OperatorException { 117 | if(modelInput.isConnected()){ 118 | KNNCollectionModel input; 119 | input = modelInput.getData(KNNCollectionModel.class); 120 | knnCollection = input.get(); 121 | newCollection = false; 122 | if(k>knnCollection.getK() || !Arrays.deepEquals(knnCollection.getPoints(),points) ||!measure.getClass().toString().equals(input.measure.getClass().toString()) ){ 123 | if(k>knnCollection.getK()) { 124 | this.logNote("Model at input port can not be used (k too small)."); 125 | } 126 | else { 127 | this.logNote("Model at input port can not be used (Model andExampleSet not matching)."); 128 | } 129 | knnCollection = new KNNCollection(n, k, points, weight); 130 | newCollection = true; 131 | 132 | } 133 | else{ 134 | this.logNote(" Model at input port used for speeding up the operator."); 135 | } 136 | if(k getParameterTypes() { 151 | List types =super.getParameterTypes(); 152 | types 153 | .add(new ParameterTypeInt( 154 | PARAMETER_K, 155 | "This parameter defines the number of neighbours to be considered", 156 | 1, Integer.MAX_VALUE, 10, false)); 157 | ParameterType type = new ParameterTypeBoolean( 158 | PARAMETER_KTH_NEIGHBOR_DISTANCE , 159 | "Sets the 
anomaly score to the kth-neighbor-distance like the algorithm proposed by Ramaswamy et al (2000) ", 160 | false, false); 161 | 162 | types.add(type); 163 | 164 | types.addAll(DistanceMeasures.getParameterTypes(this)); 165 | 166 | types 167 | .add(new ParameterTypeBoolean( 168 | PARAMETER_PARALLELIZE_EVALUATION_PROCESS, 169 | "Specifies that evaluation process should be performed in parallel", 170 | false, false)); 171 | type = (new ParameterTypeInt(PARAMETER_NUMBER_OF_THREADS, 172 | "Specifies the number of threads for execution.", 1, 173 | Integer.MAX_VALUE, Runtime.getRuntime().availableProcessors(), 174 | false)); 175 | type.registerDependencyCondition(new BooleanParameterCondition(this, 176 | PARAMETER_PARALLELIZE_EVALUATION_PROCESS, true, true)); 177 | types.add(type); 178 | 179 | return types; 180 | 181 | } 182 | 183 | } 184 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/LOCIAnomalyDetectionOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see 46 | * The algorithm has the following pros over other approaches. The results are 47 | * not highly affected by the parameters and it provides an automatic 48 | * statistically intuitive cut off to determine the outliers. 49 | *

50 | *

51 | * The computation of the LOCI requires the calculation of MDEF and σMDEF. 52 | * MDEF for a point pi at radius r refers to the deviation of the density of pi 53 | * to that in its average local neighborhood density. σMDEF is the 54 | * normalised standard deviation of the point relative to its local 55 | * neighborhood. 56 | *

57 | *

58 | * The original publication suggests the following flagging scheme: the object 59 | * should be flagged as an outlier if MDEF(pi, r, α) > 3 60 | * * σMDEF(pi, r, α). The operator produces an outlier score which 61 | * corresponds to the maximum ratio between MDEF(pi, r, α) and 62 | * σMDEF(pi, r, α) over all r. The higher the ratio, the more outlying 63 | * the object is. The proposed threshold to determine outliers is 3. 64 | *

65 | * 66 | * @author Mennatallah Amer 67 | * 68 | */ 69 | public class LOCIAnomalyDetectionOperator extends 70 | AbstractNearestNeighborBasedAnomalyDetectionOperator { 71 | 72 | /** 73 | * The parameter name for " The minimum number of neighbors in the 74 | * sampling neighborhood. " 75 | **/ 76 | public static String PARAMETER_N_MIN = "n min"; 77 | 78 | /** 79 | * The parameter name for " The ratio of the counting neighborhood 80 | * radius to the sampling neighborhood radius. " 81 | **/ 82 | public static String PARAMETER_ALPHA = "alpha"; 83 | 84 | private DistanceMeasureHelper measureHelper = new DistanceMeasureHelper( 85 | this); 86 | 87 | public LOCIAnomalyDetectionOperator(OperatorDescription description) { 88 | super(description); 89 | getExampleSetInput().addPrecondition( 90 | new DistanceMeasurePrecondition(getExampleSetInput(), this)); 91 | } 92 | 93 | @Override 94 | public double[] doWork(ExampleSet exampleSet, Attributes attributes, 95 | double[][] points, int[] weight) throws OperatorException { 96 | DistanceMeasure measure = measureHelper 97 | .getInitializedMeasure(exampleSet); 98 | double alpha = getParameterAsDouble(PARAMETER_ALPHA); 99 | int nmin = getParameterAsInt(PARAMETER_N_MIN); 100 | int n = points.length; 101 | double[] ret = {1}; 102 | 103 | if (n > 1) { 104 | if (nmin == n) { 105 | this.logWarning("Setting " + PARAMETER_N_MIN + " to #Datapoints-1 because n min can't be equal #Datapoints."); 106 | nmin = n-1; 107 | //this.setParameter(PARAMETER_N_MIN, (n-1)+""); 108 | } 109 | 110 | LOCIEvaluator evaluator = new LOCIEvaluator(measure, alpha, nmin, 111 | points, weight); 112 | ret = evaluator.evaluate(); 113 | } 114 | return ret; 115 | } 116 | 117 | public List getParameterTypes() { 118 | LinkedList types = new LinkedList(); 119 | types 120 | .add(new ParameterTypeDouble( 121 | PARAMETER_ALPHA, 122 | "The ratio of the counting neighborhood radius to the sampling neighborhood radius.", 123 | 0, 1, 0.5)); 124 | types 125 | .add(new 
ParameterTypeInt( 126 | PARAMETER_N_MIN, 127 | "The minimum number of neighbors in the sampling neighborhood.", 128 | 1, Integer.MAX_VALUE, 20, false)); 129 | 130 | types.addAll(DistanceMeasures.getParameterTypes(this)); 131 | return types; 132 | 133 | } 134 | 135 | } 136 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/LOFAnomalyDetectionOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see 40 | * The LOF anomaly detection calculates the anomaly score according to the local 41 | * outlier factor algorithm proposed by Breunig et al[1999;2000]. 42 | *

43 | * 44 | *

45 | * LOF is one of the earliest local density based approaches proposed. There are 46 | * several steps in the calculation of the LOF. The initial step involves 47 | * getting the nearest neighbors set. The definition of the k-distance employed 48 | * is the one proposed in the original paper in order to handle duplicates. The 49 | * definition states that the k-distance(p) has at least k neighbors with 50 | * distinct spatial coordinates that have a distance less than or equal to it and 51 | * at most k-1 of such neighbors with distance strictly less than it. The 52 | * reachability distance (reach-dist(p,o)) is the maximum of the distance 53 | * between point p and o and the k-distance(o). The local reachability density is the 54 | * inverse of the average reachability distance over the nearest neighborhood 55 | * set. Finally the LOF is calculated as the average of the ratio of the local 56 | * reachability density over the neighborhood set. The value of the LOF 57 | * oscillates with the change in the size of the neighborhood. Thus a range is 58 | * defined for the size of the neighborhood. The maximum LOF over that range is 59 | * taken as the final LOF score. 60 | *

61 | *

62 | * A normal instance has an outlier value of approximately 1, while outliers 63 | * have values greater than 1. 64 | *

65 | * 66 | * @author Mennatallah Amer 67 | * 68 | */ 69 | public class LOFAnomalyDetectionOperator extends KNNAnomalyDetectionOperator { 70 | 71 | public static String PARAMETER_MINIMUM_K = "k_min (MinPtsLB)"; 72 | public static String PARAMETER_MAXIMUM_K = "k_max (MinPtsUB)"; 73 | 74 | public LOFAnomalyDetectionOperator(OperatorDescription description) { 75 | super(description); 76 | } 77 | 78 | @Override 79 | public double[] doWork(ExampleSet exampleSet, Attributes attributes, 80 | double[][] points, int[] weight) throws OperatorException { 81 | DistanceMeasure measure = getMeasureHelper().getInitializedMeasure( 82 | exampleSet); 83 | 84 | int n = points.length; 85 | int minK = getParameterAsInt(PARAMETER_MINIMUM_K); 86 | int maxK = getParameterAsInt(PARAMETER_MAXIMUM_K); 87 | double[] ret = {1}; 88 | 89 | if (n > 1) { 90 | if (maxK >= n) { 91 | this.logWarning("Setting " + PARAMETER_MAXIMUM_K + " to "+ (n-1) + " because there cannot be more neighbors than data points."); 92 | maxK = n-1; 93 | //this.setParameter(PARAMETER_MAXIMUM_K, maxK+""); 94 | } 95 | if (maxK < minK) { 96 | this.logWarning("Setting " + PARAMETER_MINIMUM_K + " to "+ maxK + " to make UpperBound at least as large as LowerBound."); 97 | minK = maxK; 98 | //this.setParameter(PARAMETER_MINIMUM_K, minK+""); 99 | } 100 | boolean parallel = getParameterAsBoolean(PARAMETER_PARALLELIZE_EVALUATION_PROCESS); 101 | int numberOfThreads = getParameterAsInt(PARAMETER_NUMBER_OF_THREADS); 102 | 103 | readModel(n,maxK,points,weight,measure); 104 | //KNNCollection knnCollection = new KNNCollection(n, maxK, points, weight); 105 | LOFEvaluator evaluator = new LOFEvaluator(minK, knnCollection, 106 | measure,parallel, numberOfThreads, this, n, maxK , newCollection); 107 | ret = evaluator.evaluate(); 108 | if(newCollection) { 109 | model = new KNNCollectionModel(exampleSet,knnCollection,measure); 110 | } 111 | else { 112 | model = new 
KNNCollectionModel(exampleSet,modelInput.getData(KNNCollectionModel.class).get(),measure); 113 | } 114 | modelOutput.deliver(model); 115 | knnCollection = null; 116 | 117 | } 118 | return ret; 119 | } 120 | 121 | @Override 122 | public List getParameterTypes() { 123 | List types = super.getParameterTypes(); 124 | types.get(0).setKey(PARAMETER_MINIMUM_K); 125 | types.get(0).setDescription("The lower bound of MinPts"); 126 | types.remove(1); 127 | 128 | types.add(1, new ParameterTypeInt(PARAMETER_MAXIMUM_K, 129 | "The upper bound of the MinPts ", 1, Integer.MAX_VALUE, 20, 130 | false)); 131 | 132 | return types; 133 | } 134 | 135 | } 136 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/LoOPAnomalyDetectionOperator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see 1) { 85 | if (k >= n) { 86 | this.logWarning("Setting " + KNNAnomalyDetectionOperator.PARAMETER_K + " to #Datapoints-1."); 87 | k = n-1; 88 | //this.setParameter(KNNAnomalyDetectionOperator.PARAMETER_K, (n-1)+""); 89 | } 90 | boolean parallel = getParameterAsBoolean(PARAMETER_PARALLELIZE_EVALUATION_PROCESS); 91 | int numberOfThreads = getParameterAsInt(PARAMETER_NUMBER_OF_THREADS); 92 | 93 | readModel(n,k,points,weight,measure); 94 | LoOPEvaluator evaluator = new LoOPEvaluator(knnCollection, 95 | measure, lamda,parallel, numberOfThreads, this,n,k,newCollection); 96 | 97 | ret = evaluator.evaluate(); 98 | model = new KNNCollectionModel(exampleSet,knnCollection,measure); 99 | modelOutput.deliver(model); 100 | knnCollection = null; 101 | } 102 | return ret; 103 | } 104 | 105 | public List getParameterTypes() { 106 | List types = super.getParameterTypes(); 107 | types.remove(1); 108 | types 109 | .add( 110 | 1, 111 | new ParameterTypeDouble( 112 | PARAMETER_LAMBDA, 113 | "The normalization factor. The results are weakly affected by this factor. 
", 114 | 1, 3, 3, true)); 115 | 116 | return types; 117 | } 118 | 119 | } 120 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/Point.java: -------------------------------------------------------------------------------- 1 | package de.dfki.madm.anomalydetection.operator.nearest_neighbor_based; 2 | 3 | 4 | 5 | public class Point implements Comparable { 6 | public int getIndex() { 7 | return index; 8 | } 9 | 10 | double[] point; 11 | int index; 12 | 13 | public Point(int index, double[] point) { 14 | this.index = index; 15 | this.point = point; 16 | } 17 | 18 | @Override 19 | public int compareTo(Point arg0) { 20 | int n = point.length; 21 | for (int i = 0; i < n; i++) 22 | if (point[i] != arg0.point[i]) 23 | if (point[i] < arg0.point[i]) 24 | return -1; 25 | else 26 | return 1; 27 | 28 | return 0; 29 | } 30 | 31 | public boolean equals(Object obj) { 32 | 33 | return compareTo((Point) obj) == 0; 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/statistical_based/OutlierCellColorProvider.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2014 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see colors; 34 | private int numberOfSpecialAttributes; 35 | private int color; 36 | private OutlierExampleSet exampleSet; 37 | private DataViewerTable dataTable; 38 | 39 | public OutlierCellColorProvider(DataViewerTable dataTable,HashMap colors,int numberOfSpecialAttributes,OutlierExampleSet exampleSet) { 40 | this.colors = colors; 41 | this.numberOfSpecialAttributes = numberOfSpecialAttributes; 42 | this.dataTable = dataTable; 43 | this.exampleSet = exampleSet; 44 | } 45 | 46 | @Override 47 | public Color getCellColor(int row, int column) { 48 | int col = dataTable.convertColumnIndexToModel(column); 49 | if(col <= numberOfSpecialAttributes) { 50 | if(col == 0) { 51 | // first column (index) 52 | if (row % 2 == 0) { 53 | return Color.WHITE; 54 | } else { 55 | return SwingTools.LIGHTEST_BLUE; 56 | } 57 | } 58 | if (row % 2 == 0) { 59 | //i special attributes (like score) 60 | return Color.WHITE; 61 | } else { 62 | return SwingTools.LIGHTEST_YELLOW; 63 | } 64 | } 65 | else { 66 | //i row index 67 | String index = dataTable.getCell(row+1,0); 68 | // attribute name 69 | String attribute_name = dataTable.getCell(0,column); 70 | // index of that attribute needed to fetch the right color value 71 | int tableIndex = exampleSet.getAttributes().get(attribute_name).getTableIndex(); 72 | color = (int)colors.get(tableIndex)[Integer.parseInt(index)-1]; 73 | 74 | /*transform the integer values into actual colors. 75 | * 510 => green (0,255,0) 76 | * 255 => yellow (255,255,0) 77 | * 0 => red (255,0,0) 78 | * ... 
79 | */ 80 | if(color == -1){ 81 | return Color.white; 82 | } 83 | else if(color >= 255) { 84 | return new Color((510-color), 255, 0); 85 | } 86 | // second half of colors go from yellow to red (unnormal bins) 87 | else{ 88 | return new Color(255,(color) , 0); 89 | } 90 | } 91 | 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/statistical_based/OutlierColorJoin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2014 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. 
If not, see eColor = new HashMap(); 69 | for(Attribute att : exampleSetAttributes) { 70 | int[] colors = new int[exampleSet.size()]; 71 | Arrays.fill(colors,-1); 72 | eColor.put(att.getTableIndex(), colors); 73 | } 74 | //for(int x = 0; x< exampleSet.size();x++) { 75 | int x = 0; 76 | outlierExampleSet.remapIds(); 77 | 78 | for(Example e : exampleSet) { 79 | currentId = e.getId(); 80 | int[] i = outlierExampleSet.getExampleIndicesFromId(currentId); 81 | try { 82 | if(i.length > 1 ) { 83 | throw new OperatorException("Ids are not unique."); 84 | } 85 | } 86 | catch (NullPointerException E){ 87 | this.logNote("Data Row with id " +currentId+ " not found in the OutlierExampleSet"); 88 | } 89 | for(Attribute att: exampleSetAttributes) { 90 | for(Attribute outAtt : outlierExampleSetAttributes){ 91 | // find attribute with the same name 92 | if(att.getName() == outAtt.getName()) { 93 | //save color 94 | int[] colors = eColor.get(att.getTableIndex()); 95 | 96 | try { 97 | colors[x] = outlierExampleSet.colors.get(att.getTableIndex())[i[0]]; 98 | } 99 | catch (NullPointerException E ){ 100 | colors[x] = -1; 101 | this.logNote("Attribute "+ att.getName() + " not found in the OutlierExampleSet"); 102 | } 103 | eColor.put(att.getTableIndex(), colors); 104 | } 105 | } 106 | 107 | } 108 | x++; 109 | } 110 | return new OutlierExampleSet(exampleSet,eColor); 111 | 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/statistical_based/OutlierDataViewer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2014 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 
6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see applicableFilterNames = new LinkedList(); 94 | for (String conditionName : ConditionedExampleSet.KNOWN_CONDITION_NAMES) { 95 | try { 96 | ConditionedExampleSet.createCondition(conditionName, exampleSet, null); 97 | applicableFilterNames.add(conditionName); 98 | } catch (ConditionCreationException ex) {} // Do nothing 99 | } 100 | String[] applicableConditions = new String[applicableFilterNames.size()]; 101 | applicableFilterNames.toArray(applicableConditions); 102 | final JComboBox filterSelector = new JComboBox(applicableConditions); 103 | filterSelector.setToolTipText("These filters can be used to skip examples in the view fulfilling the filter condition."); 104 | filterSelector.addItemListener(new ItemListener() { 105 | @Override 106 | public void itemStateChanged(ItemEvent e) { 107 | updateFilter((String)filterSelector.getSelectedItem()); 108 | } 109 | }); 110 | 111 | int maxNumberBeforeFiltering = DEFAULT_MAX_SIZE_FOR_FILTERING; 112 | String maxString = ParameterService.getParameterValue(RapidMinerGUI.PROPERTY_RAPIDMINER_GUI_MAX_STATISTICS_ROWS); 113 | if (maxString != null) { 114 | try { 115 | maxNumberBeforeFiltering = Integer.parseInt(maxString); 116 | } catch (NumberFormatException e) { 117 | // do nothing 118 | } 119 | } 120 | if (exampleSet.size() > maxNumberBeforeFiltering) { 121 | filterSelector.setEnabled(false); 122 | } 123 | toolBar.add(filterSelector, ViewToolBar.RIGHT); 124 | toolBar.setPreferredSize(new Dimension(getWidth(), 29)); 125 | } 126 | 127 | add(toolBar, BorderLayout.NORTH); 128 | JScrollPane tableScrollPane = new 
ExtendedJScrollPane(dataTable); 129 | tableScrollPane.setBorder(null); 130 | add(tableScrollPane, BorderLayout.CENTER); 131 | setExampleSet(exampleSet); 132 | 133 | /* 134 | * set the right cell color Proiver 135 | */ 136 | final int numberOfSpecialAttributes = exampleSet.getAttributes().specialSize(); 137 | dataTable.setCellColorProvider(new OutlierCellColorProvider(dataTable,exampleSet.colors,numberOfSpecialAttributes, exampleSet)); 138 | } 139 | 140 | public void setExampleSet(ExampleSet exampleSet) { 141 | dataTable.setExampleSet(exampleSet); 142 | } 143 | 144 | private void updateFilter(String conditionName) { 145 | ExampleSet filteredExampleSet = originalExampleSet; 146 | try { 147 | Condition condition = ConditionedExampleSet.createCondition(conditionName, originalExampleSet, null); 148 | filteredExampleSet = new ConditionedExampleSet(originalExampleSet, condition); 149 | } catch (ConditionCreationException ex) { 150 | originalExampleSet.getLog().logError("Cannot create condition '" + conditionName + "' for filtered data view: " + ex.getMessage() + ". 
Using original data set view..."); 151 | filteredExampleSet = originalExampleSet; 152 | } 153 | updateFilterCounter(filteredExampleSet); 154 | setExampleSet(filteredExampleSet); 155 | } 156 | 157 | private void updateFilterCounter(ExampleSet filteredExampleSet) { 158 | filterCounter.setText("(" + filteredExampleSet.size() + " / " + originalExampleSet.size() + "): "); 159 | } 160 | 161 | @Override 162 | public void prepareReporting() { 163 | dataTable.prepareReporting(); 164 | } 165 | 166 | @Override 167 | public void finishReporting() { 168 | dataTable.finishReporting(); 169 | } 170 | 171 | @Override 172 | public String getColumnName(int columnIndex) { 173 | return dataTable.getColumnName(columnIndex); 174 | } 175 | 176 | @Override 177 | public String getCell(int row, int column) { 178 | return dataTable.getCell(row, column); 179 | } 180 | 181 | @Override 182 | public int getColumnNumber() { 183 | return dataTable.getColumnNumber(); 184 | } 185 | 186 | @Override 187 | public int getRowNumber() { 188 | return dataTable.getRowNumber(); 189 | } 190 | 191 | @Override 192 | public boolean isFirstLineHeader() { return false; } 193 | 194 | @Override 195 | public boolean isFirstColumnHeader() { return false; } 196 | 197 | 198 | 199 | 200 | } 201 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/statistical_based/OutlierExampleSet.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2014 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 
11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see colors; 38 | public OutlierExampleSet(ExampleSet exampleSet,HashMap colors){ 39 | this.colors = colors; 40 | this.data = exampleSet; 41 | } 42 | @Override 43 | public String getName() { 44 | return "OutlierExampleSet"; 45 | } 46 | @Override 47 | public Attributes getAttributes() { 48 | return data.getAttributes(); 49 | } 50 | 51 | @Override 52 | public Example getExample(int arg0) { 53 | return data.getExample(arg0); 54 | } 55 | 56 | @Override 57 | public ExampleTable getExampleTable() { 58 | return data.getExampleTable(); 59 | } 60 | 61 | @Override 62 | public int size() { 63 | return data.size(); 64 | } 65 | @Override 66 | public OutlierExampleSet clone() { 67 | OutlierExampleSet result = new OutlierExampleSet((ExampleSet)data.clone(),(HashMap)colors.clone()); 68 | return result; 69 | 70 | } 71 | @Override 72 | public Iterator iterator() { 73 | return data.iterator(); 74 | } 75 | 76 | 77 | } 78 | -------------------------------------------------------------------------------- /src/de/dfki/madm/anomalydetection/operator/statistical_based/OutlierExampleSetDataRenderer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * RapidMiner Anomaly Detection Extension 3 | * 4 | * Copyright (C) 2009-2014 by Deutsches Forschungszentrum fuer 5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable. 6 | * 7 | * This is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Affero General Public License as published by 9 | * the Free Software Foundation, either version 3 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * You should have received a copy of the GNU Affero General Public License 13 | * along with this software. If not, see