├── README.md
├── build.properties
├── build.xml
├── lib
├── Jama-1.0.3.jar
├── commons-math-2.2.jar
├── copyYourLibsHere
└── encog-mod-3.2.0.jar
├── licenses
├── LICENSE
└── short_license.txt
├── resources
├── ABOUT.NFO
├── META-INF
│ └── icon.png
└── com
│ └── rapidminer
│ └── resources
│ ├── OperatorsAnomalyDetection.xml
│ ├── groupsAnomalyDetection.properties
│ ├── i18n
│ ├── ErrorsAnomalyDetection.properties
│ ├── GUIAnomalyDetection.properties
│ ├── OperatorsDocAnomalyDetection.xml
│ └── UserErrorMessagesAnomalyDetection.properties
│ ├── ioobjectsAnomalyDetection.xml
│ └── parserulesAnomalyDetection.xml
└── src
├── anomalydetection_libsvm
├── COPYRIGHT
├── Kernel.java
├── Svm.java
├── svm_model.java
├── svm_node.java
├── svm_parameter.java
└── svm_problem.java
└── de
└── dfki
└── madm
└── anomalydetection
├── PlugInitAnomalyDetection.java
├── evaluator
├── Evaluator.java
├── cluster_based
│ ├── CBLOFEvaluator.java
│ ├── CMGOSEvaluator.java
│ ├── ClusterOrder.java
│ ├── CovarianceMatrix.java
│ └── LDCOFEvaluator.java
├── evaluation
│ └── ROCEvaluator.java
├── kernel_based
│ ├── AnomalyDetectionLibSVMEvaluator.java
│ └── RBF_Kernel.java
├── nearest_neighbor_based
│ ├── ALOCIEvaluator.java
│ ├── COFEvaluator.java
│ ├── INFLOEvaluator.java
│ ├── KNNCollection.java
│ ├── KNNCollectionModel.java
│ ├── KNNEvaluator.java
│ ├── LOCIEvaluator.java
│ ├── LOFEvaluator.java
│ └── LoOPEvaluator.java
└── statistical_based
│ ├── HistogramBin.java
│ └── HistogramEvaluator.java
└── operator
├── AbstractAnomalyDetectionOperator.java
├── cluster_based
├── AbstractClusteringAnomalyDetectionOperator.java
├── CBLOFAnomalyDetectionOperator.java
├── CMGOSAnomalyDetectionOperator.java
└── LDCOFAnomalyDetectionOperator.java
├── evaluation
├── ROCOperator.java
└── ROCPerformanceVector.java
├── kernel_based
├── AnomalyDetectionLibSVMOperator.java
└── NumberOfSupportVectorsValue.java
├── model_based
├── ALSOOperator.java
└── RNNOperator.java
├── nearest_neighbor_based
├── ALOCIAnomalyDetectionOperator.java
├── AbstractNearestNeighborBasedAnomalyDetectionOperator.java
├── COFAnomalyDetectionOperator.java
├── INFLOAnomalyDetectionOperator.java
├── KNNAnomalyDetectionOperator.java
├── LOCIAnomalyDetectionOperator.java
├── LOFAnomalyDetectionOperator.java
├── LoOPAnomalyDetectionOperator.java
└── Point.java
└── statistical_based
├── HistogramOperator.java
├── OutlierCellColorProvider.java
├── OutlierColorJoin.java
├── OutlierDataViewer.java
├── OutlierExampleSet.java
├── OutlierExampleSetDataRenderer.java
├── OutlierJTable.java
└── RobustPCAOperator.java
/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | RapidMiner Anomaly Detection Extension
4 | ======================================
5 |
6 | The Anomaly Detection Extension for RapidMiner comprises the best-known unsupervised anomaly detection algorithms, which assign individual anomaly scores to the data rows of example sets. It allows you to find data that differs significantly from the norm, without the need for the data to be labeled.
7 |
8 | Some of the algorithms are:
9 |
10 | * Local Outlier Factor (LOF)
11 | * k-NN Global Anomaly Score
12 | * Connectivity-based Outlier Factor (COF)
13 | * Local Correlation Integral (LOCI)
14 | * Local Outlier Probability (LoOP)
15 | * Cluster-based Local Outlier Factor (CBLOF)
16 |
17 | More information and usage examples can be found on the author's homepage
18 |
19 | Installation
20 | ------------
21 |
22 | * In RapidMiner, go to Help->Updates and Extensions (Marketplace) and search for “anomaly detection” and click on “Install”, or
23 | * Copy the jar file to the “lib/plugins” directory of RapidMiner
24 |
25 | Copyright/ License/ Credits
26 | ---------------------------
27 |
28 | Copyright 2008-2013 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz
29 | Copyright 2008-2019 Markus Goldstein
30 |
31 | This is free software. Licensed under the [GNU AGPL, Version 3](licenses/LICENSE).
32 | There is NO WARRANTY, to the extent permitted by law.
33 |
34 | Authors
35 | -------
36 |
37 | Markus Goldstein
38 | Mennatallah Amer
39 | Johann Gebhardt
40 | Patrick Kalka
41 | Ahmed Elsawy
42 |
43 | This Software is supported by ...
44 | [](http://www.madm.eu/)
45 |
46 |
47 | [](https://www.goldiges.de/)
48 |
--------------------------------------------------------------------------------
/build.properties:
--------------------------------------------------------------------------------
1 | extension.version=2
2 | extension.revision=4
3 | extension.update=001
4 |
--------------------------------------------------------------------------------
/build.xml:
--------------------------------------------------------------------------------
1 |
2 | Build file for the RapidMiner Anomaly Detection extension
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/lib/Jama-1.0.3.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/lib/Jama-1.0.3.jar
--------------------------------------------------------------------------------
/lib/commons-math-2.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/lib/commons-math-2.2.jar
--------------------------------------------------------------------------------
/lib/copyYourLibsHere:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/lib/copyYourLibsHere
--------------------------------------------------------------------------------
/lib/encog-mod-3.2.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/lib/encog-mod-3.2.0.jar
--------------------------------------------------------------------------------
/licenses/short_license.txt:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner
3 | *
4 | * Copyright (C) 2001-2009 by Rapid-I and the contributors
5 | *
6 | * Complete list of developers available at our web site:
7 | *
8 | * http://rapid-i.com
9 | *
10 | * This program is free software: you can redistribute it and/or modify
11 | * it under the terms of the GNU Affero General Public License as published by
12 | * the Free Software Foundation, either version 3 of the License, or
13 | * (at your option) any later version.
14 | *
15 | * This program is distributed in the hope that it will be useful,
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | * GNU Affero General Public License for more details.
19 | *
20 | * You should have received a copy of the GNU Affero General Public License
21 | * along with this program. If not, see http://www.gnu.org/licenses/.
22 | */
23 |
--------------------------------------------------------------------------------
/resources/ABOUT.NFO:
--------------------------------------------------------------------------------
1 | The Anomaly Detection Extension comprises the best-known unsupervised anomaly detection algorithms, which assign individual anomaly scores to the data rows of example sets.
2 |
--------------------------------------------------------------------------------
/resources/META-INF/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/resources/META-INF/icon.png
--------------------------------------------------------------------------------
/resources/com/rapidminer/resources/OperatorsAnomalyDetection.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | table_new.png
6 |
7 |
8 |
9 | k-NN Global Anomaly Score
10 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.KNNAnomalyDetectionOperator
11 |
12 |
13 |
14 |
15 | Local Outlier Factor (LOF)
16 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.LOFAnomalyDetectionOperator
17 |
18 |
19 |
20 | Connectivity-Based Outlier Factor (COF)
21 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.COFAnomalyDetectionOperator
22 |
23 |
24 |
25 |
26 | Local Correlation Integeral (LOCI)
27 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.LOCIAnomalyDetectionOperator
28 |
29 |
30 |
31 |
32 | approximate Local Correlation Integral (aLOCI)
33 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.ALOCIAnomalyDetectionOperator
34 |
35 |
36 |
37 |
38 | Local Outlier Probablity (LoOP)
39 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.LoOPAnomalyDetectionOperator
40 |
41 |
42 |
43 |
44 | Influenced Outlierness (INFLO)
45 | de.dfki.madm.anomalydetection.operator.nearest_neighbor_based.INFLOAnomalyDetectionOperator
46 |
47 |
48 |
49 |
50 |
51 |
52 | Cluster-Based Local Outlier Factor (CBLOF)
53 | de.dfki.madm.anomalydetection.operator.cluster_based.CBLOFAnomalyDetectionOperator
54 |
55 |
56 |
57 |
58 | Local Density Cluster-Based Outlier Factor (LDCOF)
59 | de.dfki.madm.anomalydetection.operator.cluster_based.LDCOFAnomalyDetectionOperator
60 |
61 |
62 |
63 |
64 | Clustering-based Multivariate Gaussian Outlier Score (CMGOS)
65 | de.dfki.madm.anomalydetection.operator.cluster_based.CMGOSAnomalyDetectionOperator
66 |
67 |
68 |
69 |
70 |
71 |
72 | Histogram-based Outlier Score (HBOS)
73 | de.dfki.madm.anomalydetection.operator.statistical_based.HistogramOperator
74 |
75 |
76 |
77 | Color Coded Join
78 | de.dfki.madm.anomalydetection.operator.statistical_based.OutlierColorJoin
79 |
80 |
81 |
82 | Robust Principal Component Analysis Anomaly Score (rPCA)
83 | de.dfki.madm.anomalydetection.operator.statistical_based.RobustPCAOperator
84 |
85 |
86 |
87 |
88 |
89 | One-Class LIBSVM Anomaly Score
90 | de.dfki.madm.anomalydetection.operator.kernel_based.AnomalyDetectionLibSVMOperator
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 | Attribute-wise Learning (ALSO)
99 | de.dfki.madm.anomalydetection.operator.model_based.ALSOOperator
100 |
101 |
102 | Replicator Neural Network (RNN)
103 | de.dfki.madm.anomalydetection.operator.model_based.RNNOperator
104 |
105 |
106 |
107 |
108 |
109 | Generate ROC
110 | de.dfki.madm.anomalydetection.operator.evaluation.ROCOperator
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
--------------------------------------------------------------------------------
/resources/com/rapidminer/resources/groupsAnomalyDetection.properties:
--------------------------------------------------------------------------------
1 | # red
2 | group.anomaly_detection.color = #f1d9e8
3 |
--------------------------------------------------------------------------------
/resources/com/rapidminer/resources/i18n/ErrorsAnomalyDetection.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/resources/com/rapidminer/resources/i18n/ErrorsAnomalyDetection.properties
--------------------------------------------------------------------------------
/resources/com/rapidminer/resources/i18n/GUIAnomalyDetection.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Markus-Go/rapidminer-anomalydetection/1dec0f604ef04ae044659ceee9120aa4a53e7ae0/resources/com/rapidminer/resources/i18n/GUIAnomalyDetection.properties
--------------------------------------------------------------------------------
/resources/com/rapidminer/resources/i18n/UserErrorMessagesAnomalyDetection.properties:
--------------------------------------------------------------------------------
1 | error.1001.name = Error in sub process
2 | error.1001.short = Error in sub process
3 | error.1001.long = An error has occurred in the subprocess for learning the models.
4 |
--------------------------------------------------------------------------------
/resources/com/rapidminer/resources/ioobjectsAnomalyDetection.xml:
--------------------------------------------------------------------------------
1 |
2 |
7 | de.dfki.madm.anomalydetection.operator.statistical_based.OutlierExampleSetDataRenderer
8 | com.rapidminer.gui.renderer.data.ExampleSetDataRenderer
9 | com.rapidminer.gui.renderer.data.ExampleSetMetaDataRenderer
10 | com.rapidminer.gui.renderer.data.ExampleSetPlotRenderer
11 | com.rapidminer.gui.new_plotter.integration.ExpertDataTableRenderer
12 | com.rapidminer.gui.renderer.AnnotationsRenderer
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/resources/com/rapidminer/resources/parserulesAnomalyDetection.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/src/anomalydetection_libsvm/COPYRIGHT:
--------------------------------------------------------------------------------
1 | Copyright (c) 2000-2005 Chih-Chung Chang and Chih-Jen Lin
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions
6 | are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright
12 | notice, this list of conditions and the following disclaimer in the
13 | documentation and/or other materials provided with the distribution.
14 |
15 | 3. Neither name of copyright holders nor the names of its contributors
16 | may be used to endorse or promote products derived from this software
17 | without specific prior written permission.
18 |
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
24 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/src/anomalydetection_libsvm/Kernel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner
3 | *
4 | * Copyright (C) 2001-2013 by Rapid-I and the contributors
5 | *
6 | * Complete list of developers available at our web site:
7 | *
8 | * http://rapid-i.com
9 | *
10 | * This program is free software: you can redistribute it and/or modify
11 | * it under the terms of the GNU Affero General Public License as published by
12 | * the Free Software Foundation, either version 3 of the License, or
13 | * (at your option) any later version.
14 | *
15 | * This program is distributed in the hope that it will be useful,
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | * GNU Affero General Public License for more details.
19 | *
20 | * You should have received a copy of the GNU Affero General Public License
21 | * along with this program. If not, see http://www.gnu.org/licenses/.
22 | */
23 | package anomalydetection_libsvm;
24 |
25 | public abstract class Kernel extends QMatrix {
26 | private svm_node[][] x;
27 | private final double[] x_square;
28 |
29 | // svm_parameter
30 | private final int kernel_type;
31 | private final int degree;
32 | private final double gamma;
33 | private final double coef0;
34 |
35 | @Override
36 | abstract float[] get_Q(int column, int len);
37 | @Override
38 | abstract float[] get_QD();
39 |
40 | @Override
41 | void swap_index(int i, int j)
42 | {
43 | do {svm_node[] _=x[i]; x[i]=x[j]; x[j]=_;} while(false);
44 | if(x_square != null) do {double _=x_square[i]; x_square[i]=x_square[j]; x_square[j]=_;} while(false);
45 | }
46 |
47 | private static double powi(double base, int times) {
48 | double tmp = base, ret = 1.0;
49 |
50 | for(int t=times; t>0; t/=2)
51 | {
52 | if(t%2!=0) ret*=tmp;
53 | tmp = tmp * tmp;
54 | }
55 | return ret;
56 | }
57 |
58 | private static double tanh(double x) {
59 | double e = Math.exp(x);
60 | return 1.0-2.0/(e*e+1);
61 | }
62 |
63 | public double kernel_function(int i, int j) {
64 | switch(kernel_type) {
65 | case svm_parameter.LINEAR:
66 | return dot(x[i],x[j]);
67 | case svm_parameter.POLY:
68 | return powi(gamma*dot(x[i],x[j])+coef0,degree);
69 | case svm_parameter.RBF:
70 | return Math.exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
71 | case svm_parameter.SIGMOID:
72 | return tanh(gamma*dot(x[i],x[j])+coef0);
73 | case svm_parameter.PRECOMPUTED:
74 | return x[i][(int)(x[j][0].value)].value;
75 | default:
76 | return 0; // java
77 | }
78 | }
79 |
80 | Kernel(int l, svm_node[][] x_, svm_parameter param) {
81 | this.kernel_type = param.kernel_type;
82 | this.degree = param.degree;
83 | this.gamma = param.gamma;
84 | this.coef0 = param.coef0;
85 |
86 | x = x_.clone();
87 |
88 | if(kernel_type == svm_parameter.RBF)
89 | {
90 | x_square = new double[l];
91 | for(int i=0;i y[j].index)
111 | ++j;
112 | else
113 | ++i;
114 | }
115 | }
116 | return sum;
117 | }
118 |
119 | static double k_function(svm_node[] x, svm_node[] y,
120 | svm_parameter param)
121 | {
122 | switch(param.kernel_type)
123 | {
124 | case svm_parameter.LINEAR:
125 | return dot(x,y);
126 | case svm_parameter.POLY:
127 | return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
128 | case svm_parameter.RBF:
129 | {
130 | double sum = 0;
131 | int xlen = x.length;
132 | int ylen = y.length;
133 | int i = 0;
134 | int j = 0;
135 | while(i < xlen && j < ylen)
136 | {
137 | if(x[i].index == y[j].index)
138 | {
139 | double d = x[i++].value - y[j++].value;
140 | sum += d*d;
141 | }
142 | else if(x[i].index > y[j].index)
143 | {
144 | sum += y[j].value * y[j].value;
145 | ++j;
146 | }
147 | else
148 | {
149 | sum += x[i].value * x[i].value;
150 | ++i;
151 | }
152 | }
153 |
154 | while(i < xlen)
155 | {
156 | sum += x[i].value * x[i].value;
157 | ++i;
158 | }
159 |
160 | while(j < ylen)
161 | {
162 | sum += y[j].value * y[j].value;
163 | ++j;
164 | }
165 |
166 | return Math.exp(-param.gamma*sum);
167 | }
168 | case svm_parameter.SIGMOID:
169 | return tanh(param.gamma*dot(x,y)+param.coef0);
170 | case svm_parameter.PRECOMPUTED:
171 | return x[(int)(y[0].value)].value;
172 | default:
173 | return 0; // java
174 | }
175 | }
176 | }
177 |
--------------------------------------------------------------------------------
/src/anomalydetection_libsvm/svm_model.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner
3 | *
4 | * Copyright (C) 2001-2013 by Rapid-I and the contributors
5 | *
6 | * Complete list of developers available at our web site:
7 | *
8 | * http://rapid-i.com
9 | *
10 | * This program is free software: you can redistribute it and/or modify
11 | * it under the terms of the GNU Affero General Public License as published by
12 | * the Free Software Foundation, either version 3 of the License, or
13 | * (at your option) any later version.
14 | *
15 | * This program is distributed in the hope that it will be useful,
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | * GNU Affero General Public License for more details.
19 | *
20 | * You should have received a copy of the GNU Affero General Public License
21 | * along with this program. If not, see http://www.gnu.org/licenses/.
22 | */
23 | package anomalydetection_libsvm;
24 |
25 | public class svm_model implements java.io.Serializable {
26 |
27 | private static final long serialVersionUID = 7974831813044169852L;
28 |
29 | public svm_parameter param; // parameter
30 | public int nr_class; // number of classes, = 2 in regression/one class svm
31 | public int l; // total #SV
32 | public int nBSV; // Bound SV(only correct in case of one-class SVM)
33 | public int []nBSVIndicies; // Indicies of bound support vectors relative to the original exampleset
34 | public svm_node[][] SV; // SVs (SV[l])
35 | public double[][] sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
36 | public double[] rho; // constants in decision functions (rho[k*(k-1)/2])
37 | public double[] probA; // pariwise probability information
38 | public double[] probB;
39 |
40 | // for classification only
41 |
42 | public int[] label; // label of each class (label[k])
43 | public int[] nSV; // number of SVs for each class (nSV[k])
44 | // nSV[0] + nSV[1] + ... + nSV[k-1] = l
45 | public double max_confidence; // confidence used for anomaly detection score
46 |
47 | public double[] labelValues; // actual label values for all support vectors (only used for displaying)
48 | }
49 |
--------------------------------------------------------------------------------
/src/anomalydetection_libsvm/svm_node.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner
3 | *
4 | * Copyright (C) 2001-2013 by Rapid-I and the contributors
5 | *
6 | * Complete list of developers available at our web site:
7 | *
8 | * http://rapid-i.com
9 | *
10 | * This program is free software: you can redistribute it and/or modify
11 | * it under the terms of the GNU Affero General Public License as published by
12 | * the Free Software Foundation, either version 3 of the License, or
13 | * (at your option) any later version.
14 | *
15 | * This program is distributed in the hope that it will be useful,
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | * GNU Affero General Public License for more details.
19 | *
20 | * You should have received a copy of the GNU Affero General Public License
21 | * along with this program. If not, see http://www.gnu.org/licenses/.
22 | */
23 | package anomalydetection_libsvm;
24 |
/**
 * Serializable pair of an integer {@code index} and a double {@code value},
 * both exposed as public fields.
 */
public class svm_node implements java.io.Serializable {

    private static final long serialVersionUID = -3046511301730620312L;

    /** Index part of the pair. */
    public int index;

    /** Value part of the pair. */
    public double value;
}
32 |
--------------------------------------------------------------------------------
/src/anomalydetection_libsvm/svm_parameter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner
3 | *
4 | * Copyright (C) 2001-2013 by Rapid-I and the contributors
5 | *
6 | * Complete list of developers available at our web site:
7 | *
8 | * http://rapid-i.com
9 | *
10 | * This program is free software: you can redistribute it and/or modify
11 | * it under the terms of the GNU Affero General Public License as published by
12 | * the Free Software Foundation, either version 3 of the License, or
13 | * (at your option) any later version.
14 | *
15 | * This program is distributed in the hope that it will be useful,
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | * GNU Affero General Public License for more details.
19 | *
20 | * You should have received a copy of the GNU Affero General Public License
21 | * along with this program. If not, see http://www.gnu.org/licenses/.
22 | */
23 | package anomalydetection_libsvm;
24 |
/**
 * Serializable, cloneable bag of SVM settings. The constants name the supported
 * SVM and kernel types; every setting is a public field.
 */
public class svm_parameter implements Cloneable,java.io.Serializable {

    private static final long serialVersionUID = -2733609912517132812L;

    /* svm_type */
    public static final int C_SVC = 0;
    public static final int NU_SVC = 1;
    public static final int ONE_CLASS = 2;
    public static final int EPSILON_SVR = 3;
    public static final int NU_SVR = 4;
    public static final int ROBUST_ONE_CLASS = 5;
    public static final int ETA_ONE_CLASS = 6;


    /* kernel_type */
    public static final int LINEAR = 0;
    public static final int POLY = 1;
    public static final int RBF = 2;
    public static final int SIGMOID = 3;
    public static final int PRECOMPUTED = 4;

    public int svm_type;
    public int kernel_type;
    public int degree;	// for poly
    public double gamma;	// for poly/rbf/sigmoid
    public double coef0;	// for poly/sigmoid

    // these are for training only
    public double cache_size; // in MB
    public double eps;	// stopping criteria
    public double C;	// for C_SVC, EPSILON_SVR and NU_SVR
    public int nr_weight;		// for C_SVC
    public int[] weight_label;	// for C_SVC
    public double[] weight;		// for C_SVC
    public double nu;	// for NU_SVC, ONE_CLASS, and NU_SVR
    public double p;	// for EPSILON_SVR
    public int shrinking;	// use the shrinking heuristics
    public int probability; // do probability estimates
    public double lambda; // lambda

    /**
     * Returns a shallow copy of this parameter set: primitive fields are copied, while
     * {@code weight_label} and {@code weight} still refer to the same arrays as the original.
     *
     * @return the copy, never {@code null}
     * @throws IllegalStateException if cloning fails, which cannot happen while this class
     *         implements {@link Cloneable}
     */
    @Override
    public Object clone() {
        try
        {
            return super.clone();
        } catch (CloneNotSupportedException e)
        {
            // Previously this returned null, pushing a NullPointerException onto the caller.
            throw new IllegalStateException("svm_parameter could not be cloned", e);
        }
    }
}
76 |
--------------------------------------------------------------------------------
/src/anomalydetection_libsvm/svm_problem.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner
3 | *
4 | * Copyright (C) 2001-2013 by Rapid-I and the contributors
5 | *
6 | * Complete list of developers available at our web site:
7 | *
8 | * http://rapid-i.com
9 | *
10 | * This program is free software: you can redistribute it and/or modify
11 | * it under the terms of the GNU Affero General Public License as published by
12 | * the Free Software Foundation, either version 3 of the License, or
13 | * (at your option) any later version.
14 | *
15 | * This program is distributed in the hope that it will be useful,
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | * GNU Affero General Public License for more details.
19 | *
20 | * You should have received a copy of the GNU Affero General Public License
21 | * along with this program. If not, see http://www.gnu.org/licenses/.
22 | */
23 | package anomalydetection_libsvm;
24 |
25 | public class svm_problem implements java.io.Serializable {
26 |
27 | private static final long serialVersionUID = -4451389443706847272L;
28 |
29 | public int l;
30 | public double[] y;
31 | public svm_node[][] x;
32 | }
33 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/PlugInitAnomalyDetection.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see = sizeNormal)
90 | numberOfLargeClusters = i;
91 | else {
92 | if (i != numberOfClusters - 1) {
93 | if (clusterOrders[i].getClusterSize() / clusterOrders[i+1].getClusterSize() >= beta)
94 | numberOfLargeClusters = i;
95 | }
96 | }
97 | }
98 | else break;
99 | }
100 | for (int i=0; i< numberOfClusters; i++)
101 | result[clusterOrders[i].getClusterIndex()]= i<=numberOfLargeClusters;
102 |
103 | return result;
104 | }
105 |
106 | /**
107 | * The method that computes CBLOF.
108 | *
109 | * @param weighting
110 | *
111 | *
112 | * @return The array containing the cblof scores.
113 | *
114 | */
115 | public double[] evaluate() {
116 | int n = points.length;
117 |
118 | double[] cblof = new double[n];
119 |
120 | int numberOfClusters= centroids.length;
121 |
122 | // calculates cblof
123 | for (int i = 0; i < n; i++) {
124 | int clusterIndex = belongsToCluster[i];
125 | if (largeCluster[clusterIndex]) {
126 | // It is a large cluster
127 | cblof[i] = measure.calculateDistance(centroids[clusterIndex],
128 | points[i]);
129 | if (weighting)
130 | cblof[i] *= clusterSize[clusterIndex];
131 | } else {
132 | // It is a small cluster
133 |
134 | double MinDistance = Double.MAX_VALUE;
135 |
136 | // search for the nearest large cluster
137 | for (int j = 0; j {
33 | private int clusterIndex;
34 | private int clusterSize;
/**
 * Creates an entry pairing a cluster index with that cluster's size.
 *
 * @param index the cluster index to remember
 * @param size  the cluster size to remember
 */
public ClusterOrder(int index, int size) {
    this.clusterIndex = index;
    this.clusterSize = size;
}
39 | @Override
40 | public int compareTo(ClusterOrder o) {
41 |
42 | return o.clusterSize- clusterSize;
43 | }
44 | public int getClusterIndex() {
45 | return clusterIndex;
46 | }
47 | public int getClusterSize() {
48 | return clusterSize;
49 | }
50 |
51 | public static ClusterOrder[] getOrderedClusters(int [] clusterSize){
52 | int numberOfClusters= clusterSize.length;
53 | ClusterOrder[]clusterOrders= new ClusterOrder[numberOfClusters];
54 | for (int i=0; i< numberOfClusters; i++){
55 | clusterOrders[i]= new ClusterOrder(i, clusterSize[i]);
56 | }
57 | Arrays.sort(clusterOrders);
58 | return clusterOrders;
59 | }
60 |
61 |
62 | }
63 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/evaluator/cluster_based/CovarianceMatrix.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2012 by Deutsches Forschungszentrum fuer Kuenstliche
5 | * Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify it under the
8 | * terms of the GNU Affero General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option) any
10 | * later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see data, int numberOfThreads) {
52 | this.data = new double[data.size()][];
53 | int index = 0;
54 | for (double[] ar : data) {
55 | this.data[index] = ar;
56 | index++;
57 | }
58 | this.numberOfThreads = numberOfThreads;
59 | this.doWork();
60 | }
61 |
/**
 * Stores the given data array (by reference, without copying) and the thread count,
 * then runs {@code doWork()}.
 *
 * @param data            the data rows to work on
 * @param numberOfThreads the thread count to store for the computation
 */
public CovarianceMatrix(double[][] data, int numberOfThreads) {
    this.numberOfThreads = numberOfThreads;
    this.data = data;
    this.doWork();
}
67 |
68 | private void calcCovMat() {
69 | Thread[] temp = new Thread[this.numberOfThreads];
70 | int count = 0;
71 | for (int i = 0; i < dim; i++) {
72 | for (int j = 0; j < dim; j++) {
73 | temp[count]= new worker(CovMat, i, j, data);
74 | temp[count].start();
75 | count++;
76 | if (count == this.numberOfThreads) {
77 | count = 0;
78 | for (int j1 = 0; j1 < this.numberOfThreads; j1++) {
79 | try {
80 | temp[j1].join();
81 | } catch (InterruptedException e) {
82 | e.printStackTrace();
83 | }
84 | }
85 | }
86 | }
87 | }
88 | }
89 | }
90 |
/**
 * Thread that computes a single matrix entry: the sum over all rows of
 * {@code points[row][j] * points[row][k]}, scaled by
 * {@code 1 / (number of rows - 1)}, written to {@code CovMat[j][k]}.
 * No mean is subtracted here (presumably the caller passes centered data;
 * verify against the caller).
 */
class worker extends Thread {
    /** Target matrix; the result is written to {@code CovMat[j][k]}. */
    double[][] CovMat;
    /** Input rows. */
    double[][] points;
    /** First column index (row of the target entry). */
    private int j;
    /** Second column index (column of the target entry). */
    private int k;

    public worker(double[][] CovMat, int j, int k, double[][] points) {
        this.points = points;
        this.CovMat = CovMat;
        this.k = k;
        this.j = j;
    }

    @Override
    public void run() {
        double productSum = 0;
        for (double[] row : points) {
            productSum += (row[j] * row[k]);
        }

        final double entry = (1.0 / (points.length - 1)) * productSum;

        // the write is guarded by the monitor of the shared target matrix
        synchronized (CovMat) {
            CovMat[j][k] = entry;
        }
    }
}
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/evaluator/cluster_based/LDCOFEvaluator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see = minimumClusterSize;
108 | }
109 | return result;
110 |
111 | }
112 |
113 | public double[] evaluate() {
114 | int n = points.length;
115 | int numberOfClusters = centroids.length;
116 | double[] result = new double[n];
117 | double[] distances = new double[n];
118 | int[] belongsToLargeCluster = new int[n];
119 | double[] summationDistances = new double[numberOfClusters];
120 | for (int i = 0; i < n; i++) {
121 | int clusterIndex = belongsToCluster[i];
122 | if (largeCluster[clusterIndex]) {
123 | // It is a large cluster
124 | distances[i] = measure.calculateDistance(
125 | centroids[clusterIndex], points[i]);
126 | summationDistances[clusterIndex] += distances[i];
127 | } else {
128 | // It is a small cluster
129 | double MinDistance = Double.MAX_VALUE;
130 |
131 | // search for the nearest large cluster
132 | for (int j = 0; j < numberOfClusters; j++) {
133 | if (!largeCluster[j])
134 | continue;
135 | double temp = measure.calculateDistance(centroids[j],
136 | points[i]);
137 | if (temp < MinDistance) {
138 | MinDistance = temp;
139 | clusterIndex = j;
140 | }
141 | }
142 |
143 | distances[i] = MinDistance;
144 |
145 | }
146 |
147 | belongsToLargeCluster[i] = clusterIndex;
148 | }
149 |
150 | for (int i = 0; i < numberOfClusters; i++)
151 | summationDistances[i] /= clusterSize[i];
152 |
153 | for (int i = 0; i < n; i++) {
154 | if(summationDistances[belongsToLargeCluster[i]]== 0.0) {
155 | result[i] = 0;
156 | }
157 | else {
158 | result[i] = distances[i]
159 | / summationDistances[belongsToLargeCluster[i]];
160 | }
161 |
162 | }
163 |
164 | return result;
165 | }
166 |
167 | }
168 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/evaluator/evaluation/ROCEvaluator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2013 by Deutsches Forschungszentrum fuer Kuenstliche
5 | * Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify it under the
8 | * terms of the GNU Affero General Public License as published by the Free
9 | * Software Foundation, either version 3 of the License, or (at your option) any
10 | * later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see {
30 | private int index;
31 | private double outlierScore;
32 |
/**
 * Pairs the position of an example with its outlier score.
 *
 * @param i
 *            index of the example
 * @param o
 *            outlier score of the example
 */
public OutlierPair(int i, double o) {
    this.outlierScore = o;
    this.index = i;
}
37 |
38 | @Override
39 | public int compareTo(OutlierPair arg0) {
40 | if (outlierScore > arg0.outlierScore)
41 | return -1;
42 | if (outlierScore < arg0.outlierScore)
43 | return 1;
44 | return 0;
45 | }
46 |
47 | @Override
48 | public String toString() {
49 | return this.outlierScore + " " + this.index;
50 | }
51 | }
52 |
53 | public double auc;
54 | public ArrayList out = new ArrayList();
55 | private String normal = "";
56 |
57 | public String getNormalClass() {
58 | return this.normal;
59 | }
60 | public Object[][] pre = null; //prediction / recall
61 | /**
62 | * The returned array has 2 columns denoting: false positive rate,true positive rate
63 | * precision/recall will be stored in pre. (true positive rate==recall)
64 | */
65 | public Object[][] evaluate(String outlierString, Object[] labels, double[] res) throws OperatorException {
66 | int size = res.length;
67 | Object[][] result;
68 |
69 | LinkedList rocPoints = new LinkedList();
70 |
71 | int count = 0;
72 | int anz_outlier = 0;
73 |
74 | int positive = 0;
75 | int negative = 0;
76 | int truePositive = 0;
77 | int falsePositive = 0;
78 | OutlierPair[] outliers = new OutlierPair[size];
79 | for (int j = 0; j < size; j++) {
80 | if (labels[j].toString().equals(outlierString)) {
81 | anz_outlier++;
82 | }
83 | outliers[j] = new OutlierPair(j, res[j]);
84 | }
85 | Arrays.sort(outliers);
86 | double Area = 0;
87 | double[] last = new double[] { 0, 0 };
88 | for (int j = 0; j < size; j++) {
89 |
90 | int x = outliers[j].index;
91 |
92 | if (count < anz_outlier) {
93 | this.out.add(outliers[j].index);
94 | count++;
95 | }
96 |
97 | if (labels[x].toString().equals(outlierString)) {
98 | truePositive++;
99 | positive++;
100 | } else {
101 | if (this.normal.equals("")) {
102 | this.normal = labels[x].toString();
103 | }
104 | else {
105 | if (!this.normal.equals(labels[x].toString()) && !outlierString.equals("")) {
106 | throw new OperatorException("There should be only two labels (normal & outlier). Currently found :" + outlierString + ", " + this.normal + " and " + labels[x].toString());
107 | }
108 | }
109 | falsePositive++;
110 | negative++;
111 |
112 | }
113 | if (j != size - 1 && outliers[j].outlierScore == outliers[j + 1].outlierScore)
114 | continue;
115 | Area += last[1] * ((double)falsePositive - last[0]) + (double)0.5 * ((double)falsePositive - last[0]) * ((double)truePositive - last[1]);
116 | rocPoints.add(new double[] { falsePositive, truePositive , truePositive*1.0/(truePositive+falsePositive), outliers[j].outlierScore});
117 | last[0] = falsePositive;
118 | last[1] = truePositive;
119 |
120 | }
121 | if (positive == 0) {
122 | throw new OperatorException("'" + outlierString + "' not found in the labels");
123 | }
124 | if (negative == 0) {
125 | throw new OperatorException("All the records are '" + outlierString + "'");
126 | }
127 | double totalArea = (double)positive * (double)negative;
128 |
129 | auc = Area / totalArea;
130 | result = new Object[rocPoints.size()][2];
131 | int i = 0;
132 | pre = new Object[rocPoints.size()][2];
133 | for (double[] r : rocPoints) {
134 | result[i][0] = r[0] / negative;
135 | result[i++][1] = r[1] / positive;
136 | }
137 | i=0;
138 | for(double[] r : rocPoints) {
139 | pre[i][0] = r[2]; // precision = tp /(tp+fp)
140 | pre[i++][1] = r[1] / positive; //recall = tp(so far) / all outlier
141 | }
142 |
143 | return result;
144 | }
145 | }
146 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/evaluator/kernel_based/AnomalyDetectionLibSVMEvaluator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2013 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see maxConfidence)
112 | maxConfidence = prob[0];
113 | }
114 | double[] result = new double[testSet.length];
115 | maxConfidence = Math.abs(maxConfidence);
116 | model.max_confidence = maxConfidence;
117 |
118 | for (int i = 0; i < testSet.length; i++) {
119 | Svm.svm_predict_values(model, trainingSet[i], prob);
120 | result[i] = (maxConfidence - prob[0]) / Math.abs(maxConfidence);
121 |
122 | }
123 | return result;
124 | }
125 |
126 | @Override
127 | public double[] evaluate() {
128 | int l = values.length;
129 | double[] results;
130 | svm_problem problem;
131 | double[] labels;
132 | if (params.kernel_type == svm_parameter.RBF && automatic_gamma_learning) {
133 | params.gamma = RBF_Kernel.learnGamma(values);
134 | }
135 |
136 | labels = new double[l];
137 | problem = new svm_problem();
138 | problem.l = l;
139 | problem.x = values;
140 | problem.y = labels;
141 | model = Svm.svm_train(problem, params);
142 | results = computeAnomalyScore(model, values, values);
143 | return results;
144 | }
145 |
146 | public svm_model getModel() {
147 | return model;
148 | }
149 |
150 | }
151 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/evaluator/kernel_based/RBF_Kernel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see y[j].index) {
80 | sum += y[j].value * y[j].value;
81 | ++j;
82 | } else {
83 | sum += x[i].value * x[i].value;
84 | ++i;
85 | }
86 | }
87 |
88 | while (i < xlen) {
89 | sum += x[i].value * x[i].value;
90 | ++i;
91 | }
92 |
93 | while (j < ylen) {
94 | sum += y[j].value * y[j].value;
95 | ++j;
96 | }
97 |
98 | double k_value = Math.exp(-sum/(alpha*alpha));
99 | return new double[]{k_value, sum*k_value/(alpha*alpha*alpha)};
100 | }
101 |
102 | public static double learnGamma(svm_node[][] values){
103 | return learnGamma(values, estimateInitialAlpha(values));
104 | }
105 |
106 | /**
107 | * Perform gradient ascent to maximize J= s^2 / (k_avg +eps)
108 | * where s^2= sum_i_0_{l-1}(sum_j_{i+1}_{l-1}((k(i,j)-k_avg)^2))/(number-1)
109 | * k_avg = sum_i_0_{l-1}(sum_j_{i+1}_{l-1}(k(i,j))/ number
110 | * dJ/d_alpha = sum_i_0_{l-1}(sum_j_{i+1}_{l-1}(
111 | * 2 * (k(i,j) - k_avg) * (k(i,j)' - k_avg') * (k_avg + eps)
112 | * -
113 | * k_avg' * (k(i,j) - k_avg) ^ 2
114 | * )/
115 | * (number-1) * (k_avg + eps) ^ 2
116 | * @param values input values
117 | * @param eps
118 | * @return
119 | */
120 | public static double learnGamma(svm_node[][] values, double initialAlpha){
121 | int l= values.length; // size of dataset
122 | int number= l*(l-1)/2; // number of non-diagonal kernel elements
123 | double alpha = initialAlpha; // standard deviation of gaussian
124 | double learning_rate= initialAlpha*initialAlpha; // learning rate of gradient ascent
125 | double eps_conv = Math.pow(10,-3); // to test for convergence
126 | double eps = Math.pow(10,-12);
127 | double lastValue = 0;
128 | for(int f=0; f<100 ; f++){
129 | double [] k;
130 | double k_avg = 0; // Average of non-diagonal entries in
131 | double k_d_avg = 0; // differential of k_avg relative to alpha
132 | for(int i=0; i< l ; i++) {
133 | for(int j= i+1; j< l ; j++) {
134 | k = RBF_Kernel.k_function(values[i], values[j], alpha);
135 | k_avg += k[0];
136 | k_d_avg += k[1];
137 | }
138 | }
139 |
140 | k_avg /= number;
141 | k_d_avg /= number;
142 |
143 | double diff = 0.0; // gradient of maximization objective
144 | double s_2=0.0; // variance of non-diagonal
145 | for(int i=0; i < l; i++) {
146 | for(int j = i+1; j< l ; j++) {
147 | k = RBF_Kernel.k_function(values[i], values[j], alpha);
148 | s_2+=(k[0]-k_avg)*(k[0]-k_avg);
149 | diff+=2*(k[0]-k_avg)*(k[1]-k_d_avg)*(k_avg+eps)-(k[0]-k_avg)*(k[0]-k_avg)* k_d_avg;
150 | }
151 | }
152 | diff/= (number-1)*(k_avg+ eps)*(k_avg+eps);
153 | s_2/= (number-1);
154 |
155 |
156 | double temp = learning_rate* diff;
157 | if(f==0){
158 | while(alpha+ temp <0)
159 | {
160 | learning_rate/=3;
161 | temp/=3;
162 | }
163 | }
164 | else {
165 | // reduce the learning rate because it is too large.
166 | while(temp* lastValue < 0){
167 | learning_rate/=3;
168 | temp = learning_rate * diff;
169 | }
170 | while(alpha+ temp <0)
171 | {
172 | learning_rate/=3;
173 | temp/=3;
174 | }
175 | }
176 |
177 | lastValue = temp;
178 | alpha += temp;
179 |
180 | if(debug) {
181 | System.out.println("maximized value at iteration "+ f+" "+ s_2/(k_avg+eps));
182 | System.out.println("After iteration "+f+ " alpha = "+ alpha+ " gamma ="+ (1.0/(alpha*alpha)));
183 | }
184 |
185 | if(Math.abs(temp) < eps_conv){
186 | break;
187 | }
188 |
189 | }
190 | if(debug)
191 | System.out.println("Returned Gamma "+ (1.0/(alpha*alpha)));
192 | return 1.0/(alpha*alpha);
193 | }
194 |
195 | public static double computeOptimizationObjective(svm_node[][] values, double eps, double gamma){
196 | double alpha = Math.sqrt(1.0/gamma);
197 | double k_avg=0.0;
198 | double s2=0.0;
199 | int l = values.length;
200 | int number = l*(l-1)/2;
201 | double [] k;
202 | for(int i=0; i< l ; i++) {
203 | for(int j=i+1; j < l; j++) {
204 | k = k_function(values[i], values[j], alpha);
205 | k_avg+=k[0];
206 | }
207 | }
208 | k_avg/=number;
209 | for(int i=0; i < l; i++) {
210 | for(int j = i + 1; j [] kdist = getKnnCollection().getKdistNeighbors();
64 | DistanceMeasure measure = getMeasure();
65 |
66 | // The array that will contain the average chaining distance
67 | double[] acDist = new double[n];
68 | double[] cof = new double[n];
69 |
70 | // attributes used for intermediate calculations
71 |
72 | // tempDistances[x] contains the minimum distance to connect the set
73 | // already connected to the element with index indicies[x]
74 | double[] tempDistances = new double[n];
75 | int[] indicies = new int[n];
76 |
77 | int j;
78 | int size;
79 |
80 | // calculating average chaining distance
81 | // The average chaining distance has the following formula
82 | // (summation from i=1 to cardinality of 2*(cardinality-i+1) * ei
83 | // )/(cardinality *(cardinality-1))
84 | for (int i = 0; i < n; i++) {
85 |
86 | int cardinality = weight[i] - 1;
87 |
88 | size = k + kdist[i].size();
89 |
90 | int minIndex = 0;
91 |
92 | for (j = 0; j < k; j++) {
93 | tempDistances[j] = neighborDistances[i][j];
94 | indicies[j] = neighborIndicies[i][j];
95 | cardinality += weight[neighborIndicies[i][j]];
96 | }
97 |
98 | for (int x : kdist[i]) {
99 | tempDistances[j] = neighborDistances[i][k - 1];
100 | indicies[j] = x;
101 | cardinality += weight[x];
102 | j++;
103 | }
104 |
105 |
106 |
107 | double summation = 0;
108 | // weighSofar represents (cardinality -i+1) in the above formula
109 | int weightSofar = cardinality - weight[i] + 1;
110 |
111 | double denominator = cardinality * (cardinality + 1);
112 |
113 | for (int l = 0; l < size; l++) {
114 | // in case we have X duplicates of the same point then we will have
115 | // the weight of the current edge equal to 2*(weightSofar + (weightSofar-1)+....+
116 | // (weightSofar-X+1)) which is equal to the summation of i from
117 | // i= weightSoFar-X+1 to weightSofar which is equal (
118 | // weightSOFar*(weightSoFar+1) -
119 | // (weighSoFar-X)*(weighSofar-X+1)) let t1
120 | // =weightSOfar*(weightSofar+1) and t2 =
121 | // (weighSofar-X)*(weighSofar-X+1) then the weight of the
122 | // current edge should be equal to t1-t2
123 |
124 | // currentweight = t1
125 | int currentweight = weightSofar * (weightSofar + 1);
126 |
127 | // weighSofar = weightSofar -X
128 | weightSofar -= weight[indicies[minIndex]];
129 |
130 | // currentweight= currentweight- t2
131 | currentweight -= weightSofar * (weightSofar + 1);
132 |
133 | summation += currentweight * tempDistances[minIndex];
134 |
135 | // the index of the point just added to the set
136 | int currentIndex = indicies[minIndex];
137 |
138 | // an index of -1 indicates that the point was already reached
139 | // and thus shouldn't need to be reached again
140 | indicies[minIndex] = -1;
141 |
142 | // This contains the index of the point that is the nearest
143 | // neighbor of the set from the set indicies[0..j]
144 | minIndex = -1;
145 |
146 | for (j = 0; j < size; j++) {
147 | if (indicies[j] == -1)
148 | continue;
149 |
150 | double temp = measure.calculateDistance(
151 | points[currentIndex], points[indicies[j]]);
152 | if (temp < tempDistances[j])
153 | tempDistances[j] = temp;
154 |
155 | if (minIndex == -1
156 | || tempDistances[minIndex] > tempDistances[j]
157 | || (tempDistances[minIndex] == tempDistances[j] && indicies[j] < indicies[minIndex])) {
158 | // assigns the nearest neighbor if non exists or if
159 | // point i is nearer than the current nearest neighbor,
160 | // in case they have the same distance ties are broken
161 | // by taking the earlier index
162 |
163 | minIndex = j;
164 | }
165 |
166 | }
167 |
168 | }
169 | acDist[i] = summation / denominator;
170 |
171 | }
172 |
173 | // calculating cof
174 | for (int i = 0; i < n; i++) {
175 |
176 | int cardinality = weight[i] - 1;
177 | double summation = cardinality * acDist[i];
178 | for (j = 0; j < k; j++) {
179 | int currentIndex = neighborIndicies[i][j];
180 | summation += weight[currentIndex] * acDist[currentIndex];
181 | cardinality += weight[currentIndex];
182 | }
183 | for (int x : kdist[i]) {
184 | summation += weight[x] * acDist[x];
185 | cardinality += weight[x];
186 | }
187 | cof[i] = cardinality * acDist[i] / summation;
188 |
189 | }
190 |
191 | return cof;
192 |
193 | }
194 |
195 | /**
196 | * The method is called to initialize the evaluation process.
197 | */
198 | @Override
199 | public double[] evaluate() {
200 | super.evaluate();
201 | double[] cof = cof();
202 | return cof;
203 | }
204 |
205 | @Override
206 | public double[] reEvaluate(int step) {
207 | getKnnCollection().shrink(step);
208 | double[] cof = cof();
209 | return cof;
210 |
211 | }
212 |
/**
 * Overridden with an empty body to avoid doing extra work: this evaluator
 * computes all of its scores in one pass in {@code cof()}, so nothing is
 * done per point here.
 */
@Override
protected void setAnomalyScore(int i, double[] neighBorDistanceSoFar,
int[] neighBorIndiciesSoFar, int numberOfNeighbors) {
// intentionally empty
}
219 |
220 | }
221 |
222 | class Node implements Comparable {
223 |
224 | int index;
225 | double distance;
226 |
227 | public Node(int index, double distance) {
228 | this.index = index;
229 | this.distance = distance;
230 | }
231 |
232 | @Override
233 | public int compareTo(Node arg0) {
234 | if (distance < arg0.distance)
235 | return -1;
236 | if (distance > arg0.distance)
237 | return 1;
238 | if (index < arg0.index)
239 | return -1;
240 | if (index > arg0.index)
241 | return 1;
242 | return 0;
243 | }
244 | }
245 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/evaluator/nearest_neighbor_based/INFLOEvaluator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see [] kdistNeighbors = getKnnCollection().getKdistNeighbors();
67 | int n = getN();
68 | double[] inflo = new double[n];
69 |
70 | // for intermediate work
71 | int[] cardinality = new int[n];
72 | double[] summationDensities = new double[n];
73 |
74 | for (int i = 0; i < n; i++) {
75 | int end = neighborNumbers[i];
76 | double kdist = distances[i][end - 1];
77 |
78 | cardinality[i]+= weight[i]-1;
79 | summationDensities[i]+= (weight[i]-1)* 1/kdist;
80 | for (int j = 0; j < end; j++) {
81 | int currentIndex = neighbors[i][j];
82 | int currentWeight = weight[currentIndex];
83 | cardinality[i] += currentWeight;
84 | double currentDistance = distances[i][j];
85 | double currentKdist = distances[currentIndex][neighborNumbers[currentIndex] - 1];
86 |
87 | summationDensities[i] += currentWeight * 1.0 / currentKdist;
88 | if (currentDistance > currentKdist) {
89 | cardinality[currentIndex] += weight[i];
90 | summationDensities[currentIndex] += weight[i] * 1.0 / kdist;
91 |
92 | }
93 | }
94 |
95 | for(int currentIndex: kdistNeighbors[i])
96 | {
97 | int currentWeight= weight[currentIndex];
98 | cardinality[i] += currentWeight;
99 | double currentDistance = distances[i][neighborNumbers[i]-1];
100 | double currentKdist = distances[currentIndex][neighborNumbers[currentIndex] - 1];
101 |
102 | summationDensities[i] += currentWeight* 1.0 / currentKdist;
103 | if (currentDistance > currentKdist) {
104 | cardinality[currentIndex] += weight[i];
105 | summationDensities[currentIndex] += weight[i]* 1.0 / kdist;
106 |
107 | }
108 |
109 | }
110 | }
111 |
112 | for (int i = 0; i < n; i++) {
113 | int end = neighborNumbers[i];
114 | double kdist = distances[i][end - 1];
115 | inflo[i] = summationDensities[i] * kdist / cardinality[i];
116 |
117 | }
118 |
119 | return inflo;
120 |
121 | }
122 |
/**
 * Intentionally empty override.
 * NOTE(review): presumably the scores of this evaluator are computed in a
 * separate pass rather than per point; confirm against the superclass.
 */
@Override
protected void setAnomalyScore(int i, double[] neighBorDistanceSoFar,
int[] neighBorIndiciesSoFar, int numberOfNeighbors) {
// nothing to do per point
}
128 |
129 | }
130 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/evaluator/nearest_neighbor_based/KNNCollection.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see [] kdistNeighbors;
70 |
71 | /**
72 | * The weight of the points. Which corresponds to the number of elements in
73 | * the example set that have these coordinates.
74 | **/
75 | private int[] weight;
76 |
/**
 * Creates an empty collection for {@code n} points with room for
 * {@code k} neighbors each.
 *
 * @param n
 *            the number of points
 * @param k
 *            the number of neighbor slots per point
 * @param points
 *            the point coordinates (stored by reference)
 * @param weight
 *            the weight of every point (stored by reference)
 */
@SuppressWarnings("unchecked")
public KNNCollection(int n, int k, double[][] points, int[] weight) {
    this.points = points;
    this.weight = weight;
    this.n = n;
    this.k = k;

    numberOfNeighbors = new int[n];
    neighborDistances = new double[n][k];
    neighborIndicies = new int[n][k];

    // one initially empty tie list per point
    kdistNeighbors = new LinkedList[n];
    for (int row = 0; row < n; row++) {
        kdistNeighbors[row] = new LinkedList();
    }
}
93 |
94 | public int getK() {
95 | return k;
96 | }
97 |
98 | public LinkedList[] getKdistNeighbors() {
99 | return kdistNeighbors;
100 | }
101 |
102 | public int getN() {
103 | return n;
104 | }
105 |
106 | public double[][] getNeighBorDistanceSoFar() {
107 | return neighborDistances;
108 | }
109 |
110 | public int[][] getNeighBorIndiciesSoFar() {
111 | return neighborIndicies;
112 | }
113 |
114 | public int[] getNumberOfNeighborsSoFar() {
115 | return numberOfNeighbors;
116 | }
117 |
118 | public double[][] getPoints() {
119 | return points;
120 | }
121 |
122 | public int[] getWeight() {
123 | return weight;
124 | }
125 |
126 | public void shrink(int shrinkBy){
127 | for (int i=0; i< shrinkBy; i++)
128 | shrink();
129 | }
130 | /**
131 | * This method shrinks the kNNCollection to k-1
132 | */
133 | public void shrink() {
134 | k--;
135 | if (k == 0)
136 | return;
137 | for (int index = 0; index < n; index++) {
138 | // reduce the number of distinct neighbors by 1
139 | numberOfNeighbors[index]--;
140 | // removed index is equal to the old numberofNeighbors -1 which is equal to the new number of neighbors
141 | int removedIndex = numberOfNeighbors[index];
142 | int newLast = removedIndex - 1;
143 | if (neighborDistances[index][newLast] == neighborDistances[index][removedIndex]) {
144 | kdistNeighbors[index]
145 | .add(neighborIndicies[index][removedIndex]);
146 | } else
147 | kdistNeighbors[index].clear();
148 | }
149 |
150 | }
151 |
152 | /**
153 | * This method updates the KNNcollection by adding the currentDistance and
154 | * point2 to the set of the nearest neighbors of point1 if applicable.
155 | *
156 | * @param point1
157 | * The point we are updating the neighborhood set for.
158 | * @param point2
159 | * @param currentDistance
160 | * The distance between point1 and point2.
161 | */
162 | public void updateNearestNeighbors(int point1, int point2,
163 | double currentDistance) {
164 | // if this is the first neighbor then add it to the neighborhood set.
165 | if (numberOfNeighbors[point1] == 0) {
166 | neighborIndicies[point1][0] = point2;
167 | neighborDistances[point1][0] = currentDistance;
168 | numberOfNeighbors[point1]++;
169 | return;
170 | }
171 |
172 | int last = numberOfNeighbors[point1] - 1;
173 |
174 | // if the number of neighbors is less than k or the currentDistance is
175 | // less than the max distance in the neighborhood so far then add point2
176 | // to the set
177 |
178 | if (neighborDistances[point1][last] >= currentDistance
179 | || numberOfNeighbors[point1] < k) {
180 |
181 | boolean flag = true;
182 | if (numberOfNeighbors[point1] < k)
183 | numberOfNeighbors[point1]++;
184 | else {
185 |
186 | if (neighborDistances[point1][last] == currentDistance) {
187 | // if the current distance as the maximum distance then the
188 | // point should be added to the nearest neighborhood set
189 | kdistNeighbors[point1].add(point2);
190 | flag = false;
191 | } else {
192 |
193 | if (last > 0
194 | && neighborDistances[point1][last - 1] == neighborDistances[point1][last])
195 | // if the maximum distance is the same as the second
196 | // maximum distace then last point which is going to
197 | // removed should be added to the list.
198 | kdistNeighbors[point1]
199 | .add(neighborIndicies[point1][last]);
200 | else
201 | // else the kdist neighbors are reset.
202 | // kdistNeighbors[point1].empty();
203 | kdistNeighbors[point1].clear();
204 | }
205 | }
206 |
207 | // Adding point2 to the neighborhood in the appropriate position
208 | // using insertion sort.
209 |
210 | if (flag) {
211 | int i = Math.min(last, k - 2);
212 | for (; i >= 0; i--)
213 | if (neighborDistances[point1][i] > currentDistance) {
214 | neighborDistances[point1][i + 1] = neighborDistances[point1][i];
215 | neighborIndicies[point1][i + 1] = neighborIndicies[point1][i];
216 | } else
217 | break;
218 |
219 | neighborDistances[point1][i + 1] = currentDistance;
220 | neighborIndicies[point1][i + 1] = point2;
221 | }
222 | }
223 | }
224 | public static KNNCollection clone(KNNCollection a){
225 | KNNCollection ret = new KNNCollection(a.n,a.k,a.points,a.weight);
226 | ret.neighborIndicies = a.neighborIndicies.clone();
227 | ret.neighborDistances = a.neighborDistances.clone();
228 | ret.numberOfNeighbors = a.numberOfNeighbors.clone();
229 | ret.kdistNeighbors = a.kdistNeighbors.clone();
230 | return ret;
231 | }
232 | }
233 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/evaluator/nearest_neighbor_based/KNNCollectionModel.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2013 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see ids;
79 | /**
80 | * HashMap containing the mapping of the example Ids to the index of the
81 | * example which will be used in the further processing
82 | */
83 | private HashMap
50 | *
51 | * The computation of the LOCI requires the calculation of MDEF and σMDEF.
52 | * MDEF for a point pi at radius r refers to the deviation of the density of pi
53 | * to that in its average local neighborhood density. σMDEF is the
54 | * normalised standard deviation of the point relative to its local
55 | * neighborhood.
56 | *
57 | *
58 | * The original publication suggests the following flagging scheme: an object
59 | * should be flagged as an outlier if MDEF(pi, r, α) > 3
60 | * * σMDEF(pi, r, α). The operator produces an outlier score which
61 | * corresponds to the maximum ratio between MDEF(pi, r, α) and
62 | * σMDEF(pi, r, α) over all r. The higher the ratio, the more outlying
63 | * the object is. The proposed threshold to determine outliers is 3.
64 | *
65 | *
66 | * @author Mennatallah Amer
67 | *
68 | */
69 | public class LOCIAnomalyDetectionOperator extends
70 | AbstractNearestNeighborBasedAnomalyDetectionOperator {
71 |
72 | /**
73 | * The parameter name for " The minimum number of neighbors in the
74 | * sampling neighborhood. "
75 | **/
76 | public static String PARAMETER_N_MIN = "n min";
77 |
78 | /**
79 | * The parameter name for " The ratio of the counting neighborhood
80 | * radius to the sampling neighborhood radius. "
81 | **/
82 | public static String PARAMETER_ALPHA = "alpha";
83 |
84 | private DistanceMeasureHelper measureHelper = new DistanceMeasureHelper(
85 | this);
86 |
87 | public LOCIAnomalyDetectionOperator(OperatorDescription description) {
88 | super(description);
89 | getExampleSetInput().addPrecondition(
90 | new DistanceMeasurePrecondition(getExampleSetInput(), this));
91 | }
92 |
93 | @Override
94 | public double[] doWork(ExampleSet exampleSet, Attributes attributes,
95 | double[][] points, int[] weight) throws OperatorException {
96 | DistanceMeasure measure = measureHelper
97 | .getInitializedMeasure(exampleSet);
98 | double alpha = getParameterAsDouble(PARAMETER_ALPHA);
99 | int nmin = getParameterAsInt(PARAMETER_N_MIN);
100 | int n = points.length;
101 | double[] ret = {1};
102 |
103 | if (n > 1) {
104 | if (nmin == n) {
105 | this.logWarning("Setting " + PARAMETER_N_MIN + " to #Datapoints-1 because n min can't be equal #Datapoints.");
106 | nmin = n-1;
107 | //this.setParameter(PARAMETER_N_MIN, (n-1)+"");
108 | }
109 |
110 | LOCIEvaluator evaluator = new LOCIEvaluator(measure, alpha, nmin,
111 | points, weight);
112 | ret = evaluator.evaluate();
113 | }
114 | return ret;
115 | }
116 |
117 | public List getParameterTypes() {
118 | LinkedList types = new LinkedList();
119 | types
120 | .add(new ParameterTypeDouble(
121 | PARAMETER_ALPHA,
122 | "The ratio of the counting neighborhood radius to the sampling neighborhood radius.",
123 | 0, 1, 0.5));
124 | types
125 | .add(new ParameterTypeInt(
126 | PARAMETER_N_MIN,
127 | "The minimum number of neighbors in the sampling neighborhood.",
128 | 1, Integer.MAX_VALUE, 20, false));
129 |
130 | types.addAll(DistanceMeasures.getParameterTypes(this));
131 | return types;
132 |
133 | }
134 |
135 | }
136 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/LOFAnomalyDetectionOperator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see
40 | * The LOF anomaly detection calculates the anomaly score according to the local
41 | * outlier factor algorithm proposed by Breunig et al[1999;2000].
42 | *
43 | *
44 | *
45 | * LOF is one of the earliest local density based approaches proposed. There are
46 | * several steps in the calculation of the LOF. The initial step involves
47 | * getting the nearest neighbors set. The definition of the k-distance employed
48 | * is the one proposed in the original paper in order to handle duplicates. The
49 | * definition states that the k-distance(p) has at least k neighbors with
50 | * distinct spatial coordinates that have a distance less than or equal to it and
51 | * at most k-1 of such neighbors with distance strictly less than it. The
52 | * reachability distance (reach-dist(p,o)) is the maximum of the distance
53 | * between point p and o and the k-distance(o). The local reachability is the
54 | * inverse of the average reachability distance over the nearest neighborhood
55 | * set. Finally the LOF is calculated as the average of the ratio of the local
56 | * reachability density over the neighborhood set. The value of the LOF
57 | * oscillates with the change in the size of the neighborhood. Thus, a range is
58 | * defined for the size of the neighborhood. The maximum LOF over that range is
59 | * taken as the final LOF score.
60 | *
61 | *
62 | * A normal instance has an outlier value of approximately 1, while outliers
63 | * have values greater than 1.
64 | *
65 | *
66 | * @author Mennatallah Amer
67 | *
68 | */
69 | public class LOFAnomalyDetectionOperator extends KNNAnomalyDetectionOperator {
70 |
71 | public static String PARAMETER_MINIMUM_K = "k_min (MinPtsLB)";
72 | public static String PARAMETER_MAXIMUM_K = "k_max (MinPtsUB)";
73 |
74 | public LOFAnomalyDetectionOperator(OperatorDescription description) {
75 | super(description);
76 | }
77 |
78 | @Override
79 | public double[] doWork(ExampleSet exampleSet, Attributes attributes,
80 | double[][] points, int[] weight) throws OperatorException {
81 | DistanceMeasure measure = getMeasureHelper().getInitializedMeasure(
82 | exampleSet);
83 |
84 | int n = points.length;
85 | int minK = getParameterAsInt(PARAMETER_MINIMUM_K);
86 | int maxK = getParameterAsInt(PARAMETER_MAXIMUM_K);
87 | double[] ret = {1};
88 |
89 | if (n > 1) {
90 | if (maxK >= n) {
91 | this.logWarning("Setting " + PARAMETER_MAXIMUM_K + " to "+ (n-1) + " because there cannot be more neighbors than data points.");
92 | maxK = n-1;
93 | //this.setParameter(PARAMETER_MAXIMUM_K, maxK+"");
94 | }
95 | if (maxK < minK) {
96 | this.logWarning("Setting " + PARAMETER_MINIMUM_K + " to "+ maxK + " to make UpperBound at least as large as LowerBound.");
97 | minK = maxK;
98 | //this.setParameter(PARAMETER_MINIMUM_K, minK+"");
99 | }
100 | boolean parallel = getParameterAsBoolean(PARAMETER_PARALLELIZE_EVALUATION_PROCESS);
101 | int numberOfThreads = getParameterAsInt(PARAMETER_NUMBER_OF_THREADS);
102 |
103 | readModel(n,maxK,points,weight,measure);
104 | //KNNCollection knnCollection = new KNNCollection(n, maxK, points, weight);
105 | LOFEvaluator evaluator = new LOFEvaluator(minK, knnCollection,
106 | measure,parallel, numberOfThreads, this, n, maxK , newCollection);
107 | ret = evaluator.evaluate();
108 | if(newCollection) {
109 | model = new KNNCollectionModel(exampleSet,knnCollection,measure);
110 | }
111 | else {
112 | model = new KNNCollectionModel(exampleSet,modelInput.getData(KNNCollectionModel.class).get(),measure);
113 | }
114 | modelOutput.deliver(model);
115 | knnCollection = null;
116 |
117 | }
118 | return ret;
119 | }
120 |
121 | @Override
122 | public List getParameterTypes() {
123 | List types = super.getParameterTypes();
124 | types.get(0).setKey(PARAMETER_MINIMUM_K);
125 | types.get(0).setDescription("The lower bound of MinPts");
126 | types.remove(1);
127 |
128 | types.add(1, new ParameterTypeInt(PARAMETER_MAXIMUM_K,
129 | "The upper bound of the MinPts ", 1, Integer.MAX_VALUE, 20,
130 | false));
131 |
132 | return types;
133 | }
134 |
135 | }
136 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/LoOPAnomalyDetectionOperator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2011 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see 1) {
85 | if (k >= n) {
86 | this.logWarning("Setting " + KNNAnomalyDetectionOperator.PARAMETER_K + " to #Datapoints-1.");
87 | k = n-1;
88 | //this.setParameter(KNNAnomalyDetectionOperator.PARAMETER_K, (n-1)+"");
89 | }
90 | boolean parallel = getParameterAsBoolean(PARAMETER_PARALLELIZE_EVALUATION_PROCESS);
91 | int numberOfThreads = getParameterAsInt(PARAMETER_NUMBER_OF_THREADS);
92 |
93 | readModel(n,k,points,weight,measure);
94 | LoOPEvaluator evaluator = new LoOPEvaluator(knnCollection,
95 | measure, lamda,parallel, numberOfThreads, this,n,k,newCollection);
96 |
97 | ret = evaluator.evaluate();
98 | model = new KNNCollectionModel(exampleSet,knnCollection,measure);
99 | modelOutput.deliver(model);
100 | knnCollection = null;
101 | }
102 | return ret;
103 | }
104 |
105 | public List getParameterTypes() {
106 | List types = super.getParameterTypes();
107 | types.remove(1);
108 | types
109 | .add(
110 | 1,
111 | new ParameterTypeDouble(
112 | PARAMETER_LAMBDA,
113 | "The normalization factor. The results are weakly affected by this factor. ",
114 | 1, 3, 3, true));
115 |
116 | return types;
117 | }
118 |
119 | }
120 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/operator/nearest_neighbor_based/Point.java:
--------------------------------------------------------------------------------
1 | package de.dfki.madm.anomalydetection.operator.nearest_neighbor_based;
2 |
3 |
4 |
5 | public class Point implements Comparable {
6 | public int getIndex() {
7 | return index;
8 | }
9 |
10 | double[] point;
11 | int index;
12 |
13 | public Point(int index, double[] point) {
14 | this.index = index;
15 | this.point = point;
16 | }
17 |
18 | @Override
19 | public int compareTo(Point arg0) {
20 | int n = point.length;
21 | for (int i = 0; i < n; i++)
22 | if (point[i] != arg0.point[i])
23 | if (point[i] < arg0.point[i])
24 | return -1;
25 | else
26 | return 1;
27 |
28 | return 0;
29 | }
30 |
31 | public boolean equals(Object obj) {
32 |
33 | return compareTo((Point) obj) == 0;
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/operator/statistical_based/OutlierCellColorProvider.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2014 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see colors;
34 | private int numberOfSpecialAttributes;
35 | private int color;
36 | private OutlierExampleSet exampleSet;
37 | private DataViewerTable dataTable;
38 |
/**
 * Creates a color provider for the given table.
 *
 * @param dataTable                 the table whose cells are colored
 * @param colors                    color codes per attribute, keyed by the
 *                                  attribute's table index
 * @param numberOfSpecialAttributes number of special attribute columns
 * @param exampleSet                example set used to resolve attribute
 *                                  names to table indices
 */
public OutlierCellColorProvider(DataViewerTable dataTable, HashMap<Integer, int[]> colors, int numberOfSpecialAttributes, OutlierExampleSet exampleSet) {
    this.dataTable = dataTable;
    this.exampleSet = exampleSet;
    this.colors = colors;
    this.numberOfSpecialAttributes = numberOfSpecialAttributes;
}
45 |
46 | @Override
47 | public Color getCellColor(int row, int column) {
48 | int col = dataTable.convertColumnIndexToModel(column);
49 | if(col <= numberOfSpecialAttributes) {
50 | if(col == 0) {
51 | // first column (index)
52 | if (row % 2 == 0) {
53 | return Color.WHITE;
54 | } else {
55 | return SwingTools.LIGHTEST_BLUE;
56 | }
57 | }
58 | if (row % 2 == 0) {
59 | //i special attributes (like score)
60 | return Color.WHITE;
61 | } else {
62 | return SwingTools.LIGHTEST_YELLOW;
63 | }
64 | }
65 | else {
66 | //i row index
67 | String index = dataTable.getCell(row+1,0);
68 | // attribute name
69 | String attribute_name = dataTable.getCell(0,column);
70 | // index of that attribute needed to fetch the right color value
71 | int tableIndex = exampleSet.getAttributes().get(attribute_name).getTableIndex();
72 | color = (int)colors.get(tableIndex)[Integer.parseInt(index)-1];
73 |
74 | /*transform the integer values into actual colors.
75 | * 510 => green (0,255,0)
76 | * 255 => yellow (255,255,0)
77 | * 0 => red (255,0,0)
78 | * ...
79 | */
80 | if(color == -1){
81 | return Color.white;
82 | }
83 | else if(color >= 255) {
84 | return new Color((510-color), 255, 0);
85 | }
86 | // second half of colors go from yellow to red (unnormal bins)
87 | else{
88 | return new Color(255,(color) , 0);
89 | }
90 | }
91 |
92 | }
93 |
94 | }
95 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/operator/statistical_based/OutlierColorJoin.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2014 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see eColor = new HashMap();
69 | for(Attribute att : exampleSetAttributes) {
70 | int[] colors = new int[exampleSet.size()];
71 | Arrays.fill(colors,-1);
72 | eColor.put(att.getTableIndex(), colors);
73 | }
74 | //for(int x = 0; x< exampleSet.size();x++) {
75 | int x = 0;
76 | outlierExampleSet.remapIds();
77 |
78 | for(Example e : exampleSet) {
79 | currentId = e.getId();
80 | int[] i = outlierExampleSet.getExampleIndicesFromId(currentId);
81 | try {
82 | if(i.length > 1 ) {
83 | throw new OperatorException("Ids are not unique.");
84 | }
85 | }
86 | catch (NullPointerException E){
87 | this.logNote("Data Row with id " +currentId+ " not found in the OutlierExampleSet");
88 | }
89 | for(Attribute att: exampleSetAttributes) {
90 | for(Attribute outAtt : outlierExampleSetAttributes){
91 | // find attribute with the same name
92 | if(att.getName() == outAtt.getName()) {
93 | //save color
94 | int[] colors = eColor.get(att.getTableIndex());
95 |
96 | try {
97 | colors[x] = outlierExampleSet.colors.get(att.getTableIndex())[i[0]];
98 | }
99 | catch (NullPointerException E ){
100 | colors[x] = -1;
101 | this.logNote("Attribute "+ att.getName() + " not found in the OutlierExampleSet");
102 | }
103 | eColor.put(att.getTableIndex(), colors);
104 | }
105 | }
106 |
107 | }
108 | x++;
109 | }
110 | return new OutlierExampleSet(exampleSet,eColor);
111 |
112 | }
113 | }
114 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/operator/statistical_based/OutlierDataViewer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2014 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see applicableFilterNames = new LinkedList();
94 | for (String conditionName : ConditionedExampleSet.KNOWN_CONDITION_NAMES) {
95 | try {
96 | ConditionedExampleSet.createCondition(conditionName, exampleSet, null);
97 | applicableFilterNames.add(conditionName);
98 | } catch (ConditionCreationException ex) {} // Do nothing
99 | }
100 | String[] applicableConditions = new String[applicableFilterNames.size()];
101 | applicableFilterNames.toArray(applicableConditions);
102 | final JComboBox filterSelector = new JComboBox(applicableConditions);
103 | filterSelector.setToolTipText("These filters can be used to skip examples in the view fulfilling the filter condition.");
104 | filterSelector.addItemListener(new ItemListener() {
105 | @Override
106 | public void itemStateChanged(ItemEvent e) {
107 | updateFilter((String)filterSelector.getSelectedItem());
108 | }
109 | });
110 |
111 | int maxNumberBeforeFiltering = DEFAULT_MAX_SIZE_FOR_FILTERING;
112 | String maxString = ParameterService.getParameterValue(RapidMinerGUI.PROPERTY_RAPIDMINER_GUI_MAX_STATISTICS_ROWS);
113 | if (maxString != null) {
114 | try {
115 | maxNumberBeforeFiltering = Integer.parseInt(maxString);
116 | } catch (NumberFormatException e) {
117 | // do nothing
118 | }
119 | }
120 | if (exampleSet.size() > maxNumberBeforeFiltering) {
121 | filterSelector.setEnabled(false);
122 | }
123 | toolBar.add(filterSelector, ViewToolBar.RIGHT);
124 | toolBar.setPreferredSize(new Dimension(getWidth(), 29));
125 | }
126 |
127 | add(toolBar, BorderLayout.NORTH);
128 | JScrollPane tableScrollPane = new ExtendedJScrollPane(dataTable);
129 | tableScrollPane.setBorder(null);
130 | add(tableScrollPane, BorderLayout.CENTER);
131 | setExampleSet(exampleSet);
132 |
133 | /*
134 | * set the right cell color Proiver
135 | */
136 | final int numberOfSpecialAttributes = exampleSet.getAttributes().specialSize();
137 | dataTable.setCellColorProvider(new OutlierCellColorProvider(dataTable,exampleSet.colors,numberOfSpecialAttributes, exampleSet));
138 | }
139 |
140 | public void setExampleSet(ExampleSet exampleSet) {
141 | dataTable.setExampleSet(exampleSet);
142 | }
143 |
144 | private void updateFilter(String conditionName) {
145 | ExampleSet filteredExampleSet = originalExampleSet;
146 | try {
147 | Condition condition = ConditionedExampleSet.createCondition(conditionName, originalExampleSet, null);
148 | filteredExampleSet = new ConditionedExampleSet(originalExampleSet, condition);
149 | } catch (ConditionCreationException ex) {
150 | originalExampleSet.getLog().logError("Cannot create condition '" + conditionName + "' for filtered data view: " + ex.getMessage() + ". Using original data set view...");
151 | filteredExampleSet = originalExampleSet;
152 | }
153 | updateFilterCounter(filteredExampleSet);
154 | setExampleSet(filteredExampleSet);
155 | }
156 |
157 | private void updateFilterCounter(ExampleSet filteredExampleSet) {
158 | filterCounter.setText("(" + filteredExampleSet.size() + " / " + originalExampleSet.size() + "): ");
159 | }
160 |
161 | @Override
162 | public void prepareReporting() {
163 | dataTable.prepareReporting();
164 | }
165 |
166 | @Override
167 | public void finishReporting() {
168 | dataTable.finishReporting();
169 | }
170 |
171 | @Override
172 | public String getColumnName(int columnIndex) {
173 | return dataTable.getColumnName(columnIndex);
174 | }
175 |
176 | @Override
177 | public String getCell(int row, int column) {
178 | return dataTable.getCell(row, column);
179 | }
180 |
181 | @Override
182 | public int getColumnNumber() {
183 | return dataTable.getColumnNumber();
184 | }
185 |
186 | @Override
187 | public int getRowNumber() {
188 | return dataTable.getRowNumber();
189 | }
190 |
191 | @Override
192 | public boolean isFirstLineHeader() { return false; }
193 |
194 | @Override
195 | public boolean isFirstColumnHeader() { return false; }
196 |
197 |
198 |
199 |
200 | }
201 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/operator/statistical_based/OutlierExampleSet.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2014 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see colors;
/**
 * Wraps an example set together with color codes for its cells.
 *
 * @param exampleSet the example set to delegate to (stored by reference)
 * @param colors     color codes per attribute, keyed by the attribute's
 *                   table index (stored by reference)
 */
public OutlierExampleSet(ExampleSet exampleSet, HashMap<Integer, int[]> colors) {
    this.data = exampleSet;
    this.colors = colors;
}
42 | @Override
43 | public String getName() {
44 | return "OutlierExampleSet";
45 | }
46 | @Override
47 | public Attributes getAttributes() {
48 | return data.getAttributes();
49 | }
50 |
51 | @Override
52 | public Example getExample(int arg0) {
53 | return data.getExample(arg0);
54 | }
55 |
56 | @Override
57 | public ExampleTable getExampleTable() {
58 | return data.getExampleTable();
59 | }
60 |
61 | @Override
62 | public int size() {
63 | return data.size();
64 | }
65 | @Override
66 | public OutlierExampleSet clone() {
67 | OutlierExampleSet result = new OutlierExampleSet((ExampleSet)data.clone(),(HashMap)colors.clone());
68 | return result;
69 |
70 | }
71 | @Override
72 | public Iterator iterator() {
73 | return data.iterator();
74 | }
75 |
76 |
77 | }
78 |
--------------------------------------------------------------------------------
/src/de/dfki/madm/anomalydetection/operator/statistical_based/OutlierExampleSetDataRenderer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * RapidMiner Anomaly Detection Extension
3 | *
4 | * Copyright (C) 2009-2014 by Deutsches Forschungszentrum fuer
5 | * Kuenstliche Intelligenz GmbH or its licensors, as applicable.
6 | *
7 | * This is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU Affero General Public License as published by
9 | * the Free Software Foundation, either version 3 of the License, or
10 | * (at your option) any later version.
11 | *
12 | * You should have received a copy of the GNU Affero General Public License
13 | * along with this software. If not, see