├── .gitignore ├── LICENSE ├── README.md ├── ensemble-clustering-spark ├── docstyle.css ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── oculusinfo │ │ └── ml │ │ └── spark │ │ ├── CsvParser.java │ │ ├── Field.java │ │ ├── SparkDataSet.java │ │ ├── SparkInstanceParser.java │ │ ├── SparkInstanceParserHelper.java │ │ └── unsupervised │ │ └── cluster │ │ ├── SparkClusterResult.java │ │ ├── dpmeans │ │ └── DPMeansClusterer.java │ │ ├── functions │ │ ├── AggregateClusterFunction.java │ │ ├── AggregateClustersFunction.java │ │ ├── BestClusterFunction.java │ │ ├── ComputeCentroidFunction.java │ │ ├── DistanceFunction.java │ │ ├── FindBestClusterFunction.java │ │ ├── IncrementalClusterFunction.java │ │ └── InstanceToClusterFunction.java │ │ ├── kmeans │ │ └── KMeansClusterer.java │ │ └── threshold │ │ └── ThresholdClusterer.java │ └── test │ └── java │ └── com │ └── oculusinfo │ └── ml │ └── spark │ └── unsupervised │ ├── InstanceParser.java │ ├── TestDPMeans.java │ ├── TestKMeans.java │ └── TestThresholdClusterer.java ├── ensemble-clustering ├── docstyle.css ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── oculusinfo │ │ ├── geometry │ │ ├── SphereUtilities.java │ │ ├── cartesian │ │ │ ├── CubicBSpline.java │ │ │ ├── QuarticSpline.java │ │ │ └── Spline.java │ │ └── geodesic │ │ │ ├── Position.java │ │ │ ├── PositionCalculationParameters.java │ │ │ ├── PositionCalculationType.java │ │ │ ├── Track.java │ │ │ ├── TrackPlotter.java │ │ │ ├── WrappingRectangle.java │ │ │ └── tracks │ │ │ ├── Cartesian2DTrack.java │ │ │ ├── Cartesian3DTrack.java │ │ │ ├── GeodeticTrack.java │ │ │ └── SphericalTrack.java │ │ ├── math │ │ ├── algebra │ │ │ └── AngleUtilities.java │ │ ├── linearalgebra │ │ │ ├── ListUtilities.java │ │ │ ├── TriDiagonalMatrix.java │ │ │ └── Vector.java │ │ └── statistics │ │ │ └── StatTracker.java │ │ └── ml │ │ ├── DataSet.java │ │ ├── Instance.java │ │ ├── InstanceJsonMapper.java │ │ ├── centroid │ │ └── Centroid.java │ │ ├── distance │ 
│ └── DistanceFunction.java │ │ ├── feature │ │ ├── Feature.java │ │ ├── FeatureTable.java │ │ ├── bagofwords │ │ │ ├── BagOfWordsFeature.java │ │ │ ├── centroid │ │ │ │ └── BagOfWordsCentroid.java │ │ │ └── distance │ │ │ │ ├── CosineDistance.java │ │ │ │ ├── EditDistance.java │ │ │ │ └── ExactTokenMatchDistance.java │ │ ├── numeric │ │ │ ├── NumericVectorFeature.java │ │ │ ├── centroid │ │ │ │ └── MeanNumericVectorCentroid.java │ │ │ └── distance │ │ │ │ └── EuclideanDistance.java │ │ ├── semantic │ │ │ ├── SemanticFeature.java │ │ │ ├── centroid │ │ │ │ └── SemanticCentroid.java │ │ │ └── distance │ │ │ │ ├── Concept.java │ │ │ │ ├── SemMFDistance.java │ │ │ │ └── WuPalmerDistance.java │ │ ├── spatial │ │ │ ├── GeoSpatialFeature.java │ │ │ ├── TrackFeature.java │ │ │ ├── centroid │ │ │ │ ├── FastGeoSpatialCentroid.java │ │ │ │ ├── GeoSpatialCentroid.java │ │ │ │ └── TrackCentroid.java │ │ │ └── distance │ │ │ │ ├── EquitangularDistance.java │ │ │ │ ├── EuclideanDistance.java │ │ │ │ ├── HaversineDistance.java │ │ │ │ ├── SphericalCosineDistance.java │ │ │ │ └── TrackDistance.java │ │ ├── string │ │ │ ├── StringFeature.java │ │ │ ├── centroid │ │ │ │ └── StringMedianCentroid.java │ │ │ └── distance │ │ │ │ ├── EditDistance.java │ │ │ │ └── ExactTokenMatchDistance.java │ │ └── temporal │ │ │ ├── TemporalFeature.java │ │ │ ├── centroid │ │ │ ├── TemporalCentroid.java │ │ │ └── TemporalMinMaxCentroid.java │ │ │ └── distance │ │ │ └── TemporalDistance.java │ │ ├── search │ │ ├── ObjectiveFunction.java │ │ ├── SearchException.java │ │ ├── Solution.java │ │ └── stochastic │ │ │ └── SimulatedAnnealing.java │ │ ├── stats │ │ ├── FeatureFreqComparator.java │ │ ├── FeatureFrequency.java │ │ ├── FeatureFrequencyTable.java │ │ └── TrackClusterWrapper.java │ │ ├── unsupervised │ │ └── cluster │ │ │ ├── AbstractClusterer.java │ │ │ ├── BaseClusterer.java │ │ │ ├── Cluster.java │ │ │ ├── ClusterFactory.java │ │ │ ├── ClusterJsonMapper.java │ │ │ ├── ClusterResult.java │ │ │ ├── 
Clusterer.java │ │ │ ├── FeatureTypeDefinition.java │ │ │ ├── InMemoryClusterResult.java │ │ │ ├── dpmeans │ │ │ └── DPMeans.java │ │ │ ├── kmeans │ │ │ └── KMeans.java │ │ │ └── threshold │ │ │ └── ThresholdClusterer.java │ │ ├── utils │ │ └── StringTools.java │ │ └── validation │ │ └── unsupervised │ │ ├── external │ │ ├── BCubed.java │ │ ├── BCubedHierarchical.java │ │ ├── Hierarchical.java │ │ └── NormMutualInformation.java │ │ └── internal │ │ ├── Cohesion.java │ │ └── Separation.java │ └── test │ └── java │ ├── TestGeoSpatialCentroid.java │ └── com │ └── oculusinfo │ ├── geometry │ ├── SphereUtilityTests.java │ ├── cartesian │ │ ├── ProgramaticSplineTest.java │ │ └── VisualSplineTest.java │ └── geodesic │ │ ├── PositionTests.java │ │ ├── TrackSimplificationTests.java │ │ └── TrackTest.java │ ├── math │ ├── algebra │ │ └── TestAngleUtilities.java │ ├── linearalgebra │ │ ├── ListUtilitiesTests.java │ │ ├── TestTriDiagonalMatrix.java │ │ └── VectorTests.java │ └── statistics │ │ └── TestStats.java │ └── ml │ ├── TestNormalization.java │ ├── TestStringTools.java │ ├── distance │ ├── TestBagOfWordsEditDistance.java │ ├── TestCosineDistance.java │ ├── TestEquitangularDistance.java │ ├── TestEuclideanDistance.java │ ├── TestExactStringMatchDistance.java │ ├── TestExactTokenMatchDistance.java │ ├── TestHaversineDistance.java │ ├── TestSphericalCosineDistance.java │ ├── TestStringEditDistance.java │ └── TestTemporalDistance.java │ ├── search │ └── TestAnnealer.java │ ├── tracks │ ├── TestFrame.java │ ├── TestTrackCluster.java │ ├── TrackCentroidTests.java │ └── TrackClusteringTests.java │ └── unsupervised │ ├── TestBagOfWordsClustering.java │ ├── TestDPMeans.java │ ├── TestGeoClusteringWithDPMeans.java │ ├── TestKMeans.java │ ├── TestNameLocationClustering.java │ ├── TestStringClustering.java │ ├── TestStringClusteringWithDPMeans.java │ ├── TestStringClusteringWithMissingFeatures.java │ └── TestThresholdClusterer.java └── pom.xml /.gitignore: 
-------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.class 3 | *.prefs 4 | bin 5 | .classpath 6 | .settings 7 | .project 8 | target 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Oculus Info Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /ensemble-clustering-spark/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | Spark Ensemble Clustering Library 5 | Spark Ensemble Clustering Library 6 | ensemble-clustering-spark 7 | jar 8 | 9 | 10 | ml 11 | com.oculusinfo 12 | 0.1.0-SNAPSHOT 13 | 14 | 15 | 16 | 17 | cloudera-releases 18 | https://repository.cloudera.com/artifactory/cloudera-repos 19 | 20 | 21 | 22 | 23 | 24 | com.oculusinfo 25 | ensemble-clustering 26 | 0.1.0-SNAPSHOT 27 | 28 | 29 | 30 | org.apache.spark 31 | spark-core_2.10 32 | 1.0.0 33 | 34 | 35 | 36 | org.apache.hadoop 37 | hadoop-client 38 | 2.0.0-mr1-cdh4.6.0 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | org.apache.maven.plugins 47 | maven-javadoc-plugin 48 | 2.9 49 | 50 | Spark Ensemble Clustering Library API 51 | Spark Ensemble Clustering Library 52 | ${basedir}/docstyle.css 53 | 54 | 55 | 56 | package-javadoc 57 | 58 | jar 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/CsvParser.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.spark; 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | 29 | public class CsvParser { 30 | public static List fsmParse(String input) { 31 | ArrayList result = new ArrayList(); 32 | int startChar = 0; 33 | int endChar = 0; 34 | boolean inString = false; 35 | while(endChar instances; 46 | 47 | /*** 48 | * The constructor must be passed a Spark Context used to communicate with the Spark installation. 49 | * 50 | * @param sc - the Spark context 51 | */ 52 | public SparkDataSet(JavaSparkContext sc) { 53 | this.sc = sc; 54 | } 55 | 56 | /*** 57 | * The SparkDataSet loads data using this method to populate the DataSet with Instances. 
58 | * 59 | * @param path the location of the data to load (filesystem or HDFS path) 60 | * @param parser is the object that converts each line in the data into an Instance 61 | * @param minSplits determine the number of Spark partitions to split the data into 62 | */ 63 | public void load(String path, SparkInstanceParser parser, int minSplits) { 64 | try { 65 | JavaRDD lines = sc.textFile(path, minSplits); 66 | instances = lines.mapToPair( parser ); 67 | } catch (Exception e) { 68 | e.printStackTrace(); 69 | } 70 | } 71 | 72 | /*** 73 | * The SparkDataSet loads data using this method to populate the DataSet with Instances. 74 | * 75 | * @param path the location of the data to load (filesystem or HDFS path) 76 | * @param parser is the object that converts each line in the data into an Instance 77 | */ 78 | public void load(String path, SparkInstanceParser parser) { 79 | try { 80 | JavaRDD lines = sc.textFile(path); 81 | instances = lines.mapToPair( parser ); 82 | } catch (Exception e) { 83 | e.printStackTrace(); 84 | } 85 | } 86 | 87 | /*** 88 | * The SparkDataSet can be directly provided a pre-defined RDD of Instances 89 | * 90 | * @param rdd the RDD of Instances 91 | */ 92 | public void load(JavaPairRDD rdd) { 93 | this.instances = rdd; 94 | } 95 | 96 | /*** 97 | * Return the underlying Spark RDD containing hte DataSet Instances 98 | * @return the RDD 99 | */ 100 | public JavaPairRDD getRDD() { 101 | return this.instances; 102 | } 103 | 104 | /*** 105 | * Return the Spark context this DataSet is bound to 106 | * 107 | * @return the JavaSparkContext 108 | */ 109 | public JavaSparkContext getContext() { 110 | return this.sc; 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/SparkInstanceParser.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 
3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.spark; 26 | 27 | import scala.Tuple2; 28 | import org.apache.spark.api.java.function.PairFunction; 29 | 30 | import com.oculusinfo.ml.Instance; 31 | 32 | /*** 33 | * This abstract class defines the abstract method each SparkInstanceParser must implement to convert 34 | * a line of data into an Instance that is added to a SparkDataSet 35 | * 36 | * @author slangevin 37 | * 38 | */ 39 | public abstract class SparkInstanceParser implements PairFunction { 40 | private static final long serialVersionUID = -8686959633799632078L; 41 | 42 | @Override 43 | public abstract Tuple2 call(String arg0) throws Exception; 44 | } 45 | -------------------------------------------------------------------------------- /ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/unsupervised/cluster/SparkClusterResult.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.spark.unsupervised.cluster; 26 | 27 | import java.util.Iterator; 28 | 29 | import org.apache.spark.api.java.JavaPairRDD; 30 | 31 | import com.oculusinfo.ml.Instance; 32 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 33 | import com.oculusinfo.ml.unsupervised.cluster.ClusterResult; 34 | 35 | /*** 36 | * This class represents a cluster result that stores cluster membership in a Spark RDD 37 | * 38 | * @author slangevin 39 | * 40 | */ 41 | public class SparkClusterResult implements ClusterResult { 42 | private static final long serialVersionUID = -1586537333107747750L; 43 | 44 | JavaPairRDD rdd; 45 | 46 | public SparkClusterResult(JavaPairRDD clusters) { 47 | rdd = clusters; 48 | } 49 | 50 | public JavaPairRDD getRDD() { 51 | return rdd; 52 | } 53 | 54 | @Override 55 | public Iterator iterator() { 56 | throw new RuntimeException("Iterator is not supported for SparkClusterResult"); 57 | } 58 | 59 | @Override 60 | public boolean isEmpty() { 61 | throw new RuntimeException("isEmpty is not supported for SparkClusterResult"); 62 | } 63 | 64 | @Override 65 | public int size() { 66 | throw new RuntimeException("size is not supported for SparkClusterResult"); 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/unsupervised/cluster/functions/AggregateClusterFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.spark.unsupervised.cluster.functions; 26 | 27 | import java.util.Map; 28 | 29 | import scala.Tuple2; 30 | import org.apache.spark.api.java.function.Function2; 31 | 32 | import com.oculusinfo.ml.Instance; 33 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 34 | 35 | public class AggregateClusterFunction implements Function2, Map, Map> { 36 | private static final long serialVersionUID = 3732157666163441695L; 37 | 38 | private double threshold; 39 | private DistanceFunction distFunc; 40 | 41 | public AggregateClusterFunction(DistanceFunction distFunc, double threshold) { 42 | this.distFunc = distFunc; 43 | this.threshold = threshold; 44 | } 45 | 46 | @Override 47 | public Map call(Map clusters1, Map cluster2) throws Exception { 48 | BestClusterFunction bestClusterFunc = new BestClusterFunction( distFunc, clusters1, threshold ); 49 | 50 | for (String key : cluster2.keySet()) { 51 | Instance instance = cluster2.get(key); 52 | Tuple2 result = bestClusterFunc.call(new Tuple2(instance.getId(), instance)); 53 | 54 | if (result == null) { 55 | clusters1.put(instance.getId(), instance); 56 | } 57 | else { 58 | Cluster cluster = (Cluster)clusters1.get(result._1); 59 | cluster.add(instance); // revise the cluster centroid 60 | cluster.getMembers().clear(); // no need to retain the member list 61 | } 62 | } 63 | return clusters1; 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/unsupervised/cluster/functions/AggregateClustersFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.spark.unsupervised.cluster.functions; 26 | 27 | import java.util.Map; 28 | 29 | import scala.Tuple2; 30 | import org.apache.spark.api.java.function.Function2; 31 | 32 | import com.oculusinfo.ml.Instance; 33 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 34 | 35 | public class AggregateClustersFunction implements Function2, Map, Map> { 36 | private static final long serialVersionUID = 1L; 37 | 38 | private double threshold; 39 | private DistanceFunction distFunc; 40 | 41 | public AggregateClustersFunction(DistanceFunction distFunc, double threshold) { 42 | this.distFunc = distFunc; 43 | this.threshold = threshold; 44 | } 45 | 46 | @Override 47 | public Map call(Map clusterList1, Map clusterList2) throws Exception { 48 | 49 | if (clusterList1.isEmpty()) { 50 | clusterList1.putAll(clusterList2); 51 | return clusterList1; 52 | } 53 | 54 | for (String id : clusterList2.keySet()) { 55 | Instance c = clusterList2.get(id); 56 | BestClusterFunction bestClusterFunc = new BestClusterFunction( distFunc, clusterList1, threshold ); 57 | Tuple2 result = bestClusterFunc.call(new Tuple2(c.getId(), c)); 58 | 59 | if (result._1 == null) { 60 | clusterList1.put(c.getId(), c); 61 | } 62 | else { 63 | ((Cluster)clusterList1.get(result._1)).add(c); 64 | } 65 | } 66 | return clusterList1; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/unsupervised/cluster/functions/BestClusterFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.spark.unsupervised.cluster.functions; 26 | 27 | import java.util.Map; 28 | 29 | import scala.Tuple2; 30 | import org.apache.spark.api.java.function.PairFunction; 31 | 32 | import com.oculusinfo.ml.Instance; 33 | 34 | public class BestClusterFunction implements PairFunction, String, Instance> { 35 | private static final long serialVersionUID = 1508254065822051773L; 36 | 37 | private double threshold = Double.MAX_VALUE; 38 | private DistanceFunction distFunc; 39 | private Map clusters; 40 | 41 | 42 | public BestClusterFunction(DistanceFunction distFunc, Map clusters) { 43 | this.distFunc = distFunc; 44 | this.clusters = clusters; 45 | } 46 | 47 | public BestClusterFunction(DistanceFunction distFunc, Map clusters, double threshold) { 48 | this.distFunc = distFunc; 49 | this.clusters = clusters; 50 | this.threshold = threshold; 51 | } 52 | 53 | @Override 54 | public Tuple2 call(Tuple2 inst) throws Exception { 55 | Instance bestCluster = null; 56 | double bestScore = Double.MAX_VALUE; 57 | 58 | for (String clusterId : clusters.keySet()) { 59 | Instance cluster = clusters.get(clusterId); 60 | double d = distFunc.distance(inst._2, cluster); 61 | 62 | if (d < bestScore && d < threshold) { 63 | bestScore = d; 64 | bestCluster = cluster; 65 | } 66 | } 67 | //System.out.println(inst._1 + " -> " + bestCluster); 68 | if (bestCluster == null) { 69 | return null; 70 | } 71 | 72 | return new Tuple2(bestCluster.getId(), inst._2); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/unsupervised/cluster/functions/ComputeCentroidFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.spark.unsupervised.cluster.functions; 26 | 27 | import org.apache.spark.api.java.function.Function2; 28 | 29 | import com.oculusinfo.ml.Instance; 30 | import com.oculusinfo.ml.centroid.Centroid; 31 | import com.oculusinfo.ml.feature.Feature; 32 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 33 | import com.oculusinfo.ml.unsupervised.cluster.ClusterFactory; 34 | 35 | public class ComputeCentroidFunction implements Function2 { 36 | private static final long serialVersionUID = -3281886552602674327L; 37 | 38 | private ClusterFactory clusterFactory; 39 | 40 | public ComputeCentroidFunction(ClusterFactory clusterFactory) { 41 | this.clusterFactory = clusterFactory; 42 | } 43 | 44 | @SuppressWarnings("unchecked") 45 | private void updateCluster(Instance inst, Cluster cluster) { 46 | if (inst instanceof Cluster) { // merge the clusters 47 | Cluster c = (Cluster)inst; 48 | for (String key : c.getCentroids().keySet()) { 49 | Centroid update = c.getCentroids().get(key); 50 | Centroid centroid = cluster.getCentroids().get(key); 51 | 52 | // get all the aggregate feature values associated with update 53 | for (Feature f : update.getAggregatableCentroid()) { 54 | centroid.add(f); 55 | } 56 | } 57 | 58 | // after merging cluster we should manually update the resulting centroid 59 | cluster.updateCentroid(); 60 | 61 | // TODO should handle merging the cluster members 62 | } 63 | else { // simply add the instance to the cluster 64 | cluster.add(inst); 65 | } 66 | } 67 | 68 | @Override 69 | public Instance call(Instance inst1, Instance inst2) throws Exception { 70 | Cluster c = clusterFactory.create(); 71 | 72 | // aggregate inst1 and inst2 in a cluster 73 | updateCluster(inst1, c); 74 | updateCluster(inst2, c); 75 | 76 | return c; 77 | } 78 | 79 | } -------------------------------------------------------------------------------- 
/ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/unsupervised/cluster/functions/DistanceFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.spark.unsupervised.cluster.functions; 26 | 27 | import java.io.Serializable; 28 | import java.util.Map; 29 | 30 | import com.oculusinfo.ml.Instance; 31 | import com.oculusinfo.ml.feature.Feature; 32 | import com.oculusinfo.ml.unsupervised.cluster.FeatureTypeDefinition; 33 | 34 | public class DistanceFunction implements Serializable { 35 | private static final long serialVersionUID = 5075251184563041056L; 36 | private Map typeDefs; 37 | 38 | public DistanceFunction(Map typeDefs) { 39 | this.typeDefs = typeDefs; 40 | } 41 | 42 | @SuppressWarnings("unchecked") 43 | public double distance(Instance inst1, Instance inst2) { 44 | double totalDist = 0; 45 | 46 | try { 47 | for (FeatureTypeDefinition typedef : typeDefs.values()) { 48 | if (typedef.distFunc.getWeight() < 0.00001) continue; // skip if weight is near zero 49 | 50 | Feature f1 = inst1.getFeature(typedef.featureName); 51 | Feature f2 = inst2.getFeature(typedef.featureName); 52 | if (f1 == null || f2 == null) continue; 53 | 54 | totalDist += typedef.distFunc.distance(f1, f2) * typedef.distFunc.getWeight(); 55 | } 56 | } 57 | catch (Exception e) { 58 | // TODO - handle exception 59 | } 60 | return totalDist; 61 | } 62 | } -------------------------------------------------------------------------------- /ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/unsupervised/cluster/functions/FindBestClusterFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.spark.unsupervised.cluster.functions; 26 | 27 | import java.util.HashMap; 28 | import java.util.Map; 29 | 30 | import scala.Tuple2; 31 | import org.apache.spark.api.java.function.Function; 32 | import com.oculusinfo.ml.Instance; 33 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 34 | import com.oculusinfo.ml.unsupervised.cluster.ClusterFactory; 35 | 36 | public class FindBestClusterFunction implements Function, Map> { 37 | private static final long serialVersionUID = 1508254065822051773L; 38 | 39 | private double threshold = Double.MAX_VALUE; 40 | private DistanceFunction distFunc; 41 | private Map clusters; 42 | private ClusterFactory clusterFactory; 43 | 44 | public FindBestClusterFunction(DistanceFunction distFunc, Map clusters) { 45 | this.distFunc = distFunc; 46 | this.clusters = clusters; 47 | } 48 | 49 | public FindBestClusterFunction(DistanceFunction distFunc, Map clusters, double threshold, ClusterFactory clusterFactory) { 50 | this.distFunc = distFunc; 51 | this.clusters = clusters; 52 | this.threshold = threshold; 53 | this.clusterFactory = clusterFactory; 54 | } 55 | 56 | 57 | @Override 58 | public Map call(Tuple2 inst) throws Exception { 59 | Instance bestCluster = null; 60 | double bestScore = Double.MAX_VALUE; 61 | 62 | for (String clusterId : clusters.keySet()) { 63 | Instance cluster = clusters.get(clusterId); 64 | double d = distFunc.distance(inst._2, cluster); 65 | 66 | if (d < bestScore && d < threshold) { 67 | bestScore = d; 68 | bestCluster = cluster; 69 | } 70 | } 71 | Map result = new HashMap(); 72 | if (bestCluster == null) { 73 | Cluster c = clusterFactory.create(); 74 | c.add(inst._2); 75 | result.put(c.getId(), c); 76 | } 77 | else { 78 | result.put(bestCluster.getId(), bestCluster); 79 | } 80 | return result; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- 
/ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/unsupervised/cluster/functions/IncrementalClusterFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.spark.unsupervised.cluster.functions; 26 | 27 | import java.util.HashMap; 28 | import java.util.Iterator; 29 | import java.util.Map; 30 | 31 | import com.oculusinfo.ml.Instance; 32 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 33 | import com.oculusinfo.ml.unsupervised.cluster.ClusterFactory; 34 | 35 | import scala.Tuple2; 36 | import org.apache.spark.api.java.function.FlatMapFunction; 37 | 38 | public class IncrementalClusterFunction implements FlatMapFunction>, Instance> { 39 | private static final long serialVersionUID = 5096750219795665262L; 40 | 41 | private double threshold; 42 | private ClusterFactory clusterFactory; 43 | private DistanceFunction distFunc; 44 | 45 | public IncrementalClusterFunction(DistanceFunction distFunc, ClusterFactory clusterFactory, double threshold) { 46 | this.threshold = threshold; 47 | this.distFunc = distFunc; 48 | this.clusterFactory = clusterFactory; 49 | } 50 | 51 | @Override 52 | public Iterable call(Iterator> instances) throws Exception { 53 | Map clusters = new HashMap(); 54 | 55 | BestClusterFunction bestClusterFunc = new BestClusterFunction(distFunc, clusters, threshold); 56 | 57 | while (instances.hasNext()) { 58 | Tuple2 inst = instances.next(); 59 | 60 | Tuple2 result = bestClusterFunc.call(inst); 61 | 62 | Cluster c; 63 | if (result._1 == null) { 64 | c = clusterFactory.create(); 65 | clusters.put(c.getId(), c); 66 | } 67 | else { 68 | c = (Cluster)clusters.get(result._1); 69 | } 70 | c.add(inst._2); 71 | } 72 | return clusters.values(); 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /ensemble-clustering-spark/src/main/java/com/oculusinfo/ml/spark/unsupervised/cluster/functions/InstanceToClusterFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 
3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.spark.unsupervised.cluster.functions; 26 | 27 | import java.util.HashMap; 28 | import java.util.Map; 29 | 30 | import com.oculusinfo.ml.Instance; 31 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 32 | import com.oculusinfo.ml.unsupervised.cluster.ClusterFactory; 33 | 34 | import scala.Tuple2; 35 | import org.apache.spark.api.java.function.Function; 36 | 37 | public class InstanceToClusterFunction implements Function, Map> { 38 | private static final long serialVersionUID = 5096750219795665262L; 39 | private ClusterFactory clusterFactory; 40 | 41 | public InstanceToClusterFunction(ClusterFactory clusterFactory) { 42 | this.clusterFactory = clusterFactory; 43 | } 44 | 45 | @Override 46 | public Map call(Tuple2 instance) throws Exception { 47 | Map result = new HashMap(); 48 | Cluster c = clusterFactory.create(); 49 | c.add(instance._2); 50 | result.put(c.getId(), c); 51 | return result; 52 | } 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /ensemble-clustering-spark/src/test/java/com/oculusinfo/ml/spark/unsupervised/InstanceParser.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.spark.unsupervised; 26 | 27 | import scala.Tuple2; 28 | 29 | import com.oculusinfo.ml.Instance; 30 | import com.oculusinfo.ml.feature.numeric.NumericVectorFeature; 31 | import com.oculusinfo.ml.spark.SparkInstanceParser; 32 | 33 | public class InstanceParser extends SparkInstanceParser { 34 | private static final long serialVersionUID = -2680908243993104457L; 35 | 36 | @Override 37 | public Tuple2 call(String line) throws Exception { 38 | Instance inst = new Instance(); 39 | 40 | String tokens[] = line.split(","); 41 | 42 | NumericVectorFeature v = new NumericVectorFeature("point"); 43 | 44 | double x = Double.parseDouble(tokens[0]); 45 | double y = Double.parseDouble(tokens[1]); 46 | v.setValue( new double[] { x, y } ); 47 | 48 | inst.addFeature(v); 49 | 50 | return new Tuple2(inst.getId(), inst); 51 | } 52 | } -------------------------------------------------------------------------------- /ensemble-clustering/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | Ensemble Clustering Library 5 | Ensemble Clustering Library 6 | ensemble-clustering 7 | jar 8 | 9 | 10 | ml 11 | com.oculusinfo 12 | 0.1.0-SNAPSHOT 13 | 14 | 15 | 16 | 17 | 18 | 19 | org.apache.maven.plugins 20 | maven-javadoc-plugin 21 | 2.9 22 | 23 | Ensemble Clustering Library API 24 | Ensemble Clustering Library 25 | ${basedir}/docstyle.css 26 | 27 | 28 | 29 | package-javadoc 30 | 31 | jar 32 | 33 | 
/**
 * A fourth-order spline that allows fitting of points and velocities, and
 * matching of acceleration, at join points.
 *
 * NOTE: this class is currently an empty placeholder; only the derivation
 * below exists, no implementation.
 *
 * The general formula for the spline is a basic parameterized fourth-order
 * polynomial, by segment (s):
 *   x_s_n(t)  =    a_s_n t^4 +   b_s_n t^3 +   c_s_n t^2 + d_s_n t + e_s_n
 *   x'_s_n(t) =  4 a_s_n t^3 + 3 b_s_n t^2 + 2 c_s_n t   + d_s_n
 *   x"_s_n(t) = 12 a_s_n t^2 + 6 b_s_n t   + 2 c_s_n
 *
 * We are trying to match:
 *   x_s_n(0)  = p0_s_n       X_s(0) is the first point of segment s
 *   x'_s_n(0) = v0_s_n       X'_s(0) is the velocity at the first point of segment s
 *   x_s_n(1)  = p1_s_n       X_s(1) is the last point of segment s
 *   x'_s_n(1) = v1_s_n       X'_s(1) is the velocity at the last point of segment s
 *   p1_s_n    = p0_s+1_n     The last point of segment s matches the first point of segment s+1
 *   v1_s_n    = v0_s+1_n     The velocity at the last point of segment s matches the velocity at the first point of segment s+1
 *   x"_s_n(1) = x"_s+1_n(0)  The acceleration at the last point of segment s matches the acceleration at the first point of segment s+1
 *
 * This gives us the equations for our 5 unknowns. Evaluating at t = 0:
 *
 *   e_s_n = p0_s_n
 *   d_s_n = v0_s_n
 *
 * Substituting t = 1 (and t = 0 of the next segment) into the formulas above
 * gives the remaining constraints on a, b and c:
 *
 *   a_s_n + b_s_n + c_s_n + d_s_n + e_s_n = p1_s_n
 *   4 a_s_n + 3 b_s_n + 2 c_s_n + d_s_n   = v1_s_n
 *   12 a_s_n + 6 b_s_n + 2 c_s_n          = 2 c_s+1_n
 *
 * @author Nathan
 */
public class QuarticSpline {

}
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.geometry.geodesic; 26 | 27 | 28 | 29 | 30 | 31 | 32 | public class PositionCalculationParameters { 33 | private PositionCalculationType _type; 34 | private double _allowedError; 35 | private double _epsilon; 36 | private boolean _ignoreDirection; 37 | 38 | /** 39 | * Create a set of parameters that determine how geodetic calculations are 40 | * performed 41 | * 42 | * @param type 43 | * The way in which the calculations are to be performed (i.e., 44 | * using what system and approcimation) 45 | * @param allowedError 46 | * The proportion of error allowed in approximations 47 | * @param precision 48 | * The amount of error allowed in equality tests 49 | */ 50 | public PositionCalculationParameters (PositionCalculationType type, 51 | double allowedError, double precision, 52 | boolean ignoreDirection) { 53 | _type = type; 54 | _allowedError = allowedError; 55 | _epsilon = precision; 56 | _ignoreDirection = ignoreDirection; 57 | } 58 | 59 | /** 60 | * Get the way in which the calculations are to be performed (i.e., using 61 | * what system and approcimation) 62 | * 63 | * @see PositionCalculationType 64 | */ 65 | public PositionCalculationType getCalculationType () { 66 | return _type; 67 | } 68 | 69 | /** 70 | * Get the proportion of error allowed in approximations 71 | */ 72 | public double getAllowedError () { 73 | return _allowedError; 74 | } 75 | 76 | /** 77 | * Get the amount of error allowed in equality tests 78 | */ 79 | public double getPrecision () { 80 | return _epsilon; 81 | } 82 | 83 | /** 84 | * If true, the direction in which a track goes should be ignored when 85 | * calculating distance between tracks. 
86 | */ 87 | public boolean ignoreDirection () { 88 | return _ignoreDirection; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/geometry/geodesic/PositionCalculationType.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.geometry.geodesic; 26 | 27 | public enum PositionCalculationType { 28 | /** 29 | * True ellipsoidal calculations in lon/lat space. 
30 | */ 31 | Geodetic, 32 | /** 33 | * Nearly true calculations, assuming the globe to be a perfect sphere 34 | */ 35 | Spherical, 36 | /** 37 | * True calcualtions, in 3D space, with the center of the world at [0, 38 | * 0, 0], and the point 0N, 0E at [0, 0, R] 39 | */ 40 | Cartesian3D, 41 | /** 42 | * Vastly simplified calculations, just in lat/lon, but pretending they 43 | * are cartesian. Not even scaled by latitude 44 | */ 45 | Cartesian2D 46 | } 47 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/geometry/geodesic/tracks/SphericalTrack.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
/**
 * A Track whose per-segment calculations (distance, interpolation and
 * relative error) are delegated to SphereUtilities.
 *
 * NOTE(review): the List parameters appear without type arguments in this
 * view; they presumably carry element types in the original file - confirm
 * before relying on the raw types shown here.
 */
public class SphericalTrack extends Track {

    /**
     * Create a track from explicit calculation parameters and a sequence of points.
     */
    public SphericalTrack (PositionCalculationParameters parameters,
                           Position... points) {
        super(parameters, points);
    }

    /**
     * Create a track from explicit calculation parameters and a list of points.
     */
    public SphericalTrack (PositionCalculationParameters parameters,
                           List points) {
        super(parameters, points);
    }

    /**
     * Create a track from explicit calculation parameters, a list of points
     * and a parameterization that is handed through to the Track constructor.
     */
    public SphericalTrack (PositionCalculationParameters parameters,
                           List points,
                           List parameterization) {
        super(parameters, points, parameterization);
    }

    /**
     * Create a spherical version of an existing track, keeping its allowed
     * error, precision and ignore-direction settings but switching the
     * calculation type to Spherical.
     */
    public SphericalTrack (Track oldTrack) {
        super(oldTrack,
              new PositionCalculationParameters(PositionCalculationType.Spherical,
                                                oldTrack.getParameters().getAllowedError(),
                                                oldTrack.getParameters().getPrecision(),
                                                oldTrack.getParameters().ignoreDirection()));
    }

    /**
     * Create a spherical version of an existing track with explicitly given
     * allowed error, precision and ignore-direction settings.
     */
    public SphericalTrack (Track oldTrack, double allowedError, double precision, boolean ignoreDirection) {
        super(oldTrack,
              new PositionCalculationParameters(PositionCalculationType.Spherical, allowedError, precision, ignoreDirection));
    }

    /** Distance between two positions, as computed by SphereUtilities.getDistance. */
    @Override
    protected double getSegmentDistance (Position a, Position b) {
        return SphereUtilities.getDistance(a, b);
    }

    /** Position at parameter t between start and end, as computed by SphereUtilities.interpolate. */
    @Override
    protected Position interpolate (Position start, Position end, double t) {
        return SphereUtilities.interpolate(start, end, t);
    }

    /**
     * Area of the triangle (a, b, c) divided by the squared distance from a to b.
     *
     * NOTE(review): if the distance from a to b is zero this divides by zero
     * and yields NaN or infinity - confirm that callers never pass coincident
     * a and b.
     */
    @Override
    protected double getRelativeError (Position a, Position b, Position c) {
        double triangleArea = SphereUtilities.getTriangleArea(a, b, c);
        // Named "longSide", but it is always the a-b side, whichever side is actually longest.
        double longSide = SphereUtilities.getDistance(a, b);
        return triangleArea/(longSide*longSide);
    }

    /** Create another SphericalTrack over the given points, sharing this track's parameters. */
    @Override
    protected Track createTrack (List points) {
        return new SphericalTrack(getParameters(), points);
    }

    /** Create another SphericalTrack over the given points and parameterization, sharing this track's parameters. */
    @Override
    protected Track createTrack (List points,
                                 List parameterization) {
        return new SphericalTrack(getParameters(), points, parameterization);
    }
}
/**
 * Static helpers for converting angles and wrapping them into a chosen range.
 */
public class AngleUtilities {
    private static final double MINUTES_PER_DEGREE = 60.0;
    private static final double SECONDS_PER_DEGREE = 3600.0;
    private static final double FULL_TURN_DEGREES  = 360.0;
    private static final double FULL_TURN_RADIANS  = 2.0 * Math.PI;

    /**
     * Converts degrees/minutes/seconds into decimal degrees.
     *
     * The three components are simply added, so for a negative angle the
     * minutes and seconds must be passed as negative values too.
     *
     * @param degrees
     *            The degrees component
     * @param minutes
     *            The minutes component (1/60 of a degree each)
     * @param seconds
     *            The seconds component (1/3600 of a degree each)
     * @return the angle in decimal degrees
     */
    public static double fromDMS (double degrees, double minutes, double seconds) {
        double degreesAndMinutes = degrees + minutes / MINUTES_PER_DEGREE;
        return degreesAndMinutes + seconds / SECONDS_PER_DEGREE;
    }

    /**
     * Wraps an angle in degrees into the 360 degree wide range centered on
     * the given center, [center-180, center+180)
     *
     * @param center
     *            The center of the desired output range
     * @param angle
     *            The angle to return in the given range
     * @return angle, but specified in the given range, in degrees.
     */
    public static double intoRangeDegrees (double center, double angle) {
        return intoRange(center, angle, FULL_TURN_DEGREES);
    }

    /**
     * Wraps an angle in radians into the 2pi wide range centered on the
     * given center, [center-pi, center+pi)
     *
     * @param center
     *            The center of the desired output range
     * @param angle
     *            The angle to return in the given range
     * @return angle, but specified in the given range, in radians.
     */
    public static double intoRangeRadians (double center, double angle) {
        return intoRange(center, angle, FULL_TURN_RADIANS);
    }

    /**
     * Essentially number mod modulus, but with the ability to specify the
     * output range.
     *
     * @param center
     *            The center of the desired output range
     * @param number
     *            The number in question
     * @param modulus
     *            The modulus - i.e., the width of the range
     * @return the equivalent of number, mod modulus, centered on the given
     *         center (i.e., in the range [center-modulus/2,
     *         center+modulus/2))
     */
    public static double intoRange (double center, double number, double modulus) {
        // How many whole widths the number lies away from the center, rounded
        // to the nearest integer (ties go up, which makes the upper bound exclusive).
        double widthsFromCenter = (number - center) / modulus;
        long wholeWidths = Math.round(widthsFromCenter);
        return number - modulus * wholeWidths;
    }
}
/**
 * Accumulates running summary statistics (count, mean, min, max, variance)
 * over a stream of values without storing the values themselves.
 */
public class StatTracker {
    private int    _n;
    private double _sumX;
    private double _sumXSquared;
    private double _min;
    private double _max;

    public StatTracker () {
        reset();
    }

    /**
     * Forget everything tracked so far. Min and max become NaN until the
     * next value is added.
     */
    public void reset () {
        _n = 0;
        _sumX = 0;
        _sumXSquared = 0;
        _min = Double.NaN;
        _max = Double.NaN;
    }

    /**
     * Add one value to the tracked statistics.
     */
    public void addStat (double value) {
        ++_n;
        _sumX += value;
        _sumXSquared += value*value;
        // NaN marks "nothing tracked yet", so the first value always wins.
        if (Double.isNaN(_min) || value < _min)
            _min = value;
        if (Double.isNaN(_max) || value > _max)
            _max = value;
    }

    /** @return the number of values added since the last reset */
    public int numItems () {
        return _n;
    }

    /** @return the arithmetic mean of the tracked values; NaN if none have been tracked */
    public double mean () {
        return _sumX/_n;
    }

    /** @return the largest tracked value; NaN if none have been tracked */
    public double max () {
        return _max;
    }

    /** @return the smallest tracked value; NaN if none have been tracked */
    public double min () {
        return _min;
    }

    /**
     * Normalize a value to fit in the range we've tracked
     *
     * @return 0 if value is at the minimum tracked, 1 if at the max, linear
     *         interpolations thereof for other values. If no values have
     *         been tracked the result is 0. If the tracked range is a single
     *         point (one value, or several identical values) the result is
     *         1 when value equals that point and 0 otherwise.
     */
    public double normalizeValue (double value) {
        if (1 > _n) return 0.0;
        // A zero-width range cannot be interpolated; dividing by (max-min)
        // would yield NaN or infinity, so treat it like the single-value case.
        if (1 == _n || _max == _min) {
            return (value == _min) ? 1.0 : 0.0;
        }
        return (value-_min)/(_max-_min);
    }

    /**
     * @return the population variance (mean of squares minus square of the
     *         mean) of the tracked values, never negative; NaN if no values
     *         have been tracked
     */
    public double variance () {
        double mean = mean();
        // Rounding can push the difference slightly below zero for
        // near-constant data; clamp so standardDeviation() never sees a
        // negative number. NaN (no data) passes through Math.max unchanged.
        return Math.max(0.0, _sumXSquared/_n - mean*mean);
    }

    /** @return the square root of {@link #variance()} */
    public double standardDeviation () {
        return Math.sqrt(variance());
    }
}
/ensemble-clustering/src/main/java/com/oculusinfo/ml/InstanceJsonMapper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml; 26 | 27 | import java.io.IOException; 28 | import java.io.StringWriter; 29 | 30 | import org.codehaus.jackson.JsonFactory; 31 | import org.codehaus.jackson.JsonGenerationException; 32 | import org.codehaus.jackson.JsonGenerator; 33 | import org.codehaus.jackson.JsonParseException; 34 | import org.codehaus.jackson.map.JsonMappingException; 35 | import org.codehaus.jackson.map.ObjectMapper; 36 | 37 | /*** 38 | * Serializer for Instance object to/from JSON 39 | * 40 | * @author slangevin 41 | * 42 | */ 43 | public class InstanceJsonMapper { 44 | 45 | private static ObjectMapper mapper = new ObjectMapper(); 46 | private static JsonFactory factory = new JsonFactory(); 47 | 48 | public static Instance fromJson(String jsonAsString) 49 | throws JsonMappingException, JsonParseException, IOException { 50 | 51 | mapper.enableDefaultTyping(ObjectMapper.DefaultTyping.NON_FINAL); 52 | Instance inst = mapper.readValue(jsonAsString, Instance.class); 53 | 54 | return inst; 55 | } 56 | 57 | public static String toJson(Instance inst, boolean prettyPrint) 58 | throws JsonMappingException, JsonGenerationException, IOException { 59 | 60 | StringWriter writer = new StringWriter(); 61 | JsonGenerator generator = factory.createJsonGenerator(writer); 62 | 63 | if (prettyPrint) { 64 | generator.useDefaultPrettyPrinter(); 65 | } 66 | 67 | mapper.enableDefaultTyping(ObjectMapper.DefaultTyping.NON_FINAL); 68 | mapper.writeValue(generator, inst); 69 | return writer.toString(); 70 | } 71 | } -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/centroid/Centroid.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.centroid; 26 | 27 | import java.io.Serializable; 28 | import java.util.Collection; 29 | 30 | import com.oculusinfo.ml.feature.Feature; 31 | 32 | /*** 33 | * Interface that all Cluster Centroid objects must implement 34 | * 35 | * @author slangevin 36 | * 37 | * @param 38 | */ 39 | public interface Centroid extends Serializable { 40 | 41 | /*** 42 | * Add an Instance feature to this centroid. 
43 | * 44 | * The centroid will aggregate these features and compute a centroid value that summarizes them 45 | * 46 | * @param feature feature to add to this centroid 47 | */ 48 | public void add (T feature); 49 | 50 | /*** 51 | * Remove an Instance feature from this centroid 52 | * 53 | * @param feature 54 | */ 55 | public void remove (T feature); 56 | 57 | public void setName(String name); 58 | 59 | public String getName(); 60 | 61 | public Class getType(); 62 | 63 | public void reset(); 64 | 65 | /** 66 | * Get the centroid value represented 67 | */ 68 | public T getCentroid (); 69 | 70 | /** 71 | * Get the centroid value represented, modified to be aggregatable with 72 | * other centroids or features. This will often simply be the centroid, but 73 | * may contain more information (see, for example, semantic and word 74 | * frequency centroids) 75 | */ 76 | public Collection getAggregatableCentroid(); 77 | } 78 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/Feature.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.feature; 26 | 27 | import java.io.Serializable; 28 | 29 | import org.codehaus.jackson.annotate.JsonIgnore; 30 | 31 | /*** 32 | * Feature represents one data type of an instance in a data set. 33 | * 34 | * Each feature must have a unique name to distinguish it from others. 35 | * 36 | * @author slangevin 37 | * 38 | */ 39 | public abstract class Feature implements Serializable { 40 | private static final long serialVersionUID = 192274668774344842L; 41 | 42 | // the unique name of the feature 43 | protected String name; 44 | 45 | // The weight of this feature 46 | private double weight; 47 | 48 | public Feature() { 49 | // empty constructor 50 | this.weight = 1.0; 51 | } 52 | 53 | public Feature(String name) { 54 | this.name = name; 55 | this.weight = 1.0; 56 | } 57 | 58 | public double getWeight () { 59 | return weight; 60 | } 61 | 62 | public void setWeight (double weight) { 63 | this.weight = weight; 64 | } 65 | 66 | public String getName() { 67 | return name; 68 | } 69 | 70 | public void setName(String name) { 71 | this.name = name; 72 | } 73 | 74 | @JsonIgnore 75 | public String getId() { 76 | return name; 77 | } 78 | 79 | @Override 80 | public int hashCode() { 81 | return name.hashCode(); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/FeatureTable.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature; 26 | 27 | import java.io.Serializable; 28 | import java.util.Collection; 29 | import java.util.HashMap; 30 | import java.util.LinkedList; 31 | import java.util.List; 32 | import java.util.Map; 33 | 34 | import org.codehaus.jackson.annotate.JsonIgnore; 35 | 36 | public class FeatureTable implements Serializable { 37 | private static final long serialVersionUID = -5842770909462107078L; 38 | private Map> table = new HashMap>(); 39 | 40 | public FeatureTable() { super(); } 41 | 42 | public boolean containsFeature(String name) { 43 | return table.containsKey(name); 44 | } 45 | 46 | public void addFeature(Feature f) { 47 | if (table.containsKey(f.getName()) == false) { 48 | table.put(f.getName(), new LinkedList()); 49 | } 50 | Collection featureList = table.get(f.getName()); 51 | featureList.add(f); 52 | } 53 | 54 | public void addFeatures(Collection features) { 55 | for (Feature f : features) { 56 | addFeature(f); 57 | } 58 | } 59 | 60 | public void addFeatures(FeatureTable copyTable) { 61 | for (Collection list : copyTable.getFeatures()) { 62 | addFeatures(list); 63 | } 64 | } 65 | 66 | public List removeFeature(String name) { 67 | return table.remove(name); 68 | } 69 | 70 | public List getFeature(String name) { 71 | return table.get(name); 72 | } 73 | 74 | @JsonIgnore 75 | public Collection getFeatureNames() { 76 | return table.keySet(); 77 | } 78 | 79 | public Map> getTable() { 80 | return table; 81 | } 82 | 83 | public void setTable(Map> table) { 84 | this.table = table; 85 | } 86 | 87 | public int numFeatures() { 88 | return table.size(); 89 | } 90 | 91 | @JsonIgnore 92 | public Collection> getFeatures() { 93 | return table.values(); 94 | } 95 | 96 | @Override 97 | public String toString() { 98 | return toString(""); 99 | } 100 | 101 | public String toString(String prefix) { 102 | StringBuilder str = new StringBuilder(); 103 | for (Collection features : table.values()) { 104 | int i = 1; 105 | for (Feature f : 
features) { 106 | str.append(prefix + f); 107 | if (i < features.size()) str.append(","); 108 | i++; 109 | } 110 | } 111 | return str.toString(); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/bagofwords/BagOfWordsFeature.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.bagofwords; 26 | 27 | import java.util.Collection; 28 | 29 | import org.codehaus.jackson.annotate.JsonIgnore; 30 | 31 | import com.oculusinfo.ml.feature.Feature; 32 | import com.oculusinfo.ml.feature.string.StringFeature; 33 | import com.oculusinfo.ml.stats.FeatureFrequency; 34 | import com.oculusinfo.ml.stats.FeatureFrequencyTable; 35 | 36 | /*** 37 | * A BagOfWordsFeature represents a Set of Words each of which is associated with a frequency count. 38 | * 39 | * Useful for representing Document or other Text fields in a DataSet. 40 | * 41 | * @author slangevin 42 | * 43 | */ 44 | public class BagOfWordsFeature extends Feature { 45 | private static final long serialVersionUID = 6927104885425283254L; 46 | private FeatureFrequencyTable freqTable = new FeatureFrequencyTable(); 47 | 48 | public BagOfWordsFeature() { 49 | super(); 50 | } 51 | 52 | public BagOfWordsFeature(String name) { 53 | super(name); 54 | } 55 | 56 | public void setCount(FeatureFrequency freq) { 57 | freqTable.remove(freq); 58 | freqTable.add(freq); 59 | } 60 | 61 | public void setCount(String term, int count) { 62 | FeatureFrequency freq = new FeatureFrequency(new StringFeature(term)); 63 | freq.frequency = count; 64 | setCount(freq); 65 | } 66 | 67 | public void incrementValue(String term) { 68 | freqTable.add(new StringFeature(term)); 69 | } 70 | 71 | public void decrementValue(String value) { 72 | freqTable.decrement(new StringFeature(value)); 73 | } 74 | 75 | public FeatureFrequency getCount(String term) { 76 | return freqTable.get(new StringFeature(term)); 77 | } 78 | 79 | @JsonIgnore 80 | public Collection getValues() { 81 | return freqTable.getAll(); 82 | } 83 | 84 | public FeatureFrequencyTable getFreqTable() { 85 | return this.freqTable; 86 | } 87 | 88 | public void setFreqTable(FeatureFrequencyTable table) { 89 | freqTable = table; 90 | } 91 | 92 | @Override 93 | public String toString() { 94 | StringBuilder str = new StringBuilder(); 95 | 
str.append(this.getName() + ":["); 96 | int i=1; 97 | for (FeatureFrequency f : freqTable.getAll()) { 98 | str.append(f.feature.getName() + "=" + f.frequency); 99 | if (i < freqTable.getAll().size()) str.append(";"); 100 | i++; 101 | } 102 | str.append("]"); 103 | 104 | return str.toString(); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/bagofwords/centroid/BagOfWordsCentroid.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.bagofwords.centroid; 26 | 27 | import java.util.Collection; 28 | import java.util.Collections; 29 | import com.oculusinfo.ml.centroid.Centroid; 30 | import com.oculusinfo.ml.feature.bagofwords.BagOfWordsFeature; 31 | import com.oculusinfo.ml.stats.FeatureFrequency; 32 | import com.oculusinfo.ml.stats.FeatureFrequencyTable; 33 | 34 | /*** 35 | * A Centroid for BagOfWordsFeatures that represents the centroid as the top 10 words with the highest frequency 36 | * 37 | * @author slangevin 38 | * 39 | */ 40 | public class BagOfWordsCentroid implements Centroid { 41 | private static final long serialVersionUID = -5723416814427314073L; 42 | private String name; 43 | private static final int MAX_CENTROID_FEATURES = 10; 44 | protected final FeatureFrequencyTable freqTable = new FeatureFrequencyTable(); 45 | 46 | @Override 47 | public void add(BagOfWordsFeature feature) { 48 | for (FeatureFrequency nom : feature.getValues()) { 49 | freqTable.add(nom); 50 | } 51 | } 52 | 53 | @Override 54 | public void remove(BagOfWordsFeature feature) { 55 | for (FeatureFrequency nom : feature.getValues()) { 56 | freqTable.decrementBy(nom.feature, nom.frequency); 57 | } 58 | } 59 | 60 | @Override 61 | public Collection getAggregatableCentroid() { 62 | BagOfWordsFeature rawCounts = new BagOfWordsFeature(name); 63 | rawCounts.setFreqTable(freqTable); 64 | return Collections.singleton(rawCounts); 65 | } 66 | 67 | @Override 68 | public BagOfWordsFeature getCentroid() { 69 | // centroid is the top N most frequent words 70 | Collection freqs = freqTable.getTopN(MAX_CENTROID_FEATURES); 71 | BagOfWordsFeature medoid = new BagOfWordsFeature(name); 72 | for (FeatureFrequency freq : freqs) { 73 | medoid.setCount(freq); 74 | } 75 | return medoid; 76 | } 77 | 78 | @Override 79 | public void setName(String name) { 80 | this.name = name; 81 | } 82 | 83 | @Override 84 | public String getName() { 85 | return this.name; 86 | } 87 | 88 | @Override 89 | public Class 
getType() { 90 | return BagOfWordsFeature.class; 91 | } 92 | 93 | public FeatureFrequencyTable getFreqTable() { 94 | return this.freqTable; 95 | } 96 | 97 | @Override 98 | public void reset() { 99 | freqTable.clear(); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/bagofwords/distance/CosineDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.bagofwords.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.bagofwords.BagOfWordsFeature; 29 | import com.oculusinfo.ml.stats.FeatureFrequency; 30 | 31 | /*** 32 | * A distance function that computes 1 - Cosine Similarity between two BagOfWordFeatures 33 | * 34 | * @author slangevin 35 | * 36 | */ 37 | public class CosineDistance extends DistanceFunction { 38 | private static final long serialVersionUID = -635994591459075095L; 39 | 40 | public CosineDistance() { 41 | this(1); 42 | } 43 | 44 | public CosineDistance(double weight) { 45 | super(weight); 46 | } 47 | 48 | @Override 49 | public double distance(BagOfWordsFeature x, BagOfWordsFeature y) { 50 | double dotprod = 0, xlength = 0, ylength = 0; 51 | 52 | for (FeatureFrequency xf : x.getValues()) { 53 | xlength += xf.frequency * xf.frequency; 54 | FeatureFrequency yf = y.getCount(xf.feature.getName()); 55 | if (yf != null) dotprod += xf.frequency * yf.frequency; 56 | } 57 | for (FeatureFrequency yf : y.getValues()) { 58 | ylength += yf.frequency * yf.frequency; 59 | } 60 | // if both are empty then distance is max 61 | if (xlength == 0 || ylength == 0) return 1.0; 62 | 63 | return 1.0 - (dotprod / ( Math.sqrt(xlength) * Math.sqrt(ylength) )); 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/bagofwords/distance/ExactTokenMatchDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.bagofwords.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.bagofwords.BagOfWordsFeature; 29 | import com.oculusinfo.ml.stats.FeatureFrequency; 30 | 31 | /*** 32 | * A distance function that computes distance between two BagOfWordFeatures by 33 | * the size of the intersection of their words divided the largest set of words 34 | * 35 | * @author slangevin 36 | * 37 | */ 38 | public class ExactTokenMatchDistance extends DistanceFunction { 39 | private static final long serialVersionUID = -3651531184290382230L; 40 | 41 | public ExactTokenMatchDistance() { 42 | super(1); 43 | } 44 | 45 | public ExactTokenMatchDistance(double weight) { 46 | super(weight); 47 | } 48 | 49 | @Override 50 | public double distance(BagOfWordsFeature x, BagOfWordsFeature y) { 51 | double dist = 0; 52 | int m = x.getValues().size(); 53 | int n = y.getValues().size(); 54 | double norm = Math.max(m, n); 55 | 56 | // set a to be the largest nominal list 57 | BagOfWordsFeature a = x, b = y; 58 | if (m < n) { 59 | a = y; 60 | b = x; 61 | } 62 | 63 | for (FeatureFrequency xf : a.getValues()) { 64 | if (b.getCount(xf.feature.getName()) == null) { 65 | dist += 1; 66 | } 67 | } 68 | 69 | return dist / norm; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/numeric/NumericVectorFeature.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.feature.numeric; 26 | 27 | import java.util.ArrayList; 28 | import java.util.List; 29 | 30 | import com.oculusinfo.ml.feature.Feature; 31 | 32 | /*** 33 | * A NumericVectorFeature represents a vector of double precision numbers. 
34 | * 35 | * Useful for representing a large class of data that is numeric in nature or can be encoded as such 36 | * 37 | * @author slangevin 38 | * 39 | */ 40 | public class NumericVectorFeature extends Feature { 41 | private static final long serialVersionUID = 4845380498652903996L; 42 | private double[] vector; 43 | 44 | private String vectorToString() { 45 | StringBuilder str = new StringBuilder(); 46 | str.append("["); 47 | for (int i=0; i < vector.length; i++) { 48 | str.append(vector[i]); 49 | if (i < vector.length - 1) str.append(";"); 50 | } 51 | str.append("]"); 52 | return str.toString(); 53 | } 54 | 55 | @Override 56 | public String toString() { 57 | return (this.getName() + ":" + vectorToString()); 58 | } 59 | 60 | public NumericVectorFeature() { 61 | super(); 62 | } 63 | 64 | public NumericVectorFeature(String name) { 65 | super(name); 66 | } 67 | 68 | public void setValue(double[] vector) { 69 | this.vector = vector; 70 | } 71 | 72 | public void setValue(List vector) { 73 | setValue(new ArrayList(vector)); 74 | } 75 | 76 | public double[] getValue() { 77 | return this.vector; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/numeric/centroid/MeanNumericVectorCentroid.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.numeric.centroid; 26 | 27 | import java.util.Collection; 28 | import java.util.Collections; 29 | 30 | import com.oculusinfo.ml.centroid.Centroid; 31 | import com.oculusinfo.ml.feature.numeric.NumericVectorFeature; 32 | 33 | /*** 34 | * A Centroid for NumericVectorFeatures that represents the centroid as the average number of each vector component 35 | * 36 | * @author slangevin 37 | * 38 | */ 39 | public class MeanNumericVectorCentroid implements Centroid { 40 | private static final long serialVersionUID = 8127455596937762659L; 41 | private String name; 42 | private double weight; 43 | private double[] meanVector; 44 | 45 | @Override 46 | public void add(NumericVectorFeature feature) { 47 | double addedWeight = feature.getWeight(); 48 | double newWeight = weight + addedWeight; 49 | 50 | if (meanVector == null) { 51 | if (feature.getValue() != null) { 52 | meanVector = feature.getValue().clone(); 53 | weight = addedWeight; 54 | } 55 | } 56 | else { 57 | if (feature.getValue() != null) { 58 | // incrementally revise the centroid vector 59 | for (int i=0; i < meanVector.length; i++) { 60 | meanVector[i] = (meanVector[i] * weight + feature.getValue()[i] * addedWeight) / newWeight; 61 | } 62 | weight = newWeight; 63 | } 64 | } 65 | } 66 | 67 | @Override 68 | public void remove(NumericVectorFeature feature) { 69 | double removedWeight = feature.getWeight(); 70 | double newWeight = weight - removedWeight; 71 | 72 | if (0.0 == weight) return; 73 | 74 | // decrement centroid vector 75 | for (int i=0; i < meanVector.length; i++) { 76 | meanVector[i] = (meanVector[i] * weight - feature.getValue()[i] * removedWeight) / newWeight; 77 | } 78 | weight = newWeight; 79 | } 80 | 81 | @Override 82 | public void setName(String name) { 83 | this.name = name; 84 | } 85 | 86 | @Override 87 | public String getName() { 88 | return this.name; 89 | } 90 | 91 | @Override 92 | public Class getType() { 93 | return NumericVectorFeature.class; 94 | } 
95 | 96 | @Override 97 | public Collection getAggregatableCentroid() { 98 | return Collections.singleton(getCentroid()); 99 | } 100 | 101 | @Override 102 | public NumericVectorFeature getCentroid() { 103 | // create the centroid geospatial feature set 104 | NumericVectorFeature mean = new NumericVectorFeature(name); 105 | 106 | mean.setValue(meanVector); 107 | mean.setWeight(weight); 108 | 109 | return mean; 110 | } 111 | 112 | @Override 113 | public void reset() { 114 | meanVector = null; 115 | weight = 0; 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/numeric/distance/EuclideanDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.feature.numeric.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.numeric.NumericVectorFeature; 29 | 30 | /*** 31 | * A distance function that computes the Euclidean distance between two VectorFeatures 32 | * 33 | * @author slangevin 34 | * 35 | */ 36 | public class EuclideanDistance extends DistanceFunction{ 37 | private static final long serialVersionUID = -1493313434323633636L; 38 | 39 | public EuclideanDistance(double weight) { 40 | super(weight); 41 | } 42 | 43 | @Override 44 | public double distance(NumericVectorFeature x, NumericVectorFeature y) { 45 | double[] vector1 = x.getValue(); 46 | double[] vector2 = y.getValue(); 47 | 48 | double d = 0; 49 | for (int i = 0; i < vector1.length; i++) { 50 | d += Math.pow(vector1[i] - vector2[i], 2); 51 | } 52 | 53 | // return euclidean distance 54 | return Math.sqrt( d / (double)vector1.length ); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/semantic/SemanticFeature.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.semantic; 26 | 27 | import com.oculusinfo.ml.feature.Feature; 28 | 29 | /*** 30 | * A SemanticFeature represents an instance of a semantic class in an Ontology 31 | * 32 | * The SemanticFeature is associated with: 33 | * * An Ontology Concept class name 34 | * * A uri that uniquely identifies the instance 35 | * * A label that describes the instance 36 | * 37 | * Example: Concept = foaf:Person, URI = http://danbri.org/ , Label = Dan Brickley 38 | * 39 | * @author slangevin 40 | * 41 | */ 42 | public class SemanticFeature extends Feature { 43 | private static final long serialVersionUID = -6524985038597553461L; 44 | private String concept; 45 | private String uri; 46 | private String label; 47 | 48 | public SemanticFeature() { 49 | super(); 50 | } 51 | 52 | public SemanticFeature(String name) { 53 | super(name); 54 | } 55 | 56 | public void setValue(String concept, String uri) { 57 | this.concept = concept; 58 | this.uri = uri; 59 | } 60 | 61 | public void setValue(String concept, String uri, String label) { 62 | this.concept = concept; 63 | this.uri = uri; 64 | this.label = label; 65 | } 66 | 67 | public String getConcept() { 68 | return concept; 69 | } 70 | 71 | public void setConcept(String concept) { 72 | this.concept = concept; 73 | } 74 | 75 | public String getUri() { 76 | return uri; 77 | } 78 | 79 | public void setUri(String uri) { 80 | this.uri = uri; 81 | } 82 | 83 | public String getLabel() { 84 | return label; 85 | } 86 | 87 | public void setLabel(String label) { 88 | this.label = label; 89 | } 90 | 91 | @Override 92 | public String toString() { 93 | return toString(false); 94 | } 95 | 96 | public String toString(boolean suppressLabel) { 97 | StringBuilder output = new StringBuilder(); 98 | output.append(this.getId()); 99 | if (label != null && !suppressLabel) { 100 | output.append(":" + label); 101 | } 102 | return output.toString(); 103 | } 104 | 105 | @Override 106 | public String getId() { 107 | return 
(name + ":" + concept); // + ":" + uri); 108 | } 109 | 110 | @Override 111 | public int hashCode() { 112 | return getId().hashCode(); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/semantic/distance/SemMFDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.semantic.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.semantic.SemanticFeature; 29 | 30 | /*** 31 | * A distance function that computes the distance between two SemanticFeatures 32 | * 33 | * In order to use this distance function you must provide a taxonomy that describes the Semantic Class hierarchy as a tree 34 | * 35 | * The distance calculation finds the lowest common ancestor between the two SemanticFeatures and computes 36 | * the distance using the algorithm described in: 37 | * 38 | * Oldakowski, R. and Bizer, C., SemMF: a framework for calculating semantic similarity of objects represented as RDF graphs. 4th International Semantic Web Conference (ISWC 2005). 39 | * 40 | * @author slangevin 41 | * 42 | */ 43 | public class SemMFDistance extends DistanceFunction { 44 | private static final long serialVersionUID = -7485093350764491674L; 45 | private Concept taxonomy; 46 | 47 | public SemMFDistance(Concept taxonomy) { 48 | this(taxonomy, 1); 49 | } 50 | 51 | public SemMFDistance(Concept taxonomy, double weight) { 52 | super(weight); 53 | this.taxonomy = taxonomy; 54 | } 55 | 56 | private double distance(Concept x, Concept y) { 57 | int xlvl = x.getDepth(); 58 | int ylvl = y.getDepth(); 59 | return 0.5 / Math.pow(2, ylvl) - 0.5 / Math.pow(2, xlvl); 60 | } 61 | 62 | @Override 63 | public double distance(SemanticFeature x, SemanticFeature y) { 64 | double dist = 1; 65 | 66 | Concept cx = taxonomy.findConcept(x.getConcept()); 67 | Concept cy = taxonomy.findConcept(y.getConcept()); 68 | Concept lca = cx.findCommonAncestor(cy); 69 | 70 | // No common ancestor exists - return max distance 71 | if (lca != null) { 72 | dist = distance(cx, lca) + distance(cy, lca); 73 | } 74 | 75 | return dist; 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- 
/ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/semantic/distance/WuPalmerDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.semantic.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.semantic.SemanticFeature; 29 | 30 | /*** 31 | * A distance function that computes the distance between two SemanticFeatures 32 | * 33 | * In order to use this distance function you must provide a taxonomy that describes the Semantic Class hierarchy as a tree 34 | * 35 | * The distance calculation finds the lowest common ancestor between the two SemanticFeatures and computes 36 | * the distance using the algorithm described in: 37 | * 38 | * Wu, Z. and Palmer, M., Verbs semantics and lexical selection. In Proceedings of the 32nd annual meeting on Association for Computational Linguistics (Jun. 1994). 39 | * 40 | * @author slangevin 41 | * 42 | */ 43 | public class WuPalmerDistance extends DistanceFunction { 44 | private static final long serialVersionUID = 2357149443801960761L; 45 | private Concept taxonomy; 46 | 47 | public WuPalmerDistance(Concept taxonomy) { 48 | this(taxonomy, 1); 49 | } 50 | 51 | public WuPalmerDistance(Concept taxonomy, double weight) { 52 | super(weight); 53 | this.taxonomy = taxonomy; 54 | } 55 | 56 | @Override 57 | public double distance(SemanticFeature x, SemanticFeature y) { 58 | double dist = 1; 59 | // double penalty = 0.2; //0.3; // penalty for not being the same uri 60 | 61 | // // For now there is no distance if the uri is the same entity or same concept 62 | // if (x.getUri().equalsIgnoreCase(y.getUri())) { 63 | // return 0; 64 | // } 65 | 66 | Concept cx = taxonomy.findConcept(x.getConcept()); 67 | Concept cy = taxonomy.findConcept(y.getConcept()); 68 | Concept lca = cx.findCommonAncestor(cy); 69 | 70 | // No common ancestor exists - return max distance 71 | if (lca != null) { 72 | int n3 = lca.getDepth(); 73 | int n1 = cx.getDepth() - n3; 74 | int n2 = cy.getDepth() - n3; 75 | dist = 1.0 - 2.0 * n3 / (n1 + n2 + 2.0 * n3); 76 | } 77 | return dist; 78 | // return (dist 
+ penalty) / (1 + penalty); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/spatial/GeoSpatialFeature.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.spatial; 26 | 27 | import com.oculusinfo.ml.feature.Feature; 28 | 29 | /*** 30 | * A GeoSpatialFeature represents a latitude and longitude geo-spatial coordinate 31 | * 32 | * @author slangevin 33 | * 34 | */ 35 | public class GeoSpatialFeature extends Feature { 36 | private static final long serialVersionUID = 7917681828048658982L; 37 | private double latitude; 38 | private double longitude; 39 | 40 | @Override 41 | public String toString() { 42 | return (this.getName() + ":[" + latitude + ";" + longitude + "]"); 43 | } 44 | 45 | public GeoSpatialFeature() { 46 | super(); 47 | } 48 | 49 | public GeoSpatialFeature(String name) { 50 | super(name); 51 | } 52 | 53 | public void setValue(double latitude, double longitude) { 54 | this.latitude = latitude; 55 | this.longitude = longitude; 56 | } 57 | 58 | public double getLatitude() { 59 | return latitude; 60 | } 61 | 62 | public void setLatitude(double latitude) { 63 | this.latitude = latitude; 64 | } 65 | 66 | public double getLongitude() { 67 | return longitude; 68 | } 69 | 70 | public void setLongitude(double longitude) { 71 | this.longitude = longitude; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/spatial/TrackFeature.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.spatial; 26 | 27 | import com.oculusinfo.geometry.geodesic.Track; 28 | import com.oculusinfo.ml.feature.Feature; 29 | 30 | /*** 31 | * A TrackFeature represents a sequence of geo-spatial latitude and longitude coordinates 32 | * 33 | * @author slangevin 34 | * 35 | */ 36 | public class TrackFeature extends Feature { 37 | private static final long serialVersionUID = -7923265691042687996L; 38 | 39 | 40 | 41 | private Track track; 42 | 43 | @Override 44 | public String toString() { 45 | return (this.getName() + " : " + track.toString()); 46 | } 47 | 48 | public TrackFeature() { 49 | super(); 50 | } 51 | 52 | public TrackFeature(String name) { 53 | super(name); 54 | } 55 | 56 | public void setValue(Track track) { 57 | this.track = track; 58 | } 59 | 60 | public Track getValue() { 61 | return track; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/spatial/centroid/FastGeoSpatialCentroid.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.feature.spatial.centroid; 26 | 27 | import java.util.Collection; 28 | import java.util.Collections; 29 | 30 | import com.oculusinfo.ml.centroid.Centroid; 31 | import com.oculusinfo.ml.feature.spatial.GeoSpatialFeature; 32 | 33 | /*** 34 | * A Centroid for GeoSpatialFeatures that represents the centroid using a naive average of latitude and longitude 35 | * 36 | * @author slangevin 37 | * 38 | */ 39 | public class FastGeoSpatialCentroid implements Centroid { 40 | private static final long serialVersionUID = 538283509674357135L; 41 | 42 | private String name; 43 | private double weight = 0.0; 44 | private double clat = 0, clon; 45 | 46 | @Override 47 | public void add(GeoSpatialFeature feature) { 48 | double addedWeight = feature.getWeight(); 49 | double newWeight = weight + addedWeight; 50 | 51 | // incrementally revise the centroid coordinates 52 | clat = (clat * weight + feature.getLatitude() * addedWeight) / newWeight; 53 | clon = (clon * weight + feature.getLongitude() * addedWeight) / newWeight; 54 | weight = newWeight; 55 | } 56 | 57 | @Override 58 | public void remove(GeoSpatialFeature feature) { 59 | double removedWeight = feature.getWeight(); 60 | double newWeight = weight - removedWeight; 61 | 62 | if (weight <= 0.0) { 63 | System.out.println("Attempt to remove from empty GeoSpatialCentroid"); 64 | } else { 65 | clat = (clat * weight - feature.getLatitude() * removedWeight) / newWeight; 66 | clon = (clon * weight 
- feature.getLongitude() * removedWeight) / newWeight; 67 | weight = newWeight; 68 | } 69 | } 70 | 71 | @Override 72 | public Collection getAggregatableCentroid () { 73 | return Collections.singleton(getCentroid()); 74 | } 75 | 76 | @Override 77 | public GeoSpatialFeature getCentroid() { 78 | // create the centroid geospatial feature set 79 | GeoSpatialFeature centroid = new GeoSpatialFeature(name); 80 | centroid.setValue( (clat), (clon) ); // return average lat, lon - very crude method of determining centroid for geo 81 | centroid.setWeight(weight); 82 | return centroid; 83 | } 84 | 85 | @Override 86 | public void setName(String name) { 87 | this.name = name; 88 | } 89 | 90 | @Override 91 | public String getName() { 92 | return this.name; 93 | } 94 | 95 | @Override 96 | public Class getType() { 97 | return GeoSpatialFeature.class; 98 | } 99 | 100 | @Override 101 | public void reset() { 102 | weight = 0; 103 | clat = 0; 104 | clon = 0; 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/spatial/distance/EquitangularDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.feature.spatial.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.spatial.GeoSpatialFeature; 29 | 30 | /*** 31 | * A distance function that computes the equitangular distance between two GeoSpatialFeatures 32 | * 33 | * @author slangevin 34 | * 35 | */ 36 | public class EquitangularDistance extends DistanceFunction { 37 | private static final long serialVersionUID = -1226497733338508060L; 38 | 39 | public EquitangularDistance(double weight) { 40 | super(weight); 41 | } 42 | 43 | @Override 44 | public double distance(GeoSpatialFeature x, GeoSpatialFeature y) { 45 | double lat1 = Math.toRadians(x.getLatitude()); 46 | double lat2 = Math.toRadians(y.getLatitude()); 47 | double lon1 = Math.toRadians(x.getLongitude()); 48 | double lon2 = Math.toRadians(y.getLongitude()); 49 | 50 | double a = (lon2-lon1) * Math.cos((lat1+lat2)/2); 51 | double b = (lat2-lat1); 52 | double d = Math.sqrt(a*a + b*b); 53 | 54 | return d; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/spatial/distance/EuclideanDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.spatial.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.spatial.GeoSpatialFeature; 29 | 30 | /*** 31 | * A distance function that computes the Euclidean distance between two GeoSpatialFeatures ignoring curvature 32 | * 33 | * @author slangevin 34 | * 35 | */ 36 | public class EuclideanDistance extends DistanceFunction { 37 | private static final long serialVersionUID = -123522038033912391L; 38 | private final static double EARTH_RADIUS = 6371.00; // Radius in Kilometers default 39 | private final static double normConst = 1 / Math.sqrt(Math.pow(360, 2) + Math.pow(180, 2)); // 1 / 12742.0; 40 | 41 | public EuclideanDistance(double weight) { 42 | super(weight); 43 | } 44 | 45 | @Override 46 | public double distance(GeoSpatialFeature x, GeoSpatialFeature y) { 47 | double lat1 = x.getLatitude(); 48 | double lat2 = y.getLatitude(); 49 | double lon1 = x.getLongitude(); 50 | double lon2 = y.getLongitude(); 51 | 52 | // return normalized euclidean distance [0,1] 53 | return Math.sqrt(Math.pow(lat2 - lat1, 2) + Math.pow(lon2 - lon1, 2)) * normConst; 54 | } 55 | 56 | public double distanceInCartesianPlane(GeoSpatialFeature x, GeoSpatialFeature y) { 57 | double lat1 = Math.toRadians(x.getLatitude()); 58 | double lat2 = Math.toRadians(y.getLatitude()); 59 | double lon1 = Math.toRadians(x.getLongitude()); 60 | double lon2 = Math.toRadians(y.getLongitude()); 61 | double x1 = EARTH_RADIUS * Math.cos(lat1) * Math.cos(lon1); 62 | double x2 = EARTH_RADIUS * Math.cos(lat2) * Math.cos(lon2); 63 | double y1 = EARTH_RADIUS * Math.cos(lat1) * Math.sin(lon2); 64 | double y2 = EARTH_RADIUS * Math.cos(lat1) * Math.sin(lon2); 65 | double z1 = EARTH_RADIUS * Math.sin(lat1); 66 | double z2 = EARTH_RADIUS * Math.sin(lat2); 67 | 68 | // return normalized euclidean distance [0,1] 69 | return Math.sqrt(Math.pow(x2 - x1, 2) + Math.pow(y2 - y1, 2) + Math.pow(z2 - z1, 2)) * normConst; 70 | } 
71 | } 72 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/spatial/distance/HaversineDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.spatial.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.spatial.GeoSpatialFeature; 29 | 30 | /*** 31 | * A distance function that computes the normalized Haversine (Great Circle) distance between two GeoSpatialFeatures 32 | * 33 | * @author slangevin 34 | * 35 | */ 36 | public class HaversineDistance extends DistanceFunction { 37 | private static final long serialVersionUID = -779446291214228343L; 38 | private final static double EARTH_RADIUS = 6371.00; // Radius in Kilometers default 39 | 40 | public HaversineDistance(double weight) { 41 | super(weight); 42 | } 43 | 44 | @Override 45 | public double distance(GeoSpatialFeature x, GeoSpatialFeature y) { 46 | double lat1 = x.getLatitude(); 47 | double lat2 = y.getLatitude(); 48 | double lon1 = x.getLongitude(); 49 | double lon2 = y.getLongitude(); 50 | double dLat = Math.toRadians(lat2 - lat1); 51 | double dLng = Math.toRadians(lon2 - lon1); 52 | 53 | double a = Math.sin(dLat / 2) * Math.sin(dLat / 2) + 54 | Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2)) * 55 | Math.sin(dLng / 2) * Math.sin(dLng / 2); 56 | 57 | double normDist = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)) / Math.PI; 58 | return normDist; 59 | } 60 | 61 | public double distanceInKM(GeoSpatialFeature x, GeoSpatialFeature y) { 62 | double lat1 = x.getLatitude(); 63 | double lat2 = y.getLatitude(); 64 | double lon1 = x.getLongitude(); 65 | double lon2 = y.getLongitude(); 66 | double dLat = Math.toRadians(lat2-lat1); 67 | double dLng = Math.toRadians(lon2-lon1); 68 | double a = Math.sin(dLat/2) * Math.sin(dLat/2) + 69 | Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2)) * 70 | Math.sin(dLng/2) * Math.sin(dLng/2); 71 | double c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1-a)); 72 | double dist = EARTH_RADIUS * c; 73 | return dist; 74 | } 75 | } 76 | 
-------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/spatial/distance/SphericalCosineDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.spatial.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.spatial.GeoSpatialFeature; 29 | 30 | /*** 31 | * A distance function that computes the Spherical Cosine distance between two GeoSpatialFeatures 32 | * @author slangevin 33 | * 34 | */ 35 | public class SphericalCosineDistance extends DistanceFunction { 36 | private static final long serialVersionUID = -4202417997475962513L; 37 | private final static double EARTH_RADIUS = 6371.00; // Radius in Kilometers default 38 | 39 | public SphericalCosineDistance(double weight) { 40 | super(weight); 41 | } 42 | 43 | @Override 44 | public double distance(GeoSpatialFeature x, GeoSpatialFeature y) { 45 | double lat1 = Math.toRadians(x.getLatitude()); 46 | double lat2 = Math.toRadians(y.getLatitude()); 47 | double lon1 = Math.toRadians(x.getLongitude()); 48 | double lon2 = Math.toRadians(y.getLongitude()); 49 | 50 | double d = Math.acos(Math.sin(lat1)*Math.sin(lat2) + 51 | Math.cos(lat1)*Math.cos(lat2) * 52 | Math.cos(lon2-lon1)); 53 | double normDist = d / Math.PI; 54 | return normDist; 55 | } 56 | 57 | public double distanceInKM(GeoSpatialFeature x, GeoSpatialFeature y) { 58 | return distance(x, y) * EARTH_RADIUS; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/spatial/distance/TrackDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.spatial.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.spatial.TrackFeature; 29 | 30 | /*** 31 | * A distance function that computes the distance between two TrackFeatures 32 | * 33 | * @author slangevin 34 | * 35 | */ 36 | public class TrackDistance extends DistanceFunction{ 37 | private static final long serialVersionUID = -6853731905216910630L; 38 | 39 | 40 | 41 | public TrackDistance(double weight) { 42 | super(weight); 43 | } 44 | 45 | @Override 46 | public double distance(TrackFeature x, TrackFeature y) { 47 | return x.getValue().getDistance(y.getValue()); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/string/StringFeature.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.feature.string; 26 | 27 | import com.oculusinfo.ml.feature.Feature; 28 | 29 | /*** 30 | * A StringFeature represents any single string value 31 | * 32 | * Useful for representing labels, names or categorical values 33 | * 34 | * @author slangevin 35 | * 36 | */ 37 | public class StringFeature extends Feature { 38 | private static final long serialVersionUID = -2290284204885879224L; 39 | private String value; 40 | 41 | public StringFeature() { 42 | super(); 43 | } 44 | 45 | public StringFeature(String name) { 46 | super(name); 47 | } 48 | 49 | public void setValue(String value) { 50 | this.value = value; 51 | } 52 | 53 | public String getValue() { 54 | return value; 55 | } 56 | 57 | @Override 58 | public String toString() { 59 | return this.getName() + ":" + value; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/string/distance/ExactTokenMatchDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.feature.string.distance; 26 | 27 | import com.oculusinfo.ml.distance.DistanceFunction; 28 | import com.oculusinfo.ml.feature.string.StringFeature; 29 | 30 | /*** 31 | * A distance function that compares two StringFeatures and returns 1 when they are equal 0 otherwise 32 | * 33 | * @author slangevin 34 | * 35 | */ 36 | public class ExactTokenMatchDistance extends DistanceFunction { 37 | private static final long serialVersionUID = -3651531184290382230L; 38 | 39 | public ExactTokenMatchDistance() { 40 | super(1); 41 | } 42 | 43 | public ExactTokenMatchDistance(double weight) { 44 | super(weight); 45 | } 46 | 47 | @Override 48 | public double distance(StringFeature x, StringFeature y) { 49 | return (x.getValue().equalsIgnoreCase(y.getValue())) ? 
0 : 1; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/temporal/TemporalFeature.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.temporal; 26 | 27 | import java.util.Date; 28 | 29 | import com.oculusinfo.ml.feature.Feature; 30 | 31 | /*** 32 | * A TemporalFeature represents a time period with as start and end date 33 | * 34 | * Useful for representing events 35 | * 36 | * @author slangevin 37 | * 38 | */ 39 | public class TemporalFeature extends Feature { 40 | private static final long serialVersionUID = 679871263379162267L; 41 | private Date start; 42 | private Date end; 43 | 44 | @Override 45 | public String toString() { 46 | return (this.getName() + ":[" + start + "; " + end + "]"); 47 | } 48 | 49 | public TemporalFeature() { 50 | super(); 51 | } 52 | 53 | public TemporalFeature(String name) { 54 | super(name); 55 | } 56 | 57 | public void setValue(Date start, Date end) { 58 | this.start = start; 59 | this.end = end; 60 | } 61 | 62 | public Date getStart() { 63 | return start; 64 | } 65 | 66 | public void setStart(Date start) { 67 | this.start = start; 68 | } 69 | 70 | public Date getEnd() { 71 | return end; 72 | } 73 | 74 | public void setEnd(Date end) { 75 | this.end = end; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/temporal/centroid/TemporalMinMaxCentroid.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.temporal.centroid; 26 | 27 | import java.util.Collection; 28 | import java.util.Collections; 29 | import java.util.Date; 30 | 31 | import com.oculusinfo.ml.centroid.Centroid; 32 | import com.oculusinfo.ml.feature.temporal.TemporalFeature; 33 | 34 | /*** 35 | * A Centroid for TemporalFeatures that represents the centroid by the min start and max end date 36 | * 37 | * @author slangevin 38 | * 39 | */ 40 | public class TemporalMinMaxCentroid implements Centroid { 41 | private static final long serialVersionUID = 845737125746792593L; 42 | private String name; 43 | private long cstart = 0, cend = 0; 44 | 45 | @Override 46 | public void add(TemporalFeature feature) { 47 | long start = 0, end = 0; 48 | 49 | start = feature.getStart().getTime(); 50 | end = feature.getEnd().getTime(); 51 | 52 | // revise the centroid start and end 53 | cstart = (cstart == 0 ? start : Math.min(start, cstart)); 54 | cend = (cend == 0 ? end : Math.max(end, cend)); 55 | } 56 | 57 | @Override 58 | public void remove(TemporalFeature feature) { 59 | // TODO Should give the second smallest start and second largest end to revise 60 | } 61 | 62 | @Override 63 | public Collection getAggregatableCentroid () { 64 | return Collections.singleton(getCentroid()); 65 | } 66 | 67 | @Override 68 | public TemporalFeature getCentroid() { 69 | // create the centroid temporal feature set 70 | TemporalFeature centroid = new TemporalFeature(name); 71 | centroid.setValue(new Date(cstart), new Date(cend)); 72 | return centroid; 73 | } 74 | 75 | @Override 76 | public void setName(String name) { 77 | this.name = name; 78 | } 79 | 80 | @Override 81 | public String getName() { 82 | return this.name; 83 | } 84 | 85 | @Override 86 | public Class getType() { 87 | return TemporalFeature.class; 88 | } 89 | 90 | @Override 91 | public void reset() { 92 | cstart = 0; 93 | cend = 0; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- 
/ensemble-clustering/src/main/java/com/oculusinfo/ml/feature/temporal/distance/TemporalDistance.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.feature.temporal.distance; 26 | 27 | import java.util.Date; 28 | 29 | import com.oculusinfo.ml.distance.DistanceFunction; 30 | import com.oculusinfo.ml.feature.temporal.TemporalFeature; 31 | 32 | /*** 33 | * A distance function that computes the distance between two TemporalFeatures 34 | * 35 | * The distance calculation first computes the overlapping interval between the two TemporalFeatures 36 | * and computes the distance as: 37 | * 38 | * 1 - 2 * overlap duration in ms / ( duration of feature1 in ms + duration of featur2 in ms) 39 | * 40 | * @author slangevin 41 | * 42 | */ 43 | public class TemporalDistance extends DistanceFunction { 44 | private static final long serialVersionUID = 1910227375920657644L; 45 | private final static double MS_PER_DAY = 86400000; 46 | 47 | public TemporalDistance(double weight) { 48 | super(weight); 49 | } 50 | 51 | @Override 52 | public double distance(TemporalFeature x, TemporalFeature y) { 53 | Date start1 = x.getStart(); 54 | Date start2 = y.getStart(); 55 | Date end1 = x.getEnd(); 56 | Date end2 = y.getEnd(); 57 | 58 | Date s = null, e = null; 59 | 60 | // TODO sanity check inputs to make sure: start1 <= end1 and start2 <= end2 61 | 62 | // check if the date intervals overlap - return 1 if they don't 63 | if (start2.after(end1) || start1.after(end2)) return 1; 64 | 65 | // these are identical time regions 66 | if (start1.equals(start2) && end1.equals(end2)) return 0; 67 | 68 | // calculate the overlapping interval [s,e] 69 | s = (start2.after(start1) ? start2 : start1); 70 | e = (end2.before(end1) ? end2 : end1); 71 | 72 | // normalize distance 73 | double normDist = 1 - (2 * durationInMS(s, e) / (durationInMS(start1, end1) + durationInMS(start2, end2))); 74 | return normDist; 75 | } 76 | 77 | private double durationInMS(Date start, Date end) { 78 | double duration = end.getTime() - start.getTime(); 79 | // handle edge case so single points are not zero 80 | return (duration == 0 ? 
MS_PER_DAY : duration); 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/search/ObjectiveFunction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.search; 26 | 27 | public interface ObjectiveFunction { 28 | public double score(Solution solution); 29 | } 30 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/search/SearchException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
/***
 * Checked exception type of the com.oculusinfo.ml.search package.
 */
public class SearchException extends Exception {
    private static final long serialVersionUID = 1350271155608046478L;

    /***
     * Creates an exception that carries a message and an underlying cause.
     *
     * @param msg the detail message
     * @param e the cause of this exception
     */
    public SearchException(String msg, Throwable e) {
        super(msg, e);
    }

    /***
     * Creates an exception that carries only a message.
     *
     * @param msg the detail message
     */
    public SearchException(String msg) {
        super(msg);
    }
}
24 | */ 25 | package com.oculusinfo.ml.search; 26 | 27 | public interface Solution { 28 | 29 | public Solution neighbor(double temp); 30 | } 31 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/stats/FeatureFreqComparator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.stats; 26 | 27 | import java.util.Comparator; 28 | 29 | public class FeatureFreqComparator implements Comparator { 30 | public int compare(final FeatureFrequency f1, final FeatureFrequency f2) { 31 | return f2.frequency - f1.frequency; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/stats/FeatureFrequency.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.stats; 26 | 27 | import java.io.Serializable; 28 | 29 | import com.oculusinfo.ml.feature.Feature; 30 | 31 | public class FeatureFrequency implements Serializable { 32 | private static final long serialVersionUID = -3008472634828604901L; 33 | public int frequency = 0; 34 | public Feature feature; 35 | 36 | public FeatureFrequency() { } 37 | 38 | public FeatureFrequency(Feature feature) { 39 | this.feature = feature; 40 | frequency = 1; 41 | } 42 | 43 | public Feature getFeature() { 44 | return feature; 45 | } 46 | 47 | public void setFeature(Feature feature) { 48 | this.feature = feature; 49 | } 50 | 51 | public int getFrequency() { 52 | return frequency; 53 | } 54 | 55 | public void setFrequency(int frequency) { 56 | this.frequency = frequency; 57 | } 58 | 59 | @Override 60 | public String toString() { 61 | return feature + " (" + frequency + ")"; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/unsupervised/cluster/ClusterFactory.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.unsupervised.cluster; 26 | 27 | import java.io.Serializable; 28 | import java.util.Map; 29 | import java.util.UUID; 30 | 31 | 32 | public class ClusterFactory implements Serializable { 33 | private static final long serialVersionUID = -5094018550295526175L; 34 | 35 | private boolean onlineUpdate; 36 | private Map featureTypeDefs; 37 | 38 | public ClusterFactory(Map featureTypeDefs, boolean onlineUpdate) { 39 | this.onlineUpdate = onlineUpdate; 40 | this.featureTypeDefs = featureTypeDefs; 41 | } 42 | 43 | public Cluster create() { 44 | return (new Cluster(UUID.randomUUID().toString(), featureTypeDefs.values(), onlineUpdate)); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/unsupervised/cluster/ClusterJsonMapper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.unsupervised.cluster; 26 | 27 | import java.io.IOException; 28 | import java.io.StringWriter; 29 | 30 | import org.codehaus.jackson.JsonFactory; 31 | import org.codehaus.jackson.JsonGenerationException; 32 | import org.codehaus.jackson.JsonGenerator; 33 | import org.codehaus.jackson.JsonParseException; 34 | import org.codehaus.jackson.map.JsonMappingException; 35 | import org.codehaus.jackson.map.ObjectMapper; 36 | 37 | /*** 38 | * Serializer for Cluster object to/from JSON 39 | * 40 | * @author slangevin 41 | * 42 | */ 43 | public class ClusterJsonMapper { 44 | 45 | private static ObjectMapper mapper = new ObjectMapper(); 46 | private static JsonFactory factory = new JsonFactory(); 47 | 48 | public static Cluster fromJson(String jsonAsString) 49 | throws JsonMappingException, JsonParseException, IOException { 50 | 51 | mapper.enableDefaultTyping(ObjectMapper.DefaultTyping.NON_FINAL); 52 | Cluster cluster = mapper.readValue(jsonAsString, Cluster.class); 53 | 54 | return cluster; 55 | } 56 | 57 | public static String toJson(Cluster cluster, boolean prettyPrint) 58 | throws JsonMappingException, JsonGenerationException, IOException { 59 | 60 | StringWriter writer = new StringWriter(); 61 | JsonGenerator generator = factory.createJsonGenerator(writer); 62 | 63 | if (prettyPrint) { 64 | generator.useDefaultPrettyPrinter(); 65 | } 66 | 67 | mapper.enableDefaultTyping(ObjectMapper.DefaultTyping.NON_FINAL); 68 | mapper.writeValue(generator, cluster); 69 | return writer.toString(); 70 | } 71 | } -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/unsupervised/cluster/ClusterResult.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
/***
 * Return type of the doCluster() operation of a Clusterer: an iterable,
 * serializable collection of clustering output.
 *
 * @author slangevin
 *
 */
public interface ClusterResult extends Iterable, Serializable {

    /***
     * @return true when the result contains no elements
     */
    boolean isEmpty();

    /***
     * @return the number of elements in the result
     */
    int size();
}
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.unsupervised.cluster; 26 | 27 | import java.util.List; 28 | 29 | import com.oculusinfo.ml.DataSet; 30 | import com.oculusinfo.ml.Instance; 31 | 32 | public interface Clusterer { 33 | 34 | /*** 35 | * Method to initialize the clusterer - useful for clusterers that need to allocated resources such as thread pools 36 | * 37 | */ 38 | public void init(); 39 | 40 | /*** 41 | * Method to terminate the clusterer - useful for clusterers that need to cleanup resources such as thread pools 42 | * 43 | */ 44 | public void terminate(); 45 | 46 | /*** 47 | * Cluster the instances in ds using the clusters provided 48 | * 49 | * @param ds is the DataSet containing the new instances to cluster 50 | * @param clusters is the list of clusters to use during clustering 51 | * @return the list of clusters that were modified 52 | */ 53 | public ClusterResult doIncrementalCluster(DataSet ds, List clusters); 54 | 55 | /*** 56 | * Generate clusters given the provided DataSet ds 57 | * 58 | * @param ds is the DataSet containing the instances to cluster 59 | * @return the list of clusters created 60 | */ 61 | public ClusterResult doCluster(DataSet ds); 62 | 63 | /*** 64 | * Calculate the distance of the two instances using the distance measures 65 | * associated with this clusterer. 66 | * 67 | * @param inst1 68 | * @param inst2 69 | * @return a double value representing the distance between inst1 and inst2 70 | */ 71 | public double distance(Instance inst1, Instance inst2); 72 | } 73 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/unsupervised/cluster/FeatureTypeDefinition.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.unsupervised.cluster; 26 | 27 | import java.io.Serializable; 28 | 29 | import com.oculusinfo.ml.centroid.Centroid; 30 | import com.oculusinfo.ml.distance.DistanceFunction; 31 | 32 | public class FeatureTypeDefinition implements Serializable { 33 | private static final long serialVersionUID = -8378567604749382148L; 34 | public String featureName; 35 | @SuppressWarnings("rawtypes") 36 | public DistanceFunction distFunc; 37 | @SuppressWarnings("rawtypes") 38 | public Class centroidClass; 39 | 40 | @SuppressWarnings("rawtypes") 41 | public FeatureTypeDefinition(String featureName, 42 | Class centroidClass, 43 | DistanceFunction distFunc) { 44 | this.distFunc = distFunc; 45 | this.featureName = featureName; 46 | this.centroidClass = centroidClass; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/unsupervised/cluster/InMemoryClusterResult.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.unsupervised.cluster; 26 | 27 | import java.util.Iterator; 28 | import java.util.LinkedList; 29 | import java.util.List; 30 | 31 | /*** 32 | * This class represents a cluster result that stores all Clusters in memory. 33 | * 34 | * @author slangevin 35 | * 36 | */ 37 | public class InMemoryClusterResult implements ClusterResult { 38 | private static final long serialVersionUID = -485649795889971226L; 39 | 40 | List clusters = new LinkedList(); 41 | 42 | public InMemoryClusterResult(List clusters) { 43 | this.clusters.addAll(clusters); 44 | } 45 | 46 | @Override 47 | public Iterator iterator() { 48 | return clusters.iterator(); 49 | } 50 | 51 | @Override 52 | public boolean isEmpty() { 53 | return clusters.isEmpty(); 54 | } 55 | 56 | @Override 57 | public int size() { 58 | return clusters.size(); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/unsupervised/cluster/threshold/ThresholdClusterer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.unsupervised.cluster.threshold; 26 | 27 | import com.oculusinfo.ml.Instance; 28 | import com.oculusinfo.ml.unsupervised.cluster.AbstractClusterer; 29 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 30 | 31 | /*** 32 | * This class implements a single pass threshold clustering algorithm. 
33 | * 34 | * The algorithm is defined as: 35 | * set clusters = [] 36 | * for each instance 37 | * best cluster = none 38 | * best score = infinity 39 | * for each cluster in clusters 40 | * if distance(instance, cluster) < threshold and < score 41 | * best cluster = cluster 42 | * best score = distance(instance, cluster) 43 | * 44 | * if best cluster is not none 45 | * add instance to best cluster 46 | * else 47 | * create new cluster with instance as a member 48 | * add cluster to clusters 49 | * 50 | * This clusterer is useful when multiple passes over the data are undesirable or the number of clusters is unknown. 51 | * Tuning is required to choose an appropriate threshold that controls when new clusters can be created. 52 | * 53 | * Note that the order that clustering is sensitive to the order data is processed. 54 | * 55 | * To speed up the clustering even faster, the firstCandidate option can be set to true which will halt the 56 | * search for the best cluster after the first candidate is found. 
57 | * 58 | * @author slangevin 59 | * 60 | */ 61 | public class ThresholdClusterer extends AbstractClusterer { 62 | protected double threshold = 0.5; // default threshold 63 | 64 | public ThresholdClusterer() { 65 | super(false, true, true); 66 | } 67 | 68 | public ThresholdClusterer(boolean firstCandidate, boolean penalizeMissingFeatures) { 69 | super(firstCandidate, true, penalizeMissingFeatures); 70 | } 71 | 72 | public void setThreshold(double threshold) { 73 | this.threshold = threshold; 74 | } 75 | 76 | public double getThreshold() { 77 | return threshold; 78 | } 79 | 80 | @Override 81 | protected boolean isCandidate(Instance inst, Cluster candidate, 82 | double score, Cluster best, double bestScore) { 83 | 84 | return (score < threshold && score < bestScore); // lower score less than threshold is better 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/validation/unsupervised/external/BCubed.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.validation.unsupervised.external; 26 | 27 | import java.util.Collection; 28 | 29 | import com.oculusinfo.ml.Instance; 30 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 31 | 32 | /*** 33 | * External clustering validation implementation of BCubed 34 | * 35 | * @author slangevin 36 | * 37 | */ 38 | public class BCubed { 39 | private double precision = 0, recall = 0, f = 0; 40 | 41 | /*** 42 | * BCubed recall is the proportion of instances with class label are in this cluster. 43 | * @param label is the class label we are evaluating 44 | * @param cluster that we are evaluating 45 | * @param clusters is a collection of all the clusters 46 | * @return the proportion of instances with class label that are members of cluster 47 | */ 48 | public double recall(String label, Cluster cluster, Collection clusters) { 49 | double count = 0, total = 0, recall = 0; 50 | 51 | // first determine how many instances have class label in this cluster 52 | for (Instance i : cluster.getMembers()) { 53 | if (i.hasClassLabel(label)) count++; 54 | } 55 | 56 | // find out how many instances in total have this class label 57 | for (Cluster c : clusters) { 58 | for (Instance i : c.getMembers()) { 59 | if (i.hasClassLabel(label)) total++; 60 | } 61 | } 62 | 63 | if (total > 0) recall = count / total; 64 | 65 | return recall; 66 | } 67 | 68 | /*** 69 | * BCubed precision is the proportion of instances in a cluster that share the same label. 
70 | * 71 | * @param label is the class label we are evaluating 72 | * @param cluster that we are evaluating 73 | * @return the proportion of instances in cluster that have the specified label 74 | */ 75 | public double precision(String label, Cluster cluster) { 76 | double count = 0, precision = 0; 77 | 78 | for (Instance i : cluster.getMembers()) { 79 | if (i.hasClassLabel(label)) count++; 80 | } 81 | 82 | if (count > 0) precision = count / cluster.getMembers().size(); 83 | 84 | return precision; 85 | } 86 | 87 | public double validate(Collection clusters) { 88 | int num = 0; 89 | double p = 0, r = 0; 90 | 91 | for (Cluster c : clusters) { 92 | for (Instance inst : c.getMembers()) { 93 | p += precision(inst.getClassLabel(), c) ; 94 | r += recall(inst.getClassLabel(), c, clusters); 95 | num++; 96 | } 97 | } 98 | 99 | // calculate average precision and recall 100 | precision = p / num; 101 | recall = r / num; 102 | 103 | // calculate and return the f score 104 | f = 2 * precision * recall / (precision + recall); 105 | 106 | return f; 107 | } 108 | 109 | public double getPrecision() { 110 | return precision; 111 | } 112 | 113 | public double getRecall() { 114 | return recall; 115 | } 116 | 117 | public double getBCubed() { 118 | return f; 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/validation/unsupervised/internal/Cohesion.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.validation.unsupervised.internal; 26 | 27 | import com.oculusinfo.ml.Instance; 28 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 29 | import com.oculusinfo.ml.unsupervised.cluster.ClusterResult; 30 | import com.oculusinfo.ml.unsupervised.cluster.Clusterer; 31 | 32 | /*** 33 | * An internal clustering validation implementation of cluster cohesion 34 | * 35 | * @author slangevin 36 | * 37 | */ 38 | public class Cohesion { 39 | 40 | public static double cohesion(Clusterer clusterer, Cluster cluster) { 41 | double norm = cluster.size(); 42 | double cohesion = 0; 43 | for (Instance inst : cluster.getMembers()) { 44 | if (inst instanceof Cluster) { 45 | cohesion += cohesion(clusterer, (Cluster)inst); 46 | } 47 | else { 48 | cohesion += clusterer.distance(cluster, inst); 49 | } 50 | } 51 | return cohesion / norm; 52 | } 53 | 54 | public static double validate(Clusterer clusterer, ClusterResult clusters) { 55 | double cohesion = 0; 56 | double norm = clusters.size(); 57 | for (Cluster c : clusters) { 58 | cohesion += cohesion(clusterer, c); 59 | } 60 | return cohesion / norm; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /ensemble-clustering/src/main/java/com/oculusinfo/ml/validation/unsupervised/internal/Separation.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.validation.unsupervised.internal; 26 | 27 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 28 | import com.oculusinfo.ml.unsupervised.cluster.ClusterResult; 29 | import com.oculusinfo.ml.unsupervised.cluster.Clusterer; 30 | 31 | /*** 32 | * An internal clustering validation implementation of cluster separation 33 | * @author slangevin 34 | * 35 | */ 36 | public class Separation { 37 | 38 | public static double separation(Clusterer clusterer, Cluster c1, Cluster c2) { 39 | return clusterer.distance(c1, c2); 40 | } 41 | 42 | public static double validate(Clusterer clusterer, ClusterResult clusters) { 43 | double separation = 0; 44 | 45 | for (Cluster c1 : clusters) { 46 | double sum = 0; 47 | for (Cluster c2 : clusters) { 48 | sum += separation(clusterer, c1, c2); 49 | } 50 | separation += sum / clusters.size(); 51 | } 52 | return (separation / clusters.size()); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/TestGeoSpatialCentroid.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | import static org.junit.Assert.assertTrue; 26 | 27 | import org.junit.Test; 28 | 29 | import com.oculusinfo.ml.feature.spatial.GeoSpatialFeature; 30 | import com.oculusinfo.ml.feature.spatial.centroid.GeoSpatialCentroid; 31 | 32 | 33 | public class TestGeoSpatialCentroid { 34 | 35 | double epsilon = 0.00001; 36 | 37 | private boolean isEqual(double d1, double d2) { 38 | return (Math.abs( d1 - d2 ) < epsilon ); 39 | } 40 | 41 | @Test 42 | public void testAddOne() { 43 | GeoSpatialFeature f = new GeoSpatialFeature("f1"); 44 | f.setValue(10, 50); 45 | GeoSpatialCentroid centroid = new GeoSpatialCentroid(); 46 | centroid.add(f); 47 | f = centroid.getCentroid(); 48 | System.out.println(f); 49 | assertTrue(isEqual(f.getLatitude(), 10)); 50 | assertTrue(isEqual(f.getLongitude(), 50)); 51 | } 52 | 53 | @Test 54 | public void testAddOne2() { 55 | GeoSpatialFeature f = new GeoSpatialFeature("f1"); 56 | f.setValue(37.68455,-97.34110); 57 | GeoSpatialCentroid centroid = new GeoSpatialCentroid(); 58 | centroid.add(f); 59 | f = centroid.getCentroid(); 60 | System.out.println(f); 61 | assertTrue(isEqual(f.getLatitude(), 37.68455)); 62 | assertTrue(isEqual(f.getLongitude(), -97.34110)); 63 | } 64 | 65 | @Test 66 | public void testAddOne3() { 67 | GeoSpatialFeature f = new GeoSpatialFeature("f1"); 68 | f.setValue(-89,80); 69 | GeoSpatialCentroid centroid = new GeoSpatialCentroid(); 70 | centroid.add(f); 71 | f = centroid.getCentroid(); 72 | System.out.println(f); 
73 | assertTrue(isEqual(f.getLatitude(), 89)); 74 | assertTrue(isEqual(f.getLongitude(), -100)); 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/geometry/geodesic/TrackSimplificationTests.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.geometry.geodesic; 26 | 27 | import org.junit.Assert; 28 | import org.junit.Test; 29 | 30 | import com.oculusinfo.geometry.geodesic.tracks.GeodeticTrack; 31 | 32 | public class TrackSimplificationTests { 33 | // Test to make sure intermediate colinear points are removed. 
34 | @Test 35 | public void testInlineRemoval () { 36 | Position p0 = new Position(4, 28); 37 | Position p1 = new Position(-17, 42); 38 | double azimuth = p0.getAzimuth(p1); 39 | double distance = p0.getAngularDistance(p1); 40 | Position pa = p0.offset(azimuth, 0.25*distance); 41 | Position pb = p0.offset(azimuth, 0.50*distance); 42 | Position pc = p0.offset(azimuth, 0.75*distance); 43 | 44 | PositionCalculationParameters params = new PositionCalculationParameters(PositionCalculationType.Geodetic, 1E-12, 1E-12, false); 45 | Track track = new GeodeticTrack(params , p0, pa, pb, pc, p1); 46 | Assert.assertEquals(2, track.getPoints().size()); 47 | } 48 | 49 | @Test 50 | public void testNearInlineRemoval () { 51 | Position p0 = new Position(4, 28); 52 | Position p1 = new Position(-17, 42); 53 | double azimuth = p0.getAzimuth(p1); 54 | double distance = p0.getAngularDistance(p1); 55 | Position pa = p0.offset(azimuth, 0.25*distance).offset(azimuth+90, distance*0.01); 56 | Position pb = p0.offset(azimuth, 0.50*distance).offset(azimuth+90, distance*0.01); 57 | Position pc = p0.offset(azimuth, 0.75*distance).offset(azimuth+90, distance*0.01); 58 | 59 | PositionCalculationParameters params = new PositionCalculationParameters(PositionCalculationType.Geodetic, 0.01, 1E-12, false); 60 | Track track = new GeodeticTrack(params , p0, pa, pb, pc, p1); 61 | Assert.assertEquals(2, track.getPoints().size()); 62 | 63 | params = new PositionCalculationParameters(PositionCalculationType.Geodetic, 0.001, 1E-12, false); 64 | track = new GeodeticTrack(params , p0, pa, pb, pc, p1); 65 | Assert.assertEquals(4, track.getPoints().size()); 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/math/algebra/TestAngleUtilities.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 
3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.math.algebra; 26 | 27 | import junit.framework.Assert; 28 | 29 | import org.junit.Test; 30 | 31 | import com.oculusinfo.math.algebra.AngleUtilities; 32 | 33 | public class TestAngleUtilities { 34 | private static final double EPSILON = 1E-12; 35 | 36 | @Test 37 | public void testAngleRanges () { 38 | Assert.assertEquals( 1.0, AngleUtilities.intoRangeDegrees(0.0, 361.0), EPSILON); 39 | Assert.assertEquals(-180.0, AngleUtilities.intoRangeDegrees(0.0, 180.0), EPSILON); 40 | Assert.assertEquals(-180.0, AngleUtilities.intoRangeDegrees(0.0, -180.0), EPSILON); 41 | Assert.assertEquals( -1.0, AngleUtilities.intoRangeDegrees(0.0, 359.0), EPSILON); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/math/linearalgebra/ListUtilitiesTests.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.math.linearalgebra; 26 | 27 | import java.util.Arrays; 28 | import java.util.List; 29 | 30 | import junit.framework.Assert; 31 | 32 | import org.junit.Test; 33 | 34 | public class ListUtilitiesTests { 35 | private static final double EPSILON = 1E-12; 36 | 37 | @Test 38 | public void testJoiningWithSingleton () { 39 | List base = Arrays.asList(0.0, 1.0, 2.0, 3.0, 4.0); 40 | 41 | Assert.assertEquals(Arrays.asList(-1.0, 0.0, 1.0, 2.0, 3.0, 4.0), 42 | ListUtilities.joinLists(base, Arrays.asList(-1.0), EPSILON)); 43 | Assert.assertEquals(base, ListUtilities.joinLists(base, Arrays.asList(0.0), EPSILON)); 44 | Assert.assertEquals(base, ListUtilities.joinLists(base, Arrays.asList(1.0), EPSILON)); 45 | Assert.assertEquals(base, ListUtilities.joinLists(base, Arrays.asList(2.0), EPSILON)); 46 | Assert.assertEquals(Arrays.asList(0.0, 1.0, 2.0, 2.5, 3.0, 4.0), 47 | ListUtilities.joinLists(base, Arrays.asList(2.5), EPSILON)); 48 | Assert.assertEquals(base, ListUtilities.joinLists(base, Arrays.asList(3.0), EPSILON)); 49 | Assert.assertEquals(base, ListUtilities.joinLists(base, Arrays.asList(4.0), EPSILON)); 50 | Assert.assertEquals(Arrays.asList(0.0, 1.0, 2.0, 3.0, 4.0, 5.0), 51 | ListUtilities.joinLists(base, Arrays.asList(5.0), EPSILON)); 52 | } 53 | 54 | @Test 55 | public void testJoiningWithZerosList () { 56 | List base = Arrays.asList(0.0, 1.0, 2.0, 3.0, 4.0); 57 | Assert.assertEquals(base, ListUtilities.joinLists(base, Arrays.asList(0.0, 0.0, 0.0, 0.0, 0.0), EPSILON)); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/math/linearalgebra/VectorTests.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.math.linearalgebra; 26 | 27 | import junit.framework.Assert; 28 | 29 | import org.junit.Test; 30 | 31 | import com.oculusinfo.math.linearalgebra.Vector; 32 | 33 | public class VectorTests { 34 | @Test 35 | public void testVectorEquality () { 36 | Assert.assertFalse(new Vector(Double.NaN).equals(new Vector(0))); 37 | Assert.assertFalse(new Vector(0).equals(new Vector(Double.NaN))); 38 | Assert.assertTrue(new Vector(0).equals(new Vector(0))); 39 | Assert.assertTrue(new Vector(Double.NaN).equals(new Vector(Double.NaN))); 40 | } 41 | 42 | @Test 43 | public void testCrossProduct () { 44 | Vector X = new Vector(1, 0, 0); 45 | Vector Y = new Vector(0, 1, 0); 46 | Vector Z = new Vector(0, 0, 1); 47 | Assert.assertEquals(Z, X.cross(Y)); 48 | Assert.assertEquals(Y, Z.cross(X)); 49 | Assert.assertEquals(X, Y.cross(Z)); 50 | Assert.assertEquals(Z.scale(-1.0), Y.cross(X)); 51 | Assert.assertEquals(Y.scale(-1.0), X.cross(Z)); 52 | Assert.assertEquals(X.scale(-1.0), Z.cross(Y)); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/math/statistics/TestStats.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.math.statistics; 26 | 27 | import junit.framework.Assert; 28 | 29 | import org.junit.Test; 30 | 31 | import com.oculusinfo.math.statistics.StatTracker; 32 | 33 | public class TestStats { 34 | private static final double EPSILON = 1E-12; 35 | 36 | @Test 37 | public void testMean () { 38 | StatTracker s = new StatTracker(); 39 | s.addStat(0.0); 40 | s.addStat(1.0); 41 | s.addStat(2.0); 42 | Assert.assertEquals(1.0, s.mean(), EPSILON); 43 | s.addStat(3.0); 44 | Assert.assertEquals(1.5, s.mean(), EPSILON); 45 | } 46 | 47 | @Test 48 | public void testVariance () { 49 | StatTracker s = new StatTracker(); 50 | s.addStat(0.0); 51 | s.addStat(1.0); 52 | s.addStat(2.0); 53 | Assert.assertEquals(2.0/3.0, s.variance(), EPSILON); 54 | 55 | s.addStat(3.0); 56 | Assert.assertEquals(5.0/4.0, s.variance(), EPSILON); 57 | } 58 | 59 | @Test 60 | public void testReset () { 61 | StatTracker s = new StatTracker(); 62 | s.addStat(0.0); 63 | s.addStat(1.0); 64 | s.addStat(2.0); 65 | s.addStat(3.0); 66 | Assert.assertEquals(1.5, s.mean(), EPSILON); 67 | 68 | s.reset(); 69 | s.addStat(0.0); 70 | s.addStat(1.0); 71 | Assert.assertEquals(0.5, s.mean(), EPSILON); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/ml/TestNormalization.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml; 26 | 27 | import org.junit.Test; 28 | 29 | import com.oculusinfo.ml.feature.numeric.NumericVectorFeature; 30 | 31 | public class TestNormalization { 32 | 33 | @Test 34 | public void test() { 35 | DataSet ds = new DataSet(); 36 | 37 | for (int i=0; i < 5; i++) { 38 | Instance inst = new Instance(); 39 | NumericVectorFeature v = new NumericVectorFeature("v"); 40 | v.setValue(new double[] {i, i*10, i*100}); 41 | inst.addFeature(v); 42 | ds.add(inst); 43 | } 44 | 45 | ds.normalizeInstanceFeature("v"); 46 | 47 | for (Instance inst : ds) { 48 | NumericVectorFeature v = (NumericVectorFeature)inst.getFeature("v"); 49 | double[] vals = v.getValue(); 50 | for (int i=0; i < vals.length; i++) { 51 | System.out.print(vals[0] + ", "); 52 | } 53 | System.out.println(); 54 | } 55 | 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/ml/TestStringTools.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml; 26 | 27 | import org.junit.Assert; 28 | import org.junit.Test; 29 | 30 | import com.oculusinfo.ml.utils.StringTools; 31 |
/**
 * Tests {@link StringTools#fingerPrint}: surrounding whitespace and punctuation
 * must be dropped, and multi-word input must come back as "apple banana"
 * whatever separator or word order was used.
 */
public class TestStringTools {

    @Test
    public void test() {
        // Every variant must reduce to the single token "godel".
        String[] godelVariants = {
            "godel",
            " godel ",
            "~godel~",
            "\tgodel",
            "%##@godel@#!@",
        };
        // Every variant must reduce to the two tokens "apple banana".
        String[] appleBananaVariants = {
            "apple banana",
            "#$%apple banana",
            "apple $%#@banana",
            "apple\tbanana",
            "banana apple",
        };

        // assertEquals takes (expected, actual); the previous code passed them
        // the other way round, which mislabels the values in failure messages.
        for (String raw : godelVariants) {
            Assert.assertEquals("godel", StringTools.fingerPrint(raw));
        }
        for (String raw : appleBananaVariants) {
            Assert.assertEquals("apple banana", StringTools.fingerPrint(raw));
        }
    }

}
60 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/ml/distance/TestExactStringMatchDistance.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.distance; 26 | 27 | import java.util.Collections; 28 | 29 | import junit.framework.Assert; 30 | 31 | import org.junit.Test; 32 | 33 | import com.oculusinfo.ml.feature.string.StringFeature; 34 | import com.oculusinfo.ml.feature.string.distance.ExactTokenMatchDistance; 35 | 36 | public class TestExactStringMatchDistance { 37 | double epsilon = 0.00001; 38 | 39 | private boolean isEqual(double d1, double d2) { 40 | return (Math.abs( d1 - d2 ) < epsilon ); 41 | } 42 | 43 | @Test 44 | public void testIdentical() { 45 | StringFeature t1 = new StringFeature(); 46 | t1.setValue("dog"); 47 | 48 | StringFeature t2 = new StringFeature(); 49 | t2.setValue("dog"); 50 | 51 | ExactTokenMatchDistance d = new ExactTokenMatchDistance(); 52 | double distance = d.aveMinDistance(Collections.singletonList(t1), Collections.singletonList(t2)); 53 | System.out.println(distance); 54 | Assert.assertTrue(isEqual(distance, 0)); 55 | 56 | distance = d.distance(t1, t2); 57 | System.out.println(distance); 58 | Assert.assertTrue(isEqual(distance, 0)); 59 | } 60 | 61 | 62 | @Test 63 | public void testSymmetric() { 64 | StringFeature t1 = new StringFeature(); 65 | t1.setValue("dog"); 66 | 67 | StringFeature t2 = new StringFeature(); 68 | t2.setValue("dog"); 69 | 70 | ExactTokenMatchDistance d = new ExactTokenMatchDistance(); 71 | double d1 = d.aveMinDistance(Collections.singletonList(t1), Collections.singletonList(t2)); 72 | double d2 = d.aveMinDistance(Collections.singletonList(t2), Collections.singletonList(t1)); 73 | 74 | Assert.assertTrue(isEqual(d1, d2)); 75 | 76 | d1 = d.distance(t1, t2); 77 | d2 = d.distance(t2, t1); 78 | 79 | Assert.assertTrue(isEqual(d1, d2)); 80 | } 81 | 82 | @Test 83 | public void testDisjoint() { 84 | StringFeature t1 = new StringFeature(); 85 | t1.setValue("dog"); 86 | 87 | StringFeature t2 = new StringFeature(); 88 | t2.setValue("cat"); 89 | 90 | ExactTokenMatchDistance d = new ExactTokenMatchDistance(); 91 | double distance = 
d.aveMinDistance(Collections.singletonList(t1), Collections.singletonList(t2)); 92 | System.out.println(distance); 93 | Assert.assertTrue(isEqual(distance, 1.0)); 94 | 95 | distance = d.distance(t1, t2); 96 | System.out.println(distance); 97 | Assert.assertTrue(isEqual(distance, 1.0)); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/ml/search/TestAnnealer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.search; 26 | 27 | import java.util.Random; 28 | 29 | import com.oculusinfo.ml.search.stochastic.SimulatedAnnealing; 30 | 31 | public class TestAnnealer { 32 | 33 | public static class TestSolution implements Solution { 34 | public double vertex[] = new double[3]; 35 | 36 | public TestSolution(double vals[]) { 37 | vertex = vals.clone(); 38 | } 39 | public TestSolution(double a, double b, double c) { 40 | vertex[0] = a; 41 | vertex[1] = b; 42 | vertex[2] = c; 43 | } 44 | 45 | @Override 46 | public Solution neighbor(double temp) { 47 | Random rnd = new Random(); 48 | double tmp[] = vertex.clone(); 49 | 50 | tmp[0] = tmp[0] + Math.pow(-1, rnd.nextInt(2)) * rnd.nextDouble() * temp; 51 | tmp[1] = tmp[1] + Math.pow(-1, rnd.nextInt(2)) * rnd.nextDouble() * temp; 52 | 53 | return new TestSolution(tmp); 54 | } 55 | 56 | @Override 57 | public String toString() { 58 | return "a: " + vertex[0] + ", b: " + + vertex[1] + ", c: " + + vertex[2]; 59 | } 60 | 61 | 62 | } 63 | 64 | /** 65 | * @param args 66 | */ 67 | public static void main(String[] args) { 68 | SimulatedAnnealing annealer = new SimulatedAnnealing(new ObjectiveFunction() { 69 | 70 | // static final double A=2; 71 | // static final double B=3; 72 | 73 | @Override 74 | public double score(Solution solution) { 75 | double error; 76 | TestSolution s = (TestSolution)solution; 77 | error = Math.pow(s.vertex[0], 2) + Math.pow(s.vertex[1], 2); 78 | // error=Math.pow(A-Math.sin(s.vertex[0])*Math.exp(s.vertex[1])*s.vertex[2], 2); 79 | // error+=Math.pow(B-Math.exp(s.vertex[0])*Math.sin(s.vertex[1]), 2); 80 | System.out.println(error); 81 | return error; 82 | } 83 | 84 | }); 85 | 86 | try { 87 | Random rnd = new Random(); 88 | annealer.setInitialSolution(new TestSolution(rnd.nextDouble(), rnd.nextDouble(), rnd.nextDouble())); 89 | annealer.setInitialTemperature(20); 90 | annealer.setCoolingRate(0.001); 91 | annealer.setMaxIterations(10000); 92 | 93 | Solution best = annealer.search(true); 
94 | System.out.println(best); 95 | 96 | // print out "distance" to solution to build a bit of confidence in the result 97 | System.out.println("Distance " + annealer.getObjectiveFunction().score(best)); 98 | 99 | } catch (SearchException e) { 100 | // TODO Auto-generated catch block 101 | e.printStackTrace(); 102 | } 103 | 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/ml/tracks/TestFrame.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.tracks; 26 | 27 | import java.awt.Dimension; 28 | import java.awt.Point; 29 | import java.awt.event.ComponentAdapter; 30 | import java.awt.event.ComponentEvent; 31 | import java.awt.event.WindowAdapter; 32 | import java.awt.event.WindowEvent; 33 | import java.util.prefs.Preferences; 34 | 35 | import javax.swing.JFrame; 36 | 37 | 38 | 39 | /** 40 | * A visible frame, in which to show visual graphical tests, in a way that's 41 | * easy to run from junit tests. 42 | * 43 | * @author nkronenfeld 44 | */ 45 | public class TestFrame extends JFrame { 46 | private static final long serialVersionUID = 1L; 47 | private Object _showLock; 48 | public TestFrame () { 49 | _showLock = new Object(); 50 | initializeGeometry(); 51 | 52 | addWindowListener(new WindowAdapter() { 53 | @Override 54 | public void windowClosing (WindowEvent e) { 55 | synchronized (_showLock) { 56 | _showLock.notify(); 57 | } 58 | } 59 | }); 60 | addComponentListener(new ComponentAdapter() { 61 | @Override 62 | public void componentResized (ComponentEvent e) { 63 | saveGeometry(); 64 | } 65 | @Override 66 | public void componentMoved (ComponentEvent e) { 67 | saveGeometry(); 68 | } 69 | }); 70 | } 71 | 72 | private void initializeGeometry () { 73 | Preferences p = Preferences.userRoot(); 74 | Preferences oculus = p.node("com.oculusinfo"); 75 | Preferences test = oculus.node("testing"); 76 | 77 | int x = test.getInt("test.frame.x", 100); 78 | int y = test.getInt("test.frame.y", 100); 79 | int width = test.getInt("test.frame.width", 500); 80 | int height = test.getInt("test.frame.height", 500); 81 | 82 | setLocation(x, y); 83 | setSize(width, height); 84 | } 85 | 86 | private void saveGeometry () { 87 | Preferences p = Preferences.userRoot(); 88 | Preferences oculus = p.node("com.oculusinfo"); 89 | Preferences test = oculus.node("testing"); 90 | 91 | Dimension size = getSize(); 92 | Point location = getLocation(); 93 | test.putInt("test.frame.x", location.x); 94 | 
test.putInt("test.frame.y", location.y); 95 | test.putInt("test.frame.width", size.width); 96 | test.putInt("test.frame.height", size.height); 97 | } 98 | 99 | /** 100 | * Show the frame, and wait until it is closed. 101 | */ 102 | public void showAndWait () { 103 | setVisible(true); 104 | synchronized (_showLock) { 105 | try { 106 | _showLock.wait(); 107 | } catch (InterruptedException e) { 108 | e.printStackTrace(); 109 | } 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/ml/unsupervised/TestGeoClusteringWithDPMeans.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | package com.oculusinfo.ml.unsupervised; 26 | 27 | import com.oculusinfo.ml.DataSet; 28 | import com.oculusinfo.ml.Instance; 29 | import com.oculusinfo.ml.feature.spatial.GeoSpatialFeature; 30 | import com.oculusinfo.ml.feature.spatial.centroid.GeoSpatialCentroid; 31 | import com.oculusinfo.ml.feature.spatial.distance.HaversineDistance; 32 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 33 | import com.oculusinfo.ml.unsupervised.cluster.ClusterResult; 34 | import com.oculusinfo.ml.unsupervised.cluster.dpmeans.DPMeans; 35 |
/**
 * Command-line driver that clusters ten instances sharing one identical
 * geo-spatial value with {@link DPMeans} and prints the resulting clusters.
 */
public class TestGeoClusteringWithDPMeans {

    /** Number of instances to generate; their ids run from "1" to this value. */
    private static final int INSTANCE_COUNT = 10;

    /**
     * @param args unused
     */
    public static void main(String[] args) {
        DataSet ds = new DataSet();

        // Every instance gets the same "geo" value; only the id differs.
        for (int id = 1; id <= INSTANCE_COUNT; id++) {
            Instance inst = new Instance(Integer.toString(id));
            GeoSpatialFeature feature = new GeoSpatialFeature("geo");
            feature.setValue(-5.0, 120.0);
            inst.addFeature(feature);
            ds.add(inst);
        }

        DPMeans clusterer = new DPMeans(3, true);
        clusterer.setThreshold(0.2);
        // NOTE(review): the 1.0 passed to HaversineDistance is presumably the feature weight - confirm.
        clusterer.registerFeatureType("geo", GeoSpatialCentroid.class, new HaversineDistance(1.0));

        ClusterResult clusters = clusterer.doCluster(ds);
        for (Cluster cluster : clusters) {
            System.out.println(cluster.toString(true));
        }
        clusterer.terminate();
    }
}
115 | -------------------------------------------------------------------------------- /ensemble-clustering/src/test/java/com/oculusinfo/ml/unsupervised/TestNameLocationClustering.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013 Oculus Info Inc. 3 | * http://www.oculusinfo.com/ 4 | * 5 | * Released under the MIT License. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | * this software and associated documentation files (the "Software"), to deal in 9 | * the Software without restriction, including without limitation the rights to 10 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 11 | * of the Software, and to permit persons to whom the Software is furnished to do 12 | * so, subject to the following conditions: 13 | 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | package com.oculusinfo.ml.unsupervised; 26 | 27 | import java.util.Random; 28 | 29 | import com.oculusinfo.ml.DataSet; 30 | import com.oculusinfo.ml.Instance; 31 | import com.oculusinfo.ml.feature.spatial.GeoSpatialFeature; 32 | import com.oculusinfo.ml.feature.spatial.centroid.GeoSpatialCentroid; 33 | import com.oculusinfo.ml.feature.spatial.distance.HaversineDistance; 34 | import com.oculusinfo.ml.feature.string.StringFeature; 35 | import com.oculusinfo.ml.feature.string.centroid.StringMedianCentroid; 36 | import com.oculusinfo.ml.feature.string.distance.EditDistance; 37 | 38 | import com.oculusinfo.ml.unsupervised.cluster.Cluster; 39 | import com.oculusinfo.ml.unsupervised.cluster.ClusterResult; 40 | import com.oculusinfo.ml.unsupervised.cluster.kmeans.KMeans; 41 |
/**
 * Command-line driver that clusters 100000 random instances, each carrying a
 * two-word name and a random latitude/longitude, with {@link KMeans} and
 * prints the resulting clusters.
 */
public class TestNameLocationClustering {

    /** Words from which the random two-word names are assembled. */
    private static final String[] TOKENS =
        {"alpha", "bravo", "charlie", "delta", "echo", "foxtrot", "romeo", "sierra", "tango", "whiskey"};

    /**
     * Builds one instance with a random "name" (two tokens joined by a space)
     * and a random "location" (latitude in [-90, 90), longitude in [-180, 180)).
     */
    private static Instance randomInstance(Random rnd) {
        Instance inst = new Instance();

        // The name is drawn first: two token picks, in this order.
        StringFeature name = new StringFeature("name");
        String firstWord = TOKENS[rnd.nextInt(TOKENS.length)];
        String secondWord = TOKENS[rnd.nextInt(TOKENS.length)];
        name.setValue(firstWord + " " + secondWord);
        inst.addFeature(name);

        // Then the location: latitude before longitude.
        GeoSpatialFeature geo = new GeoSpatialFeature("location");
        geo.setLatitude(rnd.nextDouble() * 180 - 90);
        geo.setLongitude(rnd.nextDouble() * 360 - 180);
        inst.addFeature(geo);

        return inst;
    }

    public static void main(String[] args) {
        DataSet ds = new DataSet();

        Random rnd = new Random();
        for (int count = 0; count < 100000; count++) {
            ds.add(randomInstance(rnd));
        }

        // create a k-means clusterer with k=4, 5 max iterations
        KMeans clusterer = new KMeans(4, 5, false);

        // name feature: edit distance and median-string centroid, weight 1.0
        clusterer.registerFeatureType("name", StringMedianCentroid.class, new EditDistance(1.0));

        // location feature: haversine distance and geo-spatial centroid, weight 1.0
        clusterer.registerFeatureType("location", GeoSpatialCentroid.class, new HaversineDistance(1.0));

        ClusterResult clusters = clusterer.doCluster(ds);
        for (Cluster cluster : clusters) {
            System.out.println(cluster.toString(false));
        }
        clusterer.terminate();
    }
}
91 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | com.oculusinfo 5 | 0.1.0-SNAPSHOT 6 | ml 7 | pom 8 | 9 | Ensemble Clustering Library 10 | Parent project for Ensemble Clustering Library 11 | 12 | 13 | 14 | codelutin 15 | Code Lutin 16 | http://lutinbuilder.labs.libre-entreprise.org/maven2/ 17 | 18 | 19 | 20 | codehaus-snapshots 21 | http://snapshots.repository.codehaus.org 22 | 23 | 24 | 25 | 26 | 27 | 28 | org.slf4j 29 | slf4j-log4j12 30 | 1.6.4 31 | 32 | 33 | 34 | org.codehaus.jackson 35 | jackson-mapper-asl 36 | 1.9.2 37 | 38 | 39 | 40 | junit 41 | junit 42 | 4.8.2 43 | jar 44 | test 45 | 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.0 54 | 55 | 1.6 56 | 1.6 57 | 58 | 59 | 60 | 61 | 62 | 63 | ensemble-clustering 64 | ensemble-clustering-spark 65 | 66 | 
--------------------------------------------------------------------------------