├── src ├── main │ └── java │ │ └── eu │ │ └── socialsensor │ │ ├── benchmarks │ │ ├── InsertsGraphData.java │ │ ├── RequiresGraphData.java │ │ ├── Benchmark.java │ │ ├── DeleteGraphBenchmark.java │ │ ├── FindNodesOfAllEdgesBenchmark.java │ │ ├── FindNeighboursOfAllNodesBenchmark.java │ │ ├── FindShortestPathBenchmark.java │ │ ├── MassiveInsertionBenchmark.java │ │ ├── BenchmarkBase.java │ │ ├── SingleInsertionBenchmark.java │ │ ├── PermutingBenchmarkBase.java │ │ └── ClusteringBenchmark.java │ │ ├── insert │ │ ├── Insertion.java │ │ ├── SparkseeSingleInsertion.java │ │ ├── SparkseeMassiveInsertion.java │ │ ├── Neo4jMassiveInsertion.java │ │ ├── TitanMassiveInsertion.java │ │ ├── TitanSingleInsertion.java │ │ ├── OrientMassiveInsertion.java │ │ ├── OrientSingleInsertion.java │ │ ├── OrientAbstractInsertion.java │ │ ├── Neo4jSingleInsertion.java │ │ └── InsertionBase.java │ │ ├── main │ │ ├── BenchmarkingException.java │ │ ├── BenchmarkType.java │ │ ├── GraphDatabaseType.java │ │ ├── GraphDatabaseBenchmark.java │ │ └── BenchmarkConfiguration.java │ │ ├── dataset │ │ ├── DatasetFactory.java │ │ └── Dataset.java │ │ ├── utils │ │ ├── PermuteMethod.java │ │ ├── Metrics.java │ │ └── Utils.java │ │ ├── clustering │ │ ├── LouvainMethod.java │ │ └── Cache.java │ │ └── graphdatabases │ │ ├── GraphDatabase.java │ │ ├── GraphDatabaseBase.java │ │ ├── OrientGraphDatabase.java │ │ └── SparkseeGraphDatabase.java └── test │ ├── resources │ └── META-INF │ │ ├── log4j2.xml │ │ ├── log4j.properties │ │ └── input.properties │ └── java │ └── eu │ └── socialsensor │ └── main │ └── GraphDatabaseBenchmarkTest.java ├── .gitignore ├── NOTICE ├── README.md ├── LICENSE └── pom.xml /src/main/java/eu/socialsensor/benchmarks/InsertsGraphData.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | public interface InsertsGraphData extends Benchmark 4 | { 5 | 6 | } 7 | 
-------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/RequiresGraphData.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | public interface RequiresGraphData extends Benchmark 4 | { 5 | 6 | } 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/* 2 | *.class 3 | /target 4 | /data 5 | /.project 6 | /.gitignore 7 | /.settings 8 | /.classpath 9 | .idea/ 10 | graphdb-benchmarks.iml 11 | metrics/ 12 | results/ 13 | storage/ 14 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/Benchmark.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | /** 4 | * Represents the benchmarks 5 | * 6 | * @author sotbeis 7 | * @email sotbeis@iti.gr 8 | */ 9 | public interface Benchmark 10 | { 11 | 12 | /** 13 | * Start the selected benchmark 14 | */ 15 | public void startBenchmark(); 16 | 17 | } 18 | -------------------------------------------------------------------------------- /src/test/resources/META-INF/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | This product includes software developed by Information Technologies Institute 2 | (CERTH, 57001, Thermi, Greece), and the following individuals: 3 | * Sotiris Beis 4 | * Alexander Patrikalakis 5 | 6 | It also includes software from other open source projects including, 7 | but not limited to (check pom.xml for complete 
package eu.socialsensor.insert;

import java.io.File;

/**
 * Represents the insertion of data in each graph database
 *
 * @author sotbeis, sotbeis@iti.gr
 */
public interface Insertion
{

    /**
     * Loads the data in each graph database
     *
     * @param dataset the dataset file to load (javadoc previously named a
     *            non-existent {@code datasetDir} parameter)
     * @param scenarioNumber ordinal of the benchmark scenario (permutation)
     *            being executed; used to distinguish result files per run
     */
    void createGraph(File dataset, int scenarioNumber);

}
9 | log4j.appender.stdout.layout.ConversionPattern=%d (%t) [%5p] (%F:%L) - %m%n 10 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/main/BenchmarkingException.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.main; 2 | 3 | public class BenchmarkingException extends RuntimeException 4 | { 5 | 6 | /** 7 | * 8 | */ 9 | private static final long serialVersionUID = -4165548376731455231L; 10 | 11 | public BenchmarkingException(String message) 12 | { 13 | super(message); 14 | } 15 | 16 | public BenchmarkingException(String message, Throwable cause) 17 | { 18 | super(message, cause); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /src/test/java/eu/socialsensor/main/GraphDatabaseBenchmarkTest.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.main; 2 | 3 | import static org.junit.Assert.fail; 4 | import org.junit.Test; 5 | 6 | public class GraphDatabaseBenchmarkTest 7 | { 8 | @Test 9 | public void testGraphDatabaseBenchmark() 10 | { 11 | GraphDatabaseBenchmark bench = new GraphDatabaseBenchmark(null /* inputPath */); 12 | try 13 | { 14 | bench.run(); 15 | } 16 | catch (Exception e) 17 | { 18 | e.printStackTrace(); 19 | fail("Got unexpected exception: " + e.getMessage()); 20 | } 21 | 22 | //bench.cleanup(); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/dataset/DatasetFactory.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.dataset; 2 | 3 | import java.io.File; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | /** 8 | * 9 | * @author Alexander Patrikalakis 10 | * 11 | */ 12 | public class DatasetFactory 13 | { 14 | private static DatasetFactory theInstance = null; 15 
package eu.socialsensor.benchmarks;

import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;

import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.BenchmarkType;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

/**
 * Benchmark that measures the time required to delete a graph.
 *
 * @author Alexander Patrikalakis
 */
public class DeleteGraphBenchmark extends PermutingBenchmarkBase implements RequiresGraphData
{
    public DeleteGraphBenchmark(BenchmarkConfiguration bench)
    {
        super(bench, BenchmarkType.DELETION);
    }

    @Override
    public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
    {
        // Time only the database deletion itself.
        final Stopwatch timer = new Stopwatch();
        timer.start();
        Utils.deleteDatabase(type, bench);
        times.get(type).add((double) timer.elapsed(TimeUnit.MILLISECONDS));
    }
}
package eu.socialsensor.benchmarks;

import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;

import eu.socialsensor.graphdatabases.GraphDatabase;
import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.BenchmarkType;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

/**
 * FindNodesOfAllEdgesBenchmark implementation
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 */
public class FindNodesOfAllEdgesBenchmark extends PermutingBenchmarkBase implements RequiresGraphData
{
    public FindNodesOfAllEdgesBenchmark(BenchmarkConfiguration config)
    {
        super(config, BenchmarkType.FIND_ADJACENT_NODES);
    }

    @Override
    public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
    {
        final GraphDatabase db = Utils.createDatabaseInstance(bench, type);
        db.open();
        final Stopwatch timer = new Stopwatch();
        timer.start();
        db.findNodesOfAllEdges();
        // NOTE(review): shutdown happens before the timer is read, so shutdown
        // cost is included in the reported figure - confirm this is intended.
        db.shutdown();
        times.get(type).add((double) timer.elapsed(TimeUnit.MILLISECONDS));
    }
}
package eu.socialsensor.insert;

import java.io.File;

import com.sparsity.sparksee.gdb.Graph;
import com.sparsity.sparksee.gdb.Session;
import com.sparsity.sparksee.gdb.Value;

import eu.socialsensor.graphdatabases.SparkseeGraphDatabase;
import eu.socialsensor.main.GraphDatabaseType;

/**
 * Implementation of single insertion in the Sparksee graph database. Each
 * edge creation is committed in its own transaction.
 */
public class SparkseeSingleInsertion extends InsertionBase<Long>
{
    private final Session session;
    private final Graph sparkseeGraph;

    // Removed an unused instance field (Value value) and a copy-pasted
    // "massive load" comment that did not apply to single insertion.
    public SparkseeSingleInsertion(Session session, File resultsPath)
    {
        super(GraphDatabaseType.SPARKSEE, resultsPath);
        this.session = session;
        this.sparkseeGraph = session.getGraph();
    }

    /**
     * Finds the node whose id attribute equals {@code value}, creating it if
     * it does not exist yet.
     */
    @Override
    public Long getOrCreate(String value)
    {
        Value sparkseeValue = new Value();
        return sparkseeGraph.findOrCreateObject(SparkseeGraphDatabase.NODE_ATTRIBUTE, sparkseeValue.setString(value));
    }

    /**
     * Creates an edge from {@code src} to {@code dest} inside a dedicated
     * transaction.
     */
    @Override
    public void relateNodes(Long src, Long dest)
    {
        session.begin();
        sparkseeGraph.newEdge(SparkseeGraphDatabase.EDGE_TYPE, src, dest);
        session.commit();
    }

}
package eu.socialsensor.main;

import java.util.EnumSet;
import java.util.Set;

/**
 * Enumerates the benchmark workloads; each constant carries a human-readable
 * name and the prefix of the CSV file its results are written to.
 *
 * @author Alexander Patrikalakis
 */
public enum BenchmarkType
{
    MASSIVE_INSERTION("Massive Insertion", "MassiveInsertion"),
    SINGLE_INSERTION("Single Insertion", "SingleInsertion"),
    DELETION("Delete Graph", "DeleteGraph"),
    FIND_NEIGHBOURS("Find Neighbours of All Nodes", "FindNeighbours"),
    FIND_ADJACENT_NODES("Find Adjacent Nodes of All Edges", "FindAdjacent"),
    FIND_SHORTEST_PATH("Find Shortest Path", "FindShortest"),
    CLUSTERING("Clustering", "Clustering");

    /** Benchmarks that load graph data into the database under test. */
    public static final Set<BenchmarkType> INSERTING_BENCHMARK_SET =
        EnumSet.of(MASSIVE_INSERTION, SINGLE_INSERTION);

    private final String longname;
    private final String filenamePrefix;

    private BenchmarkType(String longName, String filenamePrefix)
    {
        this.longname = longName;
        this.filenamePrefix = filenamePrefix;
    }

    /** @return the human-readable benchmark name. */
    public String longname()
    {
        return longname;
    }

    /** @return the name of the CSV file results of this benchmark go to. */
    public String getResultsFileName()
    {
        return filenamePrefix + ".csv";
    }
}
com.sparsity.sparksee.gdb.Session; 5 | import com.sparsity.sparksee.gdb.Value; 6 | 7 | import eu.socialsensor.graphdatabases.SparkseeGraphDatabase; 8 | import eu.socialsensor.main.GraphDatabaseType; 9 | 10 | public class SparkseeMassiveInsertion extends InsertionBase implements Insertion 11 | { 12 | private final Session session; 13 | private final Graph sparkseeGraph; 14 | private int operations; 15 | 16 | public SparkseeMassiveInsertion(Session session) 17 | { 18 | super(GraphDatabaseType.SPARKSEE, null /* resultsPath */); 19 | this.session = session; 20 | this.sparkseeGraph = session.getGraph(); 21 | this.operations = 0; 22 | } 23 | 24 | @Override 25 | public Long getOrCreate(String value) 26 | { 27 | Value sparkseeValue = new Value(); 28 | return sparkseeGraph.findOrCreateObject(SparkseeGraphDatabase.NODE_ATTRIBUTE, sparkseeValue.setString(value)); 29 | } 30 | 31 | @Override 32 | public void relateNodes(Long src, Long dest) 33 | { 34 | sparkseeGraph.newEdge(SparkseeGraphDatabase.EDGE_TYPE, src, dest); 35 | operations++; 36 | if (operations == 10000) 37 | { 38 | session.commit(); 39 | session.begin(); 40 | operations = 0; 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/insert/Neo4jMassiveInsertion.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.insert; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import org.neo4j.helpers.collection.MapUtil; 6 | import org.neo4j.unsafe.batchinsert.BatchInserter; 7 | 8 | import eu.socialsensor.graphdatabases.Neo4jGraphDatabase; 9 | import eu.socialsensor.main.GraphDatabaseType; 10 | 11 | /** 12 | * Implementation of massive Insertion in Neo4j graph database 13 | * 14 | * @author sotbeis, sotbeis@iti.gr 15 | * @author Alexander Patrikalakis 16 | * 17 | */ 18 | public final class Neo4jMassiveInsertion extends InsertionBase 19 | { 20 | private final 
package eu.socialsensor.insert;

import java.util.HashMap;
import java.util.Map;
import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.unsafe.batchinsert.BatchInserter;

import eu.socialsensor.graphdatabases.Neo4jGraphDatabase;
import eu.socialsensor.main.GraphDatabaseType;

/**
 * Implementation of massive Insertion in Neo4j graph database
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 *
 */
public final class Neo4jMassiveInsertion extends InsertionBase<Long>
{
    private final BatchInserter inserter;
    // Maps dataset node ids to the Neo4j node ids the batch inserter assigned.
    private final Map<Long, Long> cache = new HashMap<Long, Long>();

    public Neo4jMassiveInsertion(BatchInserter inserter)
    {
        super(GraphDatabaseType.NEO4J, null /* resultsPath */);
        this.inserter = inserter;
    }

    /**
     * Returns the Neo4j node id for the given dataset node id, creating the
     * node on first sight. Parses the id once instead of twice.
     */
    @Override
    protected Long getOrCreate(String value)
    {
        final Long key = Long.valueOf(value);
        Long id = cache.get(key);
        if (id == null)
        {
            Map<String, Object> properties = MapUtil.map("nodeId", value);
            id = inserter.createNode(properties, Neo4jGraphDatabase.NODE_LABEL);
            cache.put(key, id);
        }
        return id;
    }

    /** Creates a SIMILAR relationship between the two nodes. */
    @Override
    protected void relateNodes(Long src, Long dest)
    {
        inserter.createRelationship(src, dest, Neo4jGraphDatabase.RelTypes.SIMILAR, null);
    }
}
Vertex vertex = batchGraph.getVertex(titanVertexId); 34 | if (vertex == null) 35 | { 36 | vertex = batchGraph.addVertex(titanVertexId); 37 | vertex.setProperty("nodeId", intVal); 38 | } 39 | return vertex; 40 | } 41 | 42 | @Override 43 | public void relateNodes(Vertex src, Vertex dest) 44 | { 45 | src.addEdge("similar", dest); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | import eu.socialsensor.dataset.DatasetFactory; 4 | import eu.socialsensor.graphdatabases.GraphDatabase; 5 | import eu.socialsensor.main.BenchmarkConfiguration; 6 | import eu.socialsensor.main.BenchmarkType; 7 | import eu.socialsensor.main.GraphDatabaseType; 8 | import eu.socialsensor.utils.Utils; 9 | 10 | import java.util.Set; 11 | import java.util.concurrent.TimeUnit; 12 | 13 | import com.google.common.base.Stopwatch; 14 | 15 | /** 16 | * FindShortestPathBenchmark implementation 17 | * 18 | * @author sotbeis, sotbeis@iti.gr 19 | * @author Alexander Patrikalakis 20 | */ 21 | public class FindShortestPathBenchmark extends PermutingBenchmarkBase implements RequiresGraphData 22 | { 23 | 24 | private final Set generatedNodes; 25 | 26 | public FindShortestPathBenchmark(BenchmarkConfiguration config) 27 | { 28 | super(config, BenchmarkType.FIND_SHORTEST_PATH); 29 | generatedNodes = DatasetFactory.getInstance().getDataset(config.getDataset()) 30 | .generateRandomNodes(config.getRandomNodes()); 31 | } 32 | 33 | @Override 34 | public void benchmarkOne(GraphDatabaseType type, int scenarioNumber) 35 | { 36 | GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type); 37 | graphDatabase.open(); 38 | Stopwatch watch = new Stopwatch(); 39 | watch.start(); 40 | graphDatabase.shortestPaths(generatedNodes); 41 | graphDatabase.shutdown(); 42 | 
package eu.socialsensor.dataset;

import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.commons.math3.util.MathArrays;

import eu.socialsensor.utils.Utils;

/**
 * In-memory representation of a tab-separated edge-list dataset.
 *
 * @author Alexander Patrikalakis
 */
public class Dataset implements Iterable<List<String>>
{
    private final List<List<String>> data;

    public Dataset(File datasetFile)
    {
        data = Utils.readTabulatedLines(datasetFile, 4 /* numberOfLinesToSkip */);
    }

    /**
     * Selects {@code numRandomNodes} distinct node ids from the dataset,
     * uniformly at random.
     *
     * @param numRandomNodes number of distinct node ids to pick
     * @return the selected node ids
     * @throws IllegalArgumentException if more nodes are requested than the
     *             dataset contains (previously an ArrayIndexOutOfBoundsException)
     */
    public Set<Integer> generateRandomNodes(int numRandomNodes)
    {
        // NOTE(review): the constructor already skips 4 header lines, so the
        // subList(4, ...) below additionally drops the first 4 data lines from
        // the candidate pool - confirm this double skip is intended.
        Set<String> nodes = new HashSet<String>();
        for (List<String> line : data.subList(4, data.size()))
        {
            for (String nodeId : line)
            {
                nodes.add(nodeId.trim());
            }
        }

        List<String> nodeList = new ArrayList<String>(nodes);
        if (numRandomNodes > nodeList.size())
        {
            throw new IllegalArgumentException("requested " + numRandomNodes
                + " random nodes but dataset only has " + nodeList.size());
        }

        // Shuffle index positions rather than the node list itself.
        int[] nodeIndexList = new int[nodeList.size()];
        for (int i = 0; i < nodeList.size(); i++)
        {
            nodeIndexList[i] = i;
        }
        MathArrays.shuffle(nodeIndexList);

        Set<Integer> generatedNodes = new HashSet<Integer>();
        for (int i = 0; i < numRandomNodes; i++)
        {
            generatedNodes.add(Integer.valueOf(nodeList.get(nodeIndexList[i])));
        }
        return generatedNodes;
    }

    @Override
    public Iterator<List<String>> iterator()
    {
        return data.iterator();
    }
}
package eu.socialsensor.insert;

import java.io.File;
import java.util.Iterator;

import com.thinkaurelius.titan.core.TitanGraph;
import com.thinkaurelius.titan.core.util.TitanId;
import com.tinkerpop.blueprints.Compare;
import com.tinkerpop.blueprints.Vertex;

import eu.socialsensor.main.GraphDatabaseType;

/**
 * Implementation of single Insertion in Titan graph database
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 *
 */
public class TitanSingleInsertion extends InsertionBase<Vertex>
{
    private final TitanGraph titanGraph;

    public TitanSingleInsertion(TitanGraph titanGraph, GraphDatabaseType type, File resultsPath)
    {
        super(type, resultsPath);
        this.titanGraph = titanGraph;
    }

    /**
     * Returns the vertex with the given "nodeId" property, creating it (and
     * committing) when it does not exist.
     */
    @Override
    public Vertex getOrCreate(String value)
    {
        final Integer intValue = Integer.valueOf(value);
        // Run the index lookup once; the original issued the same query twice
        // (once for hasNext and once for next).
        final Iterator<Vertex> hits =
            titanGraph.query().has("nodeId", Compare.EQUAL, intValue).vertices().iterator();
        if (hits.hasNext())
        {
            return hits.next();
        }
        final long titanVertexId = TitanId.toVertexId(intValue);
        final Vertex v = titanGraph.addVertex(titanVertexId);
        v.setProperty("nodeId", intValue);
        titanGraph.commit();
        return v;
    }

    /**
     * Adds a "similar" edge and commits; on failure the transaction is rolled
     * back and the edge is silently dropped.
     */
    @Override
    public void relateNodes(Vertex src, Vertex dest)
    {
        try
        {
            titanGraph.addEdge(null, src, dest, "similar");
            titanGraph.commit();
        }
        catch (Exception e)
        {
            titanGraph.rollback(); //TODO(amcp) why can this happen? doesn't this indicate illegal state?
        }
    }
}
package eu.socialsensor.benchmarks;

import java.util.concurrent.TimeUnit;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.google.common.base.Stopwatch;

import eu.socialsensor.graphdatabases.GraphDatabase;
import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.BenchmarkType;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

/**
 * MassiveInsertionBenchmark implementation
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 */
public class MassiveInsertionBenchmark extends PermutingBenchmarkBase implements InsertsGraphData
{
    private static final Logger logger = LogManager.getLogger();

    public MassiveInsertionBenchmark(BenchmarkConfiguration config)
    {
        super(config, BenchmarkType.MASSIVE_INSERTION);
    }

    @Override
    public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
    {
        logger.debug("Creating database instance for type " + type.getShortname());
        final GraphDatabase db = Utils.createDatabaseInstance(bench, type);

        logger.debug("Prepare database instance for type {} for massive loading", type.getShortname());
        // the following step includes provisioning in managed database
        // services. do not measure this time as
        // it is not related to the action of inserting.
        db.createGraphForMassiveLoad();

        logger.debug("Massive load graph in database type {}", type.getShortname());
        final Stopwatch timer = new Stopwatch();
        timer.start();
        db.massiveModeLoading(bench.getDataset());

        logger.debug("Shutdown massive graph in database type {}", type.getShortname());
        db.shutdownMassiveGraph();
        times.get(type).add((double) timer.elapsed(TimeUnit.MILLISECONDS));
    }
}
package eu.socialsensor.utils;

import java.lang.reflect.Method;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * Iterates over all permutations of an array of {@link Method}s using the
 * classic lexicographic next-permutation algorithm. The returned array is a
 * reused buffer: callers must copy it if they need to keep a permutation.
 */
public class PermuteMethod implements Iterator<Method[]> {
    private final int size;
    private final Method[] elements; // copy of original 0 .. size-1
    private final Method[] ar; // reused output buffer, 0 .. size-1
    private final int[] permutation; // perm of nums 1..size, perm[0]=0

    private boolean next = true;

    public PermuteMethod(Method[] e) {
        size = e.length;
        elements = new Method[size];
        System.arraycopy(e, 0, elements, 0, size);
        ar = new Method[size];
        System.arraycopy(e, 0, ar, 0, size);
        permutation = new int[size + 1];
        for (int i = 0; i < size + 1; i++) {
            permutation[i] = i;
        }
    }

    /** Materializes the current permutation into the output buffer. */
    private void formNextPermutation() {
        for (int i = 0; i < size; i++) {
            // Direct array store; the original used reflective Array.set on a
            // plain Method[], which is needless reflection overhead.
            ar[i] = elements[permutation[i + 1] - 1];
        }
    }

    @Override
    public boolean hasNext() {
        return next;
    }

    @Override
    public void remove() throws UnsupportedOperationException {
        throw new UnsupportedOperationException();
    }

    private void swap(final int i, final int j) {
        final int x = permutation[i];
        permutation[i] = permutation[j];
        permutation[j] = x;
    }

    @Override
    public Method[] next() throws NoSuchElementException {
        if (!next) {
            // Honor the Iterator contract; the original declared this
            // exception but never threw it when exhausted.
            throw new NoSuchElementException();
        }
        formNextPermutation(); // copy original elements
        int i = size - 1;
        while (permutation[i] > permutation[i + 1])
            i--;
        if (i == 0) {
            // Last permutation reached: reset state for reuse and stop.
            next = false;
            for (int j = 0; j < size + 1; j++) {
                permutation[j] = j;
            }
            return ar;
        }
        int j = size;
        while (permutation[i] > permutation[j])
            j--;
        swap(i, j);
        // Reverse the suffix to obtain the lexicographically next permutation.
        int r = size;
        int s = i + 1;
        while (r > s) {
            swap(r, s);
            r--;
            s++;
        }
        return ar;
    }
}
package eu.socialsensor.main;

import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Enum containing constants that correspond to each database.
 *
 * @author Alexander Patrikalakis
 */
public enum GraphDatabaseType
{
    TITAN_BERKELEYDB("Titan", "berkeleyje", "tbdb"),
    TITAN_DYNAMODB("Titan", "com.amazon.titan.diskstorage.dynamodb.DynamoDBStoreManager", "tddb"),
    TITAN_CASSANDRA("Titan", "cassandra", "tc"),
    TITAN_CASSANDRA_EMBEDDED("TitanEmbedded", "embeddedcassandra", "tce"),
    TITAN_HBASE("Titan", "hbase", "thb"),
    TITAN_PERSISTIT("TitanEmbedded", "persistit", "tp"),
    ORIENT_DB("OrientDB", null, "orient"),
    NEO4J("Neo4j", null, "neo4j"),
    SPARKSEE("Sparksee", null, "sparksee");

    private final String backend;
    private final String api;
    private final String shortname;

    /** Maps each short name (CLI/config token) back to its constant. */
    public static final Map<String, GraphDatabaseType> STRING_REP_MAP =
        new HashMap<String, GraphDatabaseType>();
    /** All Titan-backed database flavors. */
    public static final Set<GraphDatabaseType> TITAN_FLAVORS = EnumSet.of(
        TITAN_BERKELEYDB, TITAN_DYNAMODB, TITAN_CASSANDRA,
        TITAN_CASSANDRA_EMBEDDED, TITAN_HBASE, TITAN_PERSISTIT);
    static
    {
        for (GraphDatabaseType db : values())
        {
            STRING_REP_MAP.put(db.getShortname(), db);
        }
    }

    private GraphDatabaseType(String api, String backend, String shortname)
    {
        this.api = api;
        this.backend = backend;
        this.shortname = shortname;
    }

    /** @return the storage backend identifier, or null for embedded engines. */
    public String getBackend()
    {
        return backend;
    }

    /** @return the API family this database is driven through. */
    public String getApi()
    {
        return api;
    }

    /** @return the short name used in configuration to select this database. */
    public String getShortname()
    {
        return shortname;
    }
}
com.orientechnologies.orient.graph.batch.OGraphBatchInsertBasic; 5 | import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; 6 | 7 | import eu.socialsensor.main.GraphDatabaseType; 8 | 9 | /** 10 | * Implementation of massive Insertion in OrientDB graph database 11 | * 12 | * @author sotbeis, sotbeis@iti.gr 13 | * @author Alexander Patrikalakis 14 | * 15 | */ 16 | public class OrientMassiveInsertion extends InsertionBase implements Insertion 17 | { 18 | private static final int ESTIMATED_ENTRIES = 1000000; 19 | private static final int AVERAGE_NUMBER_OF_EDGES_PER_NODE = 40; 20 | private static final int NUMBER_OF_ORIENT_CLUSTERS = 16; 21 | private final OGraphBatchInsertBasic graph; 22 | 23 | public OrientMassiveInsertion(final String url) 24 | { 25 | super(GraphDatabaseType.ORIENT_DB, null /* resultsPath */); 26 | OGlobalConfiguration.ENVIRONMENT_CONCURRENT.setValue(false); 27 | OrientGraphNoTx transactionlessGraph = new OrientGraphNoTx(url); 28 | for (int i = 0; i < NUMBER_OF_ORIENT_CLUSTERS; ++i) 29 | { 30 | transactionlessGraph.getVertexBaseType().addCluster("v_" + i); 31 | transactionlessGraph.getEdgeBaseType().addCluster("e_" + i); 32 | } 33 | transactionlessGraph.shutdown(); 34 | 35 | graph = new OGraphBatchInsertBasic(url); 36 | graph.setAverageEdgeNumberPerNode(AVERAGE_NUMBER_OF_EDGES_PER_NODE); 37 | graph.setEstimatedEntries(ESTIMATED_ENTRIES); 38 | graph.setIdPropertyName("nodeId"); 39 | graph.begin(); 40 | } 41 | 42 | @Override 43 | protected void post() { 44 | graph.end(); 45 | } 46 | 47 | @Override 48 | protected Long getOrCreate(String value) 49 | { 50 | final long v = Long.parseLong(value); 51 | graph.createVertex(v); 52 | return v; 53 | } 54 | 55 | @Override 56 | protected void relateNodes(Long src, Long dest) 57 | { 58 | graph.createEdge(src, dest); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/BenchmarkBase.java: 
-------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | import java.io.File; 4 | 5 | import org.apache.logging.log4j.LogManager; 6 | import org.apache.logging.log4j.Logger; 7 | 8 | import eu.socialsensor.main.BenchmarkConfiguration; 9 | import eu.socialsensor.main.BenchmarkType; 10 | import eu.socialsensor.main.GraphDatabaseType; 11 | import eu.socialsensor.utils.Utils; 12 | 13 | /** 14 | * Base class for benchmarks. 15 | * 16 | * @author Alexander Patrikalakis 17 | */ 18 | public abstract class BenchmarkBase implements Benchmark 19 | { 20 | private static final Logger logger = LogManager.getLogger(); 21 | protected final BenchmarkConfiguration bench; 22 | protected final File outputFile; 23 | protected final BenchmarkType type; 24 | 25 | protected BenchmarkBase(BenchmarkConfiguration bench, BenchmarkType type) 26 | { 27 | this.bench = bench; 28 | this.outputFile = new File(bench.getResultsPath(), type.getResultsFileName()); 29 | this.type = type; 30 | } 31 | 32 | @Override 33 | public final void startBenchmark() 34 | { 35 | startBenchmarkInternal(); 36 | } 37 | 38 | public abstract void startBenchmarkInternal(); 39 | 40 | protected final void createDatabases() 41 | { 42 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 43 | { 44 | logger.info(String.format("creating %s database from %s dataset", type.getShortname(), bench.getDataset() 45 | .getName())); 46 | File dbpath = Utils.generateStorageDirectory(type, bench.getDbStorageDirectory()); 47 | if (dbpath.exists()) 48 | { 49 | throw new IllegalStateException(String.format( 50 | "Database from a previous run exist: %s; clean up and try again.", dbpath.getAbsolutePath())); 51 | } 52 | Utils.createMassiveLoadDatabase(type, bench); 53 | } 54 | } 55 | 56 | protected final void deleteDatabases() 57 | { 58 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 59 | { 60 | Utils.deleteDatabase(type, bench); 61 | } 62 | } 63 | } 64 
| -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/SingleInsertionBenchmark.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | import eu.socialsensor.graphdatabases.GraphDatabase; 4 | import eu.socialsensor.main.BenchmarkConfiguration; 5 | import eu.socialsensor.main.BenchmarkType; 6 | import eu.socialsensor.main.GraphDatabaseType; 7 | import eu.socialsensor.utils.Utils; 8 | 9 | import java.io.File; 10 | import java.util.List; 11 | 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | 15 | /** 16 | * SingleInsertionBenchmak implementation 17 | * 18 | * @author sotbeis, sotbeis@iti.gr 19 | * @author Alexander Patrikalakis 20 | */ 21 | public class SingleInsertionBenchmark extends PermutingBenchmarkBase implements InsertsGraphData 22 | { 23 | public static final String INSERTION_TIMES_OUTPUT_FILE_NAME_BASE = "SINGLE_INSERTIONResults"; 24 | private static final Logger LOG = LogManager.getLogger(); 25 | 26 | public SingleInsertionBenchmark(BenchmarkConfiguration bench) 27 | { 28 | super(bench, BenchmarkType.SINGLE_INSERTION); 29 | } 30 | 31 | @Override 32 | public void post() 33 | { 34 | LOG.info("Write results to " + outputFile.getAbsolutePath()); 35 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 36 | { 37 | String prefix = outputFile.getParentFile().getAbsolutePath() + File.separator 38 | + INSERTION_TIMES_OUTPUT_FILE_NAME_BASE + "." 
+ type.getShortname(); 39 | List> insertionTimesOfEachScenario = Utils.getDocumentsAs2dList(prefix, bench.getScenarios()); 40 | times.put(type, Utils.calculateMeanList(insertionTimesOfEachScenario)); 41 | Utils.deleteMultipleFiles(prefix, bench.getScenarios()); 42 | } 43 | // use the logic of the superclass method after populating the times map 44 | super.post(); 45 | } 46 | 47 | @Override 48 | public void benchmarkOne(GraphDatabaseType type, int scenarioNumber) 49 | { 50 | GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type); 51 | graphDatabase.createGraphForSingleLoad(); 52 | graphDatabase.singleModeLoading(bench.getDataset(), bench.getResultsPath(), scenarioNumber); 53 | graphDatabase.shutdown(); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.insert; 2 | 3 | import java.io.File; 4 | 5 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 6 | import com.orientechnologies.orient.core.index.OIndex; 7 | import com.tinkerpop.blueprints.TransactionalGraph; 8 | import com.tinkerpop.blueprints.Vertex; 9 | import com.tinkerpop.blueprints.impls.orient.OrientGraph; 10 | 11 | import eu.socialsensor.main.GraphDatabaseType; 12 | 13 | /** 14 | * Implementation of single Insertion in OrientDB graph database 15 | * 16 | * @author sotbeis, sotbeis@iti.gr 17 | * @author Alexander Patrikalakis 18 | * 19 | */ 20 | public final class OrientSingleInsertion extends InsertionBase 21 | { 22 | protected final OrientGraph orientGraph; 23 | protected final OIndex index; 24 | 25 | public OrientSingleInsertion(OrientGraph orientGraph, File resultsPath) 26 | { 27 | super(GraphDatabaseType.ORIENT_DB, resultsPath); 28 | this.orientGraph = orientGraph; 29 | this.index = 
this.orientGraph.getRawGraph().getMetadata().getIndexManager().getIndex("V.nodeId"); 30 | } 31 | 32 | @Override 33 | protected void relateNodes(Vertex src, Vertex dest) 34 | { 35 | orientGraph.addEdge(null, src, dest, "similar"); 36 | 37 | // TODO why commit twice? is this a nested transaction? 38 | if (orientGraph instanceof TransactionalGraph) 39 | { 40 | orientGraph.commit(); 41 | orientGraph.commit(); 42 | } 43 | } 44 | 45 | @Override 46 | protected Vertex getOrCreate(final String value) 47 | { 48 | final int key = Integer.parseInt(value); 49 | 50 | Vertex v; 51 | final OIdentifiable rec = (OIdentifiable) index.get(key); 52 | if (rec != null) 53 | { 54 | return orientGraph.getVertex(rec); 55 | } 56 | 57 | v = orientGraph.addVertex(key, "nodeId", key); 58 | 59 | if (orientGraph instanceof TransactionalGraph) 60 | { 61 | orientGraph.commit(); 62 | } 63 | 64 | return v; 65 | } 66 | 67 | @Override 68 | protected void post() 69 | { 70 | super.post(); 71 | if (orientGraph instanceof TransactionalGraph) 72 | { 73 | orientGraph.commit(); 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/insert/OrientAbstractInsertion.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * * 17 | * * For more information: http://www.orientechnologies.com 18 | * 19 | */ 20 | 21 | package eu.socialsensor.insert; 22 | 23 | import org.apache.log4j.Logger; 24 | 25 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 26 | import com.orientechnologies.orient.core.index.OIndex; 27 | import com.tinkerpop.blueprints.TransactionalGraph; 28 | import com.tinkerpop.blueprints.Vertex; 29 | import com.tinkerpop.blueprints.impls.orient.OrientExtendedGraph; 30 | 31 | /** 32 | * Implementation of single Insertion in OrientDB graph database 33 | * 34 | * @author sotbeis 35 | * @email sotbeis@iti.gr 36 | * 37 | */ 38 | public abstract class OrientAbstractInsertion implements Insertion { 39 | 40 | public static String INSERTION_TIMES_OUTPUT_PATH = null; 41 | 42 | protected OrientExtendedGraph orientGraph = null; 43 | protected Logger logger = Logger.getLogger(OrientAbstractInsertion.class); 44 | 45 | protected OIndex index; 46 | 47 | public OrientAbstractInsertion(OrientExtendedGraph orientGraph) { 48 | this.orientGraph = orientGraph; 49 | } 50 | 51 | protected Vertex getOrCreate(final String value) { 52 | final int key = Integer.parseInt(value); 53 | 54 | Vertex v; 55 | if (index == null) { 56 | index = orientGraph.getRawGraph().getMetadata().getIndexManager().getIndex("V.nodeId"); 57 | } 58 | 59 | final OIdentifiable rec = (OIdentifiable) index.get(key); 60 | if (rec != null) { 61 | return orientGraph.getVertex(rec); 62 | } 63 | 64 | v = orientGraph.addVertex(key, "nodeId", key); 65 | 66 | if (orientGraph instanceof TransactionalGraph) { 67 | ((TransactionalGraph) orientGraph).commit(); 68 | } 69 | 70 | return v; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java: 
-------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.HashMap; 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import org.apache.commons.collections4.iterators.PermutationIterator; 10 | import org.apache.logging.log4j.Logger; 11 | import org.apache.logging.log4j.LogManager; 12 | 13 | import eu.socialsensor.main.BenchmarkConfiguration; 14 | import eu.socialsensor.main.BenchmarkType; 15 | import eu.socialsensor.main.GraphDatabaseType; 16 | import eu.socialsensor.utils.Utils; 17 | 18 | /** 19 | * Base class abstracting the logic of permutations 20 | * 21 | * @author Alexander Patrikalakis 22 | */ 23 | public abstract class PermutingBenchmarkBase extends BenchmarkBase 24 | { 25 | protected final Map> times; 26 | private static final Logger LOG = LogManager.getLogger(); 27 | 28 | protected PermutingBenchmarkBase(BenchmarkConfiguration bench, BenchmarkType typeIn) 29 | { 30 | super(bench, typeIn); 31 | times = new HashMap>(); 32 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 33 | { 34 | times.put(type, new ArrayList(bench.getScenarios())); 35 | } 36 | } 37 | 38 | @Override 39 | public void startBenchmarkInternal() 40 | { 41 | LOG.info(String.format("Executing %s Benchmark . . . 
.", type.longname())); 42 | 43 | if (bench.permuteBenchmarks()) 44 | { 45 | PermutationIterator iter = new PermutationIterator( 46 | bench.getSelectedDatabases()); 47 | int cntPermutations = 1; 48 | while (iter.hasNext()) 49 | { 50 | LOG.info("Scenario " + cntPermutations); 51 | startBenchmarkInternalOnePermutation(iter.next(), cntPermutations); 52 | cntPermutations++; 53 | } 54 | } 55 | else 56 | { 57 | startBenchmarkInternalOnePermutation(bench.getSelectedDatabases(), 1); 58 | } 59 | 60 | LOG.info(String.format("%s Benchmark finished", type.longname())); 61 | post(); 62 | } 63 | 64 | private void startBenchmarkInternalOnePermutation(Collection types, int cntPermutations) 65 | { 66 | for (GraphDatabaseType type : types) 67 | { 68 | benchmarkOne(type, cntPermutations); 69 | } 70 | } 71 | 72 | public abstract void benchmarkOne(GraphDatabaseType type, int scenarioNumber); 73 | 74 | public void post() 75 | { 76 | Utils.writeResults(outputFile, times, type.longname()); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/insert/Neo4jSingleInsertion.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.insert; 2 | 3 | import java.io.File; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | import org.neo4j.cypher.javacompat.ExecutionEngine; 8 | import org.neo4j.graphdb.GraphDatabaseService; 9 | import org.neo4j.graphdb.Node; 10 | import org.neo4j.graphdb.ResourceIterator; 11 | import org.neo4j.graphdb.Transaction; 12 | import org.neo4j.kernel.GraphDatabaseAPI; 13 | 14 | import eu.socialsensor.graphdatabases.Neo4jGraphDatabase; 15 | import eu.socialsensor.main.BenchmarkingException; 16 | import eu.socialsensor.main.GraphDatabaseType; 17 | 18 | /** 19 | * Implementation of single Insertion in Neo4j graph database 20 | * 21 | * @author sotbeis, sotbeis@iti.gr 22 | * @author Alexander Patrikalakis 23 | * 24 | */ 25 | 
@SuppressWarnings("deprecation") 26 | public class Neo4jSingleInsertion extends InsertionBase 27 | { 28 | private final GraphDatabaseService neo4jGraph; 29 | private final ExecutionEngine engine; 30 | 31 | public Neo4jSingleInsertion(GraphDatabaseService neo4jGraph, File resultsPath) 32 | { 33 | super(GraphDatabaseType.NEO4J, resultsPath); 34 | this.neo4jGraph = neo4jGraph; 35 | engine = new ExecutionEngine(this.neo4jGraph); 36 | } 37 | 38 | public Node getOrCreate(String nodeId) 39 | { 40 | Node result = null; 41 | 42 | try(final Transaction tx = ((GraphDatabaseAPI) neo4jGraph).tx().unforced().begin()) 43 | { 44 | try 45 | { 46 | String queryString = "MERGE (n:Node {nodeId: {nodeId}}) RETURN n"; 47 | Map parameters = new HashMap(); 48 | parameters.put("nodeId", nodeId); 49 | ResourceIterator resultIterator = engine.execute(queryString, parameters).columnAs("n"); 50 | result = resultIterator.next(); 51 | tx.success(); 52 | } 53 | catch (Exception e) 54 | { 55 | tx.failure(); 56 | throw new BenchmarkingException("unable to get or create node " + nodeId, e); 57 | } 58 | } 59 | 60 | return result; 61 | } 62 | 63 | @Override 64 | public void relateNodes(Node src, Node dest) 65 | { 66 | try (final Transaction tx = ((GraphDatabaseAPI) neo4jGraph).tx().unforced().begin()) 67 | { 68 | try 69 | { 70 | src.createRelationshipTo(dest, Neo4jGraphDatabase.RelTypes.SIMILAR); 71 | tx.success(); 72 | } 73 | catch (Exception e) 74 | { 75 | tx.failure(); 76 | throw new BenchmarkingException("unable to relate nodes", e); 77 | } 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/utils/Metrics.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.utils; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | 6 | /** 7 | * This class implements the metrics we use for the evaluation of the predicted 8 | * clustering. 
For now we use only the NMI 9 | * 10 | * @author sbeis 11 | * @email sotbeis@gmail.com 12 | * 13 | */ 14 | public class Metrics 15 | { 16 | public double normalizedMutualInformation(int numberOfNodes, Map> actualPartitions, 17 | Map> predictedPartitions) 18 | { 19 | double nmi; 20 | double numOfNodes = (double) numberOfNodes; 21 | int[][] confusionMatrix = confusionMatrix(actualPartitions, predictedPartitions); 22 | int[] confusionMatrixActual = new int[actualPartitions.size()]; 23 | int[] confusionMatrixPredicted = new int[predictedPartitions.size()]; 24 | for (int i = 0; i < confusionMatrixActual.length; i++) 25 | { 26 | int sum = 0; 27 | for (int j = 0; j < confusionMatrixPredicted.length; j++) 28 | { 29 | sum = sum + confusionMatrix[i][j]; 30 | } 31 | confusionMatrixActual[i] = sum; 32 | } 33 | for (int j = 0; j < confusionMatrixPredicted.length; j++) 34 | { 35 | int sum = 0; 36 | for (int i = 0; i < confusionMatrixActual.length; i++) 37 | { 38 | sum = sum + confusionMatrix[i][j]; 39 | } 40 | confusionMatrixPredicted[j] = sum; 41 | } 42 | 43 | double term1 = 0; 44 | for (int i = 0; i < confusionMatrixActual.length; i++) 45 | { 46 | for (int j = 0; j < confusionMatrixPredicted.length; j++) 47 | { 48 | if (confusionMatrix[i][j] > 0) 49 | { 50 | term1 += -2.0 51 | * confusionMatrix[i][j] 52 | * Math.log((confusionMatrix[i][j] * numOfNodes) 53 | / (confusionMatrixActual[i] * confusionMatrixPredicted[j])); 54 | } 55 | } 56 | } 57 | double term2 = 0; 58 | for (int i = 0; i < confusionMatrixActual.length; i++) 59 | { 60 | term2 += confusionMatrixActual[i] * Math.log(confusionMatrixActual[i] / numOfNodes); 61 | } 62 | double term3 = 0; 63 | for (int j = 0; j < confusionMatrixPredicted.length; j++) 64 | { 65 | term3 += confusionMatrixPredicted[j] * Math.log(confusionMatrixPredicted[j] / numOfNodes); 66 | } 67 | nmi = term1 / (term2 + term3); 68 | return nmi; 69 | } 70 | 71 | private int[][] confusionMatrix(Map> actualPartitions, 72 | Map> predictedPartitions) 73 | { 74 
| int actualPartitionsSize = actualPartitions.size(); 75 | int predictedPartitionsSize = predictedPartitions.size(); 76 | int[][] confusionMatrix = new int[actualPartitionsSize][]; 77 | int actualPartitionsKeys[] = new int[actualPartitionsSize]; 78 | int predictedPartitionsKeys[] = new int[predictedPartitionsSize]; 79 | 80 | int actualPartitionsIndex = 0; 81 | for (int key : actualPartitions.keySet()) 82 | { 83 | actualPartitionsKeys[actualPartitionsIndex] = key; 84 | actualPartitionsIndex++; 85 | } 86 | int predictedPartitionsIndex = 0; 87 | for (int key : predictedPartitions.keySet()) 88 | { 89 | predictedPartitionsKeys[predictedPartitionsIndex] = key; 90 | predictedPartitionsIndex++; 91 | } 92 | 93 | for (int i = 0; i < actualPartitionsSize; i++) 94 | { 95 | confusionMatrix[i] = new int[predictedPartitionsSize]; 96 | for (int j = 0; j < predictedPartitionsSize; j++) 97 | { 98 | int commonNodes = 0; 99 | for (int node : predictedPartitions.get(predictedPartitionsKeys[j])) 100 | { 101 | if (actualPartitions.get(actualPartitionsKeys[i]).contains(node)) 102 | { 103 | commonNodes++; 104 | } 105 | } 106 | confusionMatrix[i][j] = commonNodes; 107 | } 108 | } 109 | return confusionMatrix; 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/insert/InsertionBase.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.insert; 2 | 3 | import java.io.File; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | import java.util.concurrent.TimeUnit; 7 | 8 | import org.apache.logging.log4j.LogManager; 9 | import org.apache.logging.log4j.Logger; 10 | 11 | import com.codahale.metrics.Timer; 12 | import com.google.common.base.Stopwatch; 13 | 14 | import eu.socialsensor.benchmarks.SingleInsertionBenchmark; 15 | import eu.socialsensor.dataset.Dataset; 16 | import eu.socialsensor.dataset.DatasetFactory; 17 | import 
eu.socialsensor.main.GraphDatabaseBenchmark; 18 | import eu.socialsensor.main.GraphDatabaseType; 19 | import eu.socialsensor.utils.Utils; 20 | 21 | /** 22 | * Base class for business logic of insertion workloads 23 | * 24 | * @author Alexander Patrikalakis 25 | * 26 | * @param 27 | * the Type of vertexes (graph database vendor specific) 28 | */ 29 | public abstract class InsertionBase implements Insertion 30 | { 31 | private static final Logger logger = LogManager.getLogger(); 32 | public static final String INSERTION_CONTEXT = ".eu.socialsensor.insertion."; 33 | private final Timer getOrCreateTimes; 34 | private final Timer relateNodesTimes; 35 | 36 | protected final GraphDatabaseType type; 37 | protected final List insertionTimes; 38 | private final boolean single; 39 | 40 | // to write intermediate times for SingleInsertion subclasses 41 | protected final File resultsPath; 42 | 43 | protected InsertionBase(GraphDatabaseType type, File resultsPath) 44 | { 45 | this.type = type; 46 | this.insertionTimes = new ArrayList(); 47 | this.resultsPath = resultsPath; 48 | this.single = resultsPath != null; 49 | final String insertionTypeCtxt = type.getShortname() + INSERTION_CONTEXT + (single ? "adhoc." : "batch."); 50 | this.getOrCreateTimes = GraphDatabaseBenchmark.metrics.timer(insertionTypeCtxt + "getOrCreate"); 51 | this.relateNodesTimes = GraphDatabaseBenchmark.metrics.timer(insertionTypeCtxt + "relateNodes"); 52 | } 53 | 54 | /** 55 | * Gets or creates a vertex 56 | * 57 | * @param value 58 | * the identifier of the vertex 59 | * @return the id of the created vertex 60 | */ 61 | protected abstract T getOrCreate(final String value); 62 | 63 | /** 64 | * 65 | * @param src 66 | * @param dest 67 | */ 68 | protected abstract void relateNodes(final T src, final T dest); 69 | 70 | /** 71 | * sometimes a transaction needs to be committed at the end of a batch run. 72 | * this is the hook. 
73 | */ 74 | protected void post() 75 | { 76 | // NOOP 77 | } 78 | 79 | public final void createGraph(File datasetFile, int scenarioNumber) 80 | { 81 | logger.info("Loading data in {} mode in {} database . . . .", single ? "single" : "massive", 82 | type.name()); 83 | Dataset dataset = DatasetFactory.getInstance().getDataset(datasetFile); 84 | 85 | T srcNode, dstNode; 86 | Stopwatch thousandWatch = new Stopwatch(), watch = new Stopwatch(); 87 | thousandWatch.start(); 88 | watch.start(); 89 | int i = 4; 90 | for (List line : dataset) 91 | { 92 | final Timer.Context contextSrc = getOrCreateTimes.time(); 93 | try { 94 | srcNode = getOrCreate(line.get(0)); 95 | } finally { 96 | contextSrc.stop(); 97 | } 98 | 99 | final Timer.Context contextDest = getOrCreateTimes.time(); 100 | try { 101 | dstNode = getOrCreate(line.get(1)); 102 | } finally { 103 | contextDest.stop(); 104 | } 105 | 106 | final Timer.Context contextRelate = relateNodesTimes.time(); 107 | try { 108 | relateNodes(srcNode, dstNode); 109 | } finally { 110 | contextRelate.stop(); 111 | } 112 | 113 | if (i % 1000 == 0) 114 | { 115 | insertionTimes.add((double) thousandWatch.elapsed(TimeUnit.MILLISECONDS)); 116 | thousandWatch.stop(); 117 | thousandWatch = new Stopwatch(); 118 | thousandWatch.start(); 119 | } 120 | i++; 121 | } 122 | post(); 123 | insertionTimes.add((double) watch.elapsed(TimeUnit.MILLISECONDS)); 124 | 125 | if (single) 126 | { 127 | Utils.writeTimes(insertionTimes, new File(resultsPath, 128 | SingleInsertionBenchmark.INSERTION_TIMES_OUTPUT_FILE_NAME_BASE + "." + type.getShortname() + "." 129 | + Integer.toString(scenarioNumber))); 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/test/resources/META-INF/input.properties: -------------------------------------------------------------------------------- 1 | # Choose which data sets you want to include in the benchmark by removing the contents. 
2 | #eu.socialsensor.dataset=data/Email-Enron.txt 3 | #eu.socialsensor.dataset=data/com-youtube.ungraph.txt 4 | #eu.socialsensor.dataset=data/Amazon0601.txt 5 | #eu.socialsensor.dataset=data/com-lj.ungraph.txt 6 | #can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000 7 | eu.socialsensor.dataset=data/network1000.dat 8 | eu.socialsensor.actual-communities=data/community1000.dat 9 | 10 | eu.socialsensor.database-storage-directory=storage 11 | # Sample meters this frequently (milliseconds) 12 | eu.socialsensor.metrics.csv.interval=1000 13 | # for the csv reporter 14 | eu.socialsensor.metrics.csv.directory=metrics 15 | # for the graphite reporter 16 | #eu.socialsensor.metrics.graphite.hostname=192.168.59.103 17 | 18 | # Choose which databases you want to in the benchmark by removing the comments. 19 | # Available dbs are: 20 | eu.socialsensor.databases=tbdb 21 | eu.socialsensor.databases=tddb 22 | #eu.socialsensor.databases=tc 23 | #eu.socialsensor.databases=thb 24 | #eu.socialsensor.databases=tce 25 | #eu.socialsensor.databases=tp 26 | #eu.socialsensor.databases=orient 27 | #eu.socialsensor.databases=neo4j 28 | #eu.socialsensor.databases=sparksee 29 | 30 | # Database specific options 31 | # Titan options 32 | # page-size - Number of results to pull when iterating over a storage backend (default 100) 33 | eu.socialsensor.titan.page-size=100 34 | # to disable buffering on mutations, set to zero. Default 1024. This will set the queue size as well 35 | eu.socialsensor.titan.buffer-size=10000 36 | # id block size default 10000 37 | eu.socialsensor.titan.ids.block-size=10000 38 | # Titan DynamoDB options 39 | # when warm-tables is set to true, the benchmark will create tables of the specified data model in parallel 40 | eu.socialsensor.dynamodb.precreate-tables=true 41 | # DynamoDBDelegate worker thread pool size. 
should not be larger than the number of HTTP connections 42 | # assuming a round trip time of 10ms for writes, one thread can do 50tps. When using the MULTI data 43 | # model, items are usually small, so the round trip time is close to that. 44 | eu.socialsensor.dynamodb.workers=15 45 | # TPS (both read and write to set per table). 750 R and W TPS = 1 DynamoDB table partition 46 | # http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/GuidelinesForTables.html#GuidelinesForTables.Partitions 47 | eu.socialsensor.dynamodb.tps=750 48 | # Data model for the Titan DynamoDB plugin. Can be SINGLE or MULTI 49 | eu.socialsensor.dynamodb.data-model=MULTI 50 | # Whether to allow eventually consistent reads or not 51 | # (allowing eventually consistent reads allows queries to happen faster) 52 | eu.socialsensor.dynamodb.force-consistent-read=true 53 | # Credentials. You can set credentials to any value when running against DynamoDBLocal 54 | # Needs to be the fully qualified class name of a class implementing 55 | # com.amazonaws.auth.AWSCredentials or com.amazonaws.auth.AWSCredentialsProvider. 56 | eu.socialsensor.dynamodb.credentials.class-name=com.amazonaws.auth.DefaultAWSCredentialsProviderChain 57 | # Comma separated list of strings to pass to the constructor of the class specified above. 58 | eu.socialsensor.dynamodb.credentials.constructor-args= 59 | # Endpoint. the titan-dynamodb database option above requires you to specify an endpoint. 60 | # This endpoint could be DynamoDBLocal running in a separate process (eg, http://127.0.0.1:4567), 61 | # or the https endpoint of a production region of the DynamoDB service. 
62 | eu.socialsensor.dynamodb.endpoint=http://127.0.0.1:4567 63 | #eu.socialsensor.dynamodb.endpoint=https://dynamodb.us-east-1.amazonaws.com 64 | 65 | # OrientDB options 66 | eu.socialsensor.orient.lightweight-edges=true 67 | 68 | # Sparksee options 69 | eu.socialsensor.sparksee.license-key=DEADBEEF 70 | 71 | # The following five benchmarks are permutable (that is, the suite can run them 72 | # many times in different database order). To turn on permutations, set 73 | # eu.socialsensor.permute-benchmarks=true 74 | eu.socialsensor.permute-benchmarks=false 75 | 76 | # Choose which benchmark you want to run by removing the comments. Choose one Insertion 77 | # workload and then query/clustering workloads afterward. 78 | eu.socialsensor.benchmarks=MASSIVE_INSERTION 79 | #eu.socialsensor.benchmarks=SINGLE_INSERTION 80 | eu.socialsensor.benchmarks=FIND_NEIGHBOURS 81 | eu.socialsensor.benchmarks=FIND_ADJACENT_NODES 82 | eu.socialsensor.benchmarks=FIND_SHORTEST_PATH 83 | eu.socialsensor.shortest-path-random-nodes=100 84 | 85 | # The clustering benchmark is not permutable even if eu.socialsensor.permute-benchmarks=true 86 | #eu.socialsensor.benchmarks=CLUSTERING 87 | eu.socialsensor.randomize-clustering=false 88 | eu.socialsensor.nodes-count=1000 89 | 90 | # Choose the cache values you want run the CW benchmark, or have them generated. To choose: 91 | eu.socialsensor.cache-values=25 92 | eu.socialsensor.cache-values=50 93 | eu.socialsensor.cache-values=75 94 | eu.socialsensor.cache-values=100 95 | eu.socialsensor.cache-values=125 96 | eu.socialsensor.cache-values=150 97 | 98 | # To have the cache values generated for the CW benchmark. 
99 | #eu.socialsensor.cache-increment-factor=1 100 | #eu.socialsensor.cache-values-count=6 101 | 102 | # This benchmark measures the time it takes to delete the database 103 | #eu.socialsensor.benchmarks=DELETION 104 | 105 | # Results folder path 106 | eu.socialsensor.results-path=results 107 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/clustering/LouvainMethod.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.clustering; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Random; 6 | import java.util.Set; 7 | import java.util.concurrent.ExecutionException; 8 | 9 | import eu.socialsensor.graphdatabases.GraphDatabase; 10 | 11 | /** 12 | * Implementation of Louvain Method on top of graph databases. Gephi Toolkit 13 | * (https://gephi.org/toolkit/) java implementation was used as guide. 14 | * 15 | * @author sotbeis 16 | * @email sotbeis@iti.gr 17 | */ 18 | public class LouvainMethod 19 | { 20 | boolean isRandomized; 21 | private double resolution = 1.0; 22 | private double graphWeightSum; 23 | private int N; 24 | private List communityWeights; 25 | private boolean communityUpdate = false; 26 | 27 | GraphDatabase graphDatabase; 28 | Cache cache; 29 | 30 | public LouvainMethod(GraphDatabase graphDatabase, int cacheSize, boolean isRandomized) throws ExecutionException 31 | { 32 | this.graphDatabase = graphDatabase; 33 | this.isRandomized = isRandomized; 34 | initialize(); 35 | cache = new Cache(graphDatabase, cacheSize); 36 | } 37 | 38 | private void initialize() 39 | { 40 | this.N = this.graphDatabase.getNodeCount();// this step takes a long 41 | // time on dynamodb. 
42 | this.graphWeightSum = this.graphDatabase.getGraphWeightSum() / 2; 43 | 44 | this.communityWeights = new ArrayList(this.N); 45 | for (int i = 0; i < this.N; i++) 46 | { 47 | this.communityWeights.add(0.0); 48 | } 49 | 50 | this.graphDatabase.initCommunityProperty(); 51 | } 52 | 53 | public void computeModularity() throws ExecutionException 54 | { 55 | Random rand = new Random(); 56 | boolean someChange = true; 57 | while (someChange) 58 | { 59 | someChange = false; 60 | boolean localChange = true; 61 | while (localChange) 62 | { 63 | localChange = false; 64 | int start = 0; 65 | if (this.isRandomized) 66 | { 67 | start = Math.abs(rand.nextInt()) % this.N; 68 | } 69 | int step = 0; 70 | for (int i = start; step < this.N; i = (i + 1) % this.N) 71 | { 72 | step++; 73 | int bestCommunity = updateBestCommunity(i); 74 | if ((this.cache.getCommunity(i) != bestCommunity) && (this.communityUpdate)) 75 | { 76 | 77 | this.cache.moveNodeCommunity(i, bestCommunity); 78 | this.graphDatabase.moveNode(i, bestCommunity); 79 | 80 | double bestCommunityWeight = this.communityWeights.get(bestCommunity); 81 | 82 | bestCommunityWeight += cache.getNodeCommunityWeight(i); 83 | this.communityWeights.set(bestCommunity, bestCommunityWeight); 84 | localChange = true; 85 | } 86 | 87 | this.communityUpdate = false; 88 | } 89 | someChange = localChange || someChange; 90 | } 91 | if (someChange) 92 | { 93 | zoomOut(); 94 | } 95 | } 96 | } 97 | 98 | private int updateBestCommunity(int node) throws ExecutionException 99 | { 100 | int bestCommunity = 0; 101 | double best = 0; 102 | Set communities = this.cache.getCommunitiesConnectedToNodeCommunities(node); 103 | for (int community : communities) 104 | { 105 | double qValue = q(node, community); 106 | if (qValue > best) 107 | { 108 | best = qValue; 109 | bestCommunity = community; 110 | this.communityUpdate = true; 111 | } 112 | } 113 | return bestCommunity; 114 | } 115 | 116 | private double q(int nodeCommunity, int community) throws 
ExecutionException 117 | { 118 | double edgesInCommunity = this.cache.getEdgesInsideCommunity(nodeCommunity, community); 119 | double communityWeight = this.communityWeights.get(community); 120 | double nodeWeight = this.cache.getNodeCommunityWeight(nodeCommunity); 121 | double qValue = this.resolution * edgesInCommunity - (nodeWeight * communityWeight) 122 | / (2.0 * this.graphWeightSum); 123 | int actualNodeCom = this.cache.getCommunity(nodeCommunity); 124 | int communitySize = this.cache.getCommunitySize(community); 125 | 126 | if ((actualNodeCom == community) && (communitySize > 1)) 127 | { 128 | qValue = this.resolution * edgesInCommunity - (nodeWeight * (communityWeight - nodeWeight)) 129 | / (2.0 * this.graphWeightSum); 130 | } 131 | if ((actualNodeCom == community) && (communitySize == 1)) 132 | { 133 | qValue = 0.; 134 | } 135 | return qValue; 136 | } 137 | 138 | public void zoomOut() 139 | { 140 | this.N = this.graphDatabase.reInitializeCommunities(); 141 | this.cache.reInitializeCommunities(); 142 | this.communityWeights = new ArrayList(this.N); 143 | for (int i = 0; i < this.N; i++) 144 | { 145 | this.communityWeights.add(graphDatabase.getCommunityWeight(i)); 146 | } 147 | } 148 | 149 | public int getN() 150 | { 151 | return this.N; 152 | } 153 | 154 | } -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.FileWriter; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | import java.util.List; 9 | import java.util.Map; 10 | import java.util.SortedMap; 11 | import java.util.TreeMap; 12 | import java.util.concurrent.ExecutionException; 13 | import java.util.concurrent.TimeUnit; 14 | 15 | import org.apache.logging.log4j.Logger; 16 | import 
org.apache.logging.log4j.LogManager; 17 | 18 | import com.google.common.base.Stopwatch; 19 | 20 | import eu.socialsensor.clustering.LouvainMethod; 21 | import eu.socialsensor.graphdatabases.GraphDatabase; 22 | import eu.socialsensor.main.BenchmarkConfiguration; 23 | import eu.socialsensor.main.BenchmarkType; 24 | import eu.socialsensor.main.BenchmarkingException; 25 | import eu.socialsensor.main.GraphDatabaseType; 26 | import eu.socialsensor.utils.Metrics; 27 | import eu.socialsensor.utils.Utils; 28 | 29 | /** 30 | * ClusteringBenchmark implementation 31 | * 32 | * @author sotbeis, sotbeis@iti.gr 33 | * @author Alexander Patrikalakis 34 | */ 35 | public class ClusteringBenchmark extends BenchmarkBase implements RequiresGraphData 36 | { 37 | private static final Logger LOG = LogManager.getLogger(); 38 | private final List cacheValues; 39 | 40 | public ClusteringBenchmark(BenchmarkConfiguration config) 41 | { 42 | super(config, BenchmarkType.CLUSTERING); 43 | this.cacheValues = new ArrayList(); 44 | if (config.getCacheValues() == null) 45 | { 46 | // multiply before truncating: a fractional increment factor (e.g. 0.05 for 5% cache steps) would be truncated to 0 by intValue() 47 | int cacheValueMultiplier = (int) (config.getCacheIncrementFactor().doubleValue() * config.getNodesCount()); 47 | for (int i = 1; i <= config.getCacheValuesCount(); i++) 48 | { 49 | cacheValues.add(i * cacheValueMultiplier); 50 | } 51 | } 52 | else 53 | { 54 | cacheValues.addAll(config.getCacheValues()); 55 | } 56 | } 57 | 58 | @Override 59 | public void startBenchmarkInternal() 60 | { 61 | LOG.info("Executing Clustering Benchmark . . . 
."); 62 | SortedMap> typeTimesMap = new TreeMap>(); 63 | try 64 | { 65 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 66 | { 67 | typeTimesMap.put(type, clusteringBenchmark(type)); 68 | } 69 | } 70 | catch (ExecutionException e) 71 | { 72 | throw new BenchmarkingException("Unable to run clustering benchmark: " + e.getMessage(), e); 73 | } 74 | 75 | try (BufferedWriter out = new BufferedWriter(new FileWriter(outputFile))) 76 | { 77 | out.write("DB,Cache Size (measured in nodes),Clustering Benchmark Time (s)\n"); 78 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 79 | { 80 | for (Integer cacheSize : typeTimesMap.get(type).keySet()) 81 | { 82 | out.write(String.format("%s,%d,%f\n", type.getShortname(), cacheSize, 83 | typeTimesMap.get(type).get(cacheSize))); 84 | } 85 | } 86 | } 87 | catch (IOException e) 88 | { 89 | throw new BenchmarkingException("Unable to write clustering results to file", e); // keep the root cause, consistent with the catch above 90 | } 91 | LOG.info("Clustering Benchmark finished"); 92 | } 93 | 94 | private SortedMap clusteringBenchmark(GraphDatabaseType type) throws ExecutionException 95 | { 96 | GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type); 97 | graphDatabase.open(); 98 | 99 | SortedMap timeMap = new TreeMap(); 100 | for (int cacheSize : cacheValues) 101 | { 102 | LOG.info("Graph Database: " + type.getShortname() + ", Dataset: " + bench.getDataset().getName() 103 | + ", Cache Size: " + cacheSize); 104 | 105 | Stopwatch watch = new Stopwatch(); 106 | watch.start(); 107 | LouvainMethod louvainMethodCache = new LouvainMethod(graphDatabase, cacheSize, bench.randomizedClustering()); 108 | louvainMethodCache.computeModularity(); 109 | timeMap.put(cacheSize, watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0); 110 | 111 | // evaluation with NMI 112 | Map> predictedCommunities = graphDatabase.mapCommunities(louvainMethodCache.getN()); 113 | Map> actualCommunities = mapNodesToCommunities(Utils.readTabulatedLines( 114 | bench.getActualCommunitiesFile(), 4 /* 
numberOfLinesToSkip */)); 115 | Metrics metrics = new Metrics(); 116 | double NMI = metrics.normalizedMutualInformation(bench.getNodesCount(), actualCommunities, 117 | predictedCommunities); 118 | LOG.info("NMI value: " + NMI); 119 | } 120 | graphDatabase.shutdown(); 121 | return timeMap; 122 | } 123 | 124 | private static Map> mapNodesToCommunities(List> tabulatedLines) 125 | { 126 | Map> communities = new HashMap>(); 127 | // http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760 128 | // the format of the communityNNNN.dat files have node and community 129 | // number separated by a tab. 130 | // community number starts at 1 and not zero. 131 | for (List line : tabulatedLines) 132 | { 133 | int node = Integer.valueOf(line.get(0)); 134 | int community = Integer.valueOf(line.get(1).trim()) - 1; 135 | if (!communities.containsKey(community)) 136 | { 137 | communities.put(community, new ArrayList()); 138 | } 139 | communities.get(community).add(node); 140 | } 141 | return communities; 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/clustering/Cache.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.clustering; 2 | 3 | import java.util.HashSet; 4 | import java.util.Set; 5 | import java.util.concurrent.ExecutionException; 6 | 7 | import com.google.common.cache.CacheBuilder; 8 | import com.google.common.cache.CacheLoader; 9 | import com.google.common.cache.LoadingCache; 10 | 11 | import eu.socialsensor.graphdatabases.GraphDatabase; 12 | 13 | /** 14 | * Cache implementation for the temporary storage of required information of the 15 | * graph databases in order to execute the Louvain Method 16 | * 17 | * @author sotbeis 18 | * @email sotbeis@iti.gr 19 | */ 20 | public class Cache 21 | { 22 | 23 | LoadingCache> nodeCommunitiesMap; // key=nodeCommunity 24 | // value=nodeIds 25 | // contained in 26 | // 
nodeCommunityC 27 | LoadingCache> communitiesMap; // key=community 28 | // value=nodeIds 29 | // contained in 30 | // community 31 | LoadingCache nodeCommunitiesToCommunities; // key=nodeCommunity 32 | // value=community 33 | LoadingCache> nodeNeighbours; // key=nodeId 34 | // value=nodeId 35 | // neighbors 36 | LoadingCache nodeToCommunityMap; // key=nodeId 37 | // value=communityId 38 | 39 | public Cache(final GraphDatabase graphDatabase, int cacheSize) throws ExecutionException 40 | { 41 | nodeNeighbours = CacheBuilder.newBuilder().maximumSize(cacheSize) 42 | .build(new CacheLoader>() { 43 | public Set load(Integer nodeId) 44 | { 45 | return graphDatabase.getNeighborsIds(nodeId); 46 | } 47 | }); 48 | 49 | nodeCommunitiesMap = CacheBuilder.newBuilder().maximumSize(cacheSize) 50 | .build(new CacheLoader>() { 51 | public Set load(Integer nodeCommunityId) 52 | { 53 | return graphDatabase.getNodesFromNodeCommunity(nodeCommunityId); 54 | } 55 | }); 56 | 57 | communitiesMap = CacheBuilder.newBuilder().maximumSize(cacheSize) 58 | .build(new CacheLoader>() { 59 | public Set load(Integer communityId) 60 | { 61 | return graphDatabase.getNodesFromCommunity(communityId); 62 | } 63 | }); 64 | 65 | nodeToCommunityMap = CacheBuilder.newBuilder().maximumSize(cacheSize) 66 | .build(new CacheLoader() { 67 | public Integer load(Integer nodeId) 68 | { 69 | return graphDatabase.getCommunityFromNode(nodeId); 70 | } 71 | }); 72 | 73 | nodeCommunitiesToCommunities = CacheBuilder.newBuilder().maximumSize(cacheSize) 74 | .build(new CacheLoader() { 75 | public Integer load(Integer nodeCommunity) 76 | { 77 | return graphDatabase.getCommunity(nodeCommunity); 78 | } 79 | }); 80 | } 81 | 82 | public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunity) throws ExecutionException 83 | { 84 | Set nodesFromNodeCommunity = nodeCommunitiesMap.get(nodeCommunity); 85 | Set communities = new HashSet(); 86 | for (int nodeFromNodeCommunity : nodesFromNodeCommunity) 87 | { 88 | Set neighbors = 
nodeNeighbours.get(nodeFromNodeCommunity); 89 | for (int neighbor : neighbors) 90 | { 91 | communities.add(nodeToCommunityMap.get(neighbor)); 92 | } 93 | } 94 | return communities; 95 | } 96 | 97 | public void moveNodeCommunity(int nodeCommunity, int toCommunity) throws ExecutionException 98 | { 99 | int fromCommunity = nodeCommunitiesToCommunities.get(nodeCommunity); 100 | nodeCommunitiesToCommunities.put(nodeCommunity, toCommunity); 101 | Set nodesFromCommunity = communitiesMap.get(fromCommunity); 102 | communitiesMap.invalidate(fromCommunity); 103 | communitiesMap.get(toCommunity).addAll(nodesFromCommunity); 104 | Set nodesFromNodeCommunity = nodeCommunitiesMap.get(nodeCommunity); 105 | for (int nodeFromCommunity : nodesFromNodeCommunity) 106 | { 107 | nodeToCommunityMap.put(nodeFromCommunity, toCommunity); 108 | } 109 | } 110 | 111 | public double getNodeCommunityWeight(int nodeCommunity) throws ExecutionException 112 | { 113 | Set nodes = nodeCommunitiesMap.get(nodeCommunity); 114 | double weight = 0; 115 | for (int node : nodes) 116 | { 117 | weight += nodeNeighbours.get(node).size(); 118 | } 119 | return weight; 120 | } 121 | 122 | public int getCommunity(int community) throws ExecutionException 123 | { 124 | return nodeCommunitiesToCommunities.get(community); 125 | } 126 | 127 | public int getCommunitySize(int community) throws ExecutionException 128 | { 129 | return communitiesMap.get(community).size(); 130 | } 131 | 132 | public double getEdgesInsideCommunity(int nodeCommunity, int community) throws ExecutionException 133 | { 134 | Set nodeCommunityNodes = nodeCommunitiesMap.get(nodeCommunity); 135 | Set communityNodes = communitiesMap.get(community); 136 | double edges = 0; 137 | for (int nodeCommunityNode : nodeCommunityNodes) 138 | { 139 | for (int communityNode : communityNodes) 140 | { 141 | if (nodeNeighbours.get(nodeCommunityNode).contains(communityNode)) 142 | { 143 | edges++; 144 | } 145 | } 146 | } 147 | return edges; 148 | } 149 | 150 | public 
void reInitializeCommunities() 151 | { 152 | nodeCommunitiesMap.invalidateAll(); 153 | communitiesMap.invalidateAll(); 154 | nodeToCommunityMap.invalidateAll(); 155 | nodeCommunitiesToCommunities.invalidateAll(); 156 | } 157 | 158 | } 159 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.graphdatabases; 2 | 3 | import java.io.File; 4 | import java.util.List; 5 | import java.util.Map; 6 | import java.util.Set; 7 | 8 | /** 9 | * Represents a graph database 10 | * 11 | * @author sotbeis, sotbeis@iti.gr 12 | * @author Alexander Patrikalakis 13 | */ 14 | public interface GraphDatabase 15 | { 16 | //edge and vertex operations 17 | public VertexType getOtherVertexFromEdge(EdgeType r, VertexType oneVertex); 18 | public VertexType getSrcVertexFromEdge(EdgeType edge); 19 | public VertexType getDestVertexFromEdge(EdgeType edge); 20 | public VertexType getVertex(Integer i); 21 | 22 | //edge iterators 23 | public EdgeIteratorType getAllEdges(); 24 | public EdgeIteratorType getNeighborsOfVertex(VertexType v); 25 | public boolean edgeIteratorHasNext(EdgeIteratorType it); 26 | public EdgeType nextEdge(EdgeIteratorType it); 27 | public void cleanupEdgeIterator(EdgeIteratorType it); 28 | 29 | //vertex iterators 30 | public VertexIteratorType getVertexIterator(); 31 | public boolean vertexIteratorHasNext(VertexIteratorType it); 32 | public VertexType nextVertex(VertexIteratorType it); 33 | public void cleanupVertexIterator(VertexIteratorType it); 34 | 35 | //benchmarks 36 | public void findAllNodeNeighbours(); 37 | public void findNodesOfAllEdges(); 38 | 39 | /** 40 | * Opens the graph database 41 | * 42 | * @param dbPath 43 | * - database path 44 | */ 45 | public void open(); 46 | 47 | /** 48 | * Creates a graph database and configures for single data insertion 49 | * 50 | * 
@param dbPath 51 | * - database path 52 | */ 53 | public void createGraphForSingleLoad(); 54 | 55 | /** 56 | * Inserts data in massive mode 57 | * 58 | * @param dataPath 59 | * - dataset path 60 | */ 61 | public void massiveModeLoading(File dataPath); 62 | 63 | /** 64 | * Inserts data in single mode 65 | * 66 | * @param dataPath 67 | * - dataset path 68 | * @param resultsPath 69 | * @param scenarioNumber 70 | */ 71 | public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumber); 72 | 73 | /** 74 | * Creates a graph database and configures for bulk data insertion 75 | * 76 | * @param dataPath 77 | * - dataset path 78 | */ 79 | public void createGraphForMassiveLoad(); 80 | 81 | /** 82 | * Shut down the graph database 83 | */ 84 | public void shutdown(); 85 | 86 | /** 87 | * Delete the graph database 88 | */ 89 | public void delete(); 90 | 91 | /** 92 | * Shutdown the graph database, which configuration is for massive insertion 93 | * of data 94 | */ 95 | public void shutdownMassiveGraph(); 96 | 97 | /** 98 | * Find the shortest path between vertex 1 and each of the vertexes in the list 99 | * 100 | * @param nodes 101 | * any number of random nodes 102 | */ 103 | public void shortestPaths(Set nodes); 104 | 105 | /** 106 | * Execute findShortestPaths query from the Query interface 107 | * 108 | * @param nodes 109 | * any number of random nodes 110 | */ 111 | public void shortestPath(final VertexType fromNode, Integer node); 112 | 113 | /** 114 | * @return the number of nodes 115 | */ 116 | public int getNodeCount(); 117 | 118 | /** 119 | * @param nodeId 120 | * @return the neighbours of a particular node 121 | */ 122 | public Set getNeighborsIds(int nodeId); 123 | 124 | /** 125 | * @param nodeId 126 | * @return the node degree 127 | */ 128 | public double getNodeWeight(int nodeId); 129 | 130 | /** 131 | * Initializes the community and nodeCommunity property in each database 132 | */ 133 | public void initCommunityProperty(); 134 | 135 | /** 136 | * 
@param nodeCommunities 137 | * @return the communities (communityId) that are connected with a 138 | * particular nodeCommunity 139 | */ 140 | public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities); 141 | 142 | /** 143 | * @param community 144 | * @return the nodes a particular community contains 145 | */ 146 | public Set getNodesFromCommunity(int community); 147 | 148 | /** 149 | * @param nodeCommunity 150 | * @return the nodes a particular nodeCommunity contains 151 | */ 152 | public Set getNodesFromNodeCommunity(int nodeCommunity); 153 | 154 | /** 155 | * @param nodeCommunity 156 | * @param communityNodes 157 | * @return the number of edges between a community and a nodeCommunity 158 | */ 159 | public double getEdgesInsideCommunity(int nodeCommunity, int communityNodes); 160 | 161 | /** 162 | * @param community 163 | * @return the sum of node degrees 164 | */ 165 | public double getCommunityWeight(int community); 166 | 167 | /** 168 | * @param nodeCommunity 169 | * @return the sum of node degrees 170 | */ 171 | public double getNodeCommunityWeight(int nodeCommunity); 172 | 173 | /** 174 | * Moves a node from a community to another 175 | * 176 | * @param from 177 | * @param to 178 | */ 179 | public void moveNode(int from, int to); 180 | 181 | /** 182 | * @return the number of edges of the graph database 183 | */ 184 | public double getGraphWeightSum(); 185 | 186 | /** 187 | * Reinitializes the community and nodeCommunity property 188 | * 189 | * @return the number of communities 190 | */ 191 | public int reInitializeCommunities(); 192 | 193 | /** 194 | * @param nodeId 195 | * @return in which community a particular node belongs 196 | */ 197 | public int getCommunityFromNode(int nodeId); 198 | 199 | /** 200 | * @param nodeCommunity 201 | * @return in which community a particular nodeCommunity belongs 202 | */ 203 | public int getCommunity(int nodeCommunity); 204 | 205 | /** 206 | * @param community 207 | * @return the number of nodeCommunities a 
particular community contains 208 | */ 209 | public int getCommunitySize(int community); 210 | 211 | /** 212 | * @param numberOfCommunities 213 | * @return a map where the key is the community id and the value is the 214 | * nodes each community has. 215 | */ 216 | public Map> mapCommunities(int numberOfCommunities); 217 | 218 | /** 219 | * 220 | * @param nodeId 221 | * @return return true if node exist, false if not 222 | */ 223 | public boolean nodeExists(int nodeId); 224 | } 225 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/main/GraphDatabaseBenchmark.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.main; 2 | 3 | import eu.socialsensor.benchmarks.Benchmark; 4 | import eu.socialsensor.benchmarks.ClusteringBenchmark; 5 | import eu.socialsensor.benchmarks.DeleteGraphBenchmark; 6 | import eu.socialsensor.benchmarks.FindNeighboursOfAllNodesBenchmark; 7 | import eu.socialsensor.benchmarks.FindNodesOfAllEdgesBenchmark; 8 | import eu.socialsensor.benchmarks.FindShortestPathBenchmark; 9 | import eu.socialsensor.benchmarks.MassiveInsertionBenchmark; 10 | import eu.socialsensor.benchmarks.SingleInsertionBenchmark; 11 | 12 | import org.apache.commons.configuration.Configuration; 13 | import org.apache.commons.configuration.ConfigurationException; 14 | import org.apache.commons.configuration.PropertiesConfiguration; 15 | import org.apache.commons.io.FileDeleteStrategy; 16 | import org.apache.logging.log4j.Logger; 17 | import org.apache.logging.log4j.LogManager; 18 | 19 | import com.codahale.metrics.CsvReporter; 20 | import com.codahale.metrics.MetricFilter; 21 | import com.codahale.metrics.MetricRegistry; 22 | import com.codahale.metrics.graphite.Graphite; 23 | import com.codahale.metrics.graphite.GraphiteReporter; 24 | 25 | import java.io.File; 26 | import java.io.IOException; 27 | import java.net.InetSocketAddress; 28 | import java.net.URL; 
29 | import java.util.Locale; 30 | import java.util.concurrent.ExecutionException; 31 | import java.util.concurrent.TimeUnit; 32 | 33 | /** 34 | * Main class for the execution of GraphDatabaseBenchmark. 35 | * 36 | * @author sotbeis, sotbeis@iti.gr 37 | * @author Alexander Patrikalakis 38 | */ 39 | public class GraphDatabaseBenchmark 40 | { 41 | public static final Logger logger = LogManager.getLogger(); 42 | public static final MetricRegistry metrics = new MetricRegistry(); 43 | public static final String DEFAULT_INPUT_PROPERTIES = "META-INF/input.properties"; 44 | private final BenchmarkConfiguration config; 45 | 46 | public static final Configuration getAppconfigFromClasspath() 47 | { 48 | Configuration appconfig; 49 | try 50 | { 51 | ClassLoader classLoader = GraphDatabaseBenchmark.class.getClassLoader(); 52 | URL resource = classLoader.getResource(DEFAULT_INPUT_PROPERTIES); 53 | appconfig = new PropertiesConfiguration(resource); 54 | } 55 | catch (ConfigurationException e) 56 | { 57 | throw new IllegalArgumentException(String.format( 58 | "Unable to load properties file from classpath because %s", e.getMessage())); 59 | } 60 | return appconfig; 61 | } 62 | 63 | public GraphDatabaseBenchmark(String inputPath) throws IllegalArgumentException 64 | { 65 | final Configuration appconfig; 66 | try 67 | { 68 | appconfig = inputPath == null ? 
getAppconfigFromClasspath() : new PropertiesConfiguration(new File( 69 | inputPath)); 70 | } 71 | catch (ConfigurationException e) 72 | { 73 | throw new IllegalArgumentException(String.format("Unable to load properties file %s because %s", inputPath, 74 | e.getMessage())); 75 | } 76 | config = new BenchmarkConfiguration(appconfig); 77 | if(config.publishCsvMetrics()) { 78 | final CsvReporter reporter = CsvReporter.forRegistry(metrics) 79 | .formatFor(Locale.US) 80 | .convertRatesTo(TimeUnit.SECONDS) 81 | .convertDurationsTo(TimeUnit.MILLISECONDS) 82 | .build(config.getCsvDir()); 83 | reporter.start(config.getCsvReportingInterval(), TimeUnit.MILLISECONDS); 84 | } 85 | if(config.publishGraphiteMetrics()) { 86 | final Graphite graphite = new Graphite(new InetSocketAddress(config.getGraphiteHostname(), 80 /*port*/)); 87 | final GraphiteReporter reporter = GraphiteReporter.forRegistry(metrics) 88 | .convertRatesTo(TimeUnit.SECONDS) 89 | .convertDurationsTo(TimeUnit.MILLISECONDS) 90 | .filter(MetricFilter.ALL) 91 | .build(graphite); 92 | reporter.start(config.getGraphiteReportingInterval(), TimeUnit.MILLISECONDS); 93 | } 94 | } 95 | 96 | public void run() 97 | { 98 | //MetricRegistry registry = MetricRegistry.name(klass, names) 99 | for (BenchmarkType type : config.getBenchmarkTypes()) 100 | { 101 | runBenchmark(type); 102 | } 103 | } 104 | 105 | private final void runBenchmark(BenchmarkType type) 106 | { 107 | final Benchmark benchmark; 108 | logger.info(type.longname() + " Benchmark Selected"); 109 | switch (type) 110 | { 111 | case MASSIVE_INSERTION: 112 | benchmark = new MassiveInsertionBenchmark(config); 113 | break; 114 | case SINGLE_INSERTION: 115 | benchmark = new SingleInsertionBenchmark(config); 116 | break; 117 | case FIND_ADJACENT_NODES: 118 | benchmark = new FindNodesOfAllEdgesBenchmark(config); 119 | break; 120 | case CLUSTERING: 121 | benchmark = new ClusteringBenchmark(config); 122 | break; 123 | case FIND_NEIGHBOURS: 124 | benchmark = new 
FindNeighboursOfAllNodesBenchmark(config); 125 | break; 126 | case FIND_SHORTEST_PATH: 127 | benchmark = new FindShortestPathBenchmark(config); 128 | break; 129 | case DELETION: 130 | benchmark = new DeleteGraphBenchmark(config); 131 | break; 132 | default: 133 | // parenthesize the conditional: '+' binds tighter than '?:', so the old form compared the concatenated string to null (always false) and NPE'd on a null type 133 | throw new UnsupportedOperationException("unsupported benchmark " + (type == null ? "null" 134 | : type.toString())); 135 | } 136 | benchmark.startBenchmark(); 137 | } 138 | 139 | /** 140 | * This is the main function. Set the proper property file and run 141 | * 142 | * @throws ExecutionException 143 | */ 144 | public static void main(String[] args) throws ExecutionException 145 | { 146 | final String inputPath = args.length != 1 ? null : args[0]; 147 | GraphDatabaseBenchmark benchmarks = new GraphDatabaseBenchmark(inputPath); 148 | try 149 | { 150 | benchmarks.run(); 151 | } 152 | catch (Throwable t) 153 | { 154 | logger.fatal(t.getMessage(), t); // log the stack trace, not just the message 155 | System.exit(1); 156 | } 157 | System.exit(0); 158 | } 159 | 160 | public void cleanup() 161 | { 162 | try 163 | { 164 | FileDeleteStrategy.FORCE.delete(config.getDbStorageDirectory()); 165 | } 166 | catch (IOException e) 167 | { 168 | logger.fatal("Unable to clean up db storage directory: " + e.getMessage()); 169 | System.exit(1); 170 | } 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.graphdatabases; 2 | 3 | import java.io.File; 4 | import java.util.Set; 5 | 6 | import org.neo4j.graphdb.Transaction; 7 | import org.neo4j.kernel.GraphDatabaseAPI; 8 | 9 | import com.codahale.metrics.MetricRegistry; 10 | import com.codahale.metrics.Timer; 11 | 12 | import eu.socialsensor.main.GraphDatabaseBenchmark; 13 | import eu.socialsensor.main.GraphDatabaseType; 14 | 15 | @SuppressWarnings("deprecation") 16 | public abstract class 
GraphDatabaseBase implements GraphDatabase 17 | { 18 | public static final String SIMILAR = "similar"; 19 | public static final String QUERY_CONTEXT = ".eu.socialsensor.query."; 20 | public static final String NODE_ID = "nodeId"; 21 | public static final String NODE_COMMUNITY = "nodeCommunity"; 22 | public static final String COMMUNITY = "community"; 23 | protected final File dbStorageDirectory; 24 | protected final MetricRegistry metrics = new MetricRegistry(); 25 | protected final GraphDatabaseType type; 26 | private final Timer nextVertexTimes; 27 | private final Timer getNeighborsOfVertexTimes; 28 | private final Timer nextEdgeTimes; 29 | private final Timer getOtherVertexFromEdgeTimes; 30 | private final Timer getAllEdgesTimes; 31 | private final Timer shortestPathTimes; 32 | 33 | protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory) 34 | { 35 | this.type = type; 36 | final String queryTypeContext = type.getShortname() + QUERY_CONTEXT; 37 | this.nextVertexTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "nextVertex"); 38 | this.getNeighborsOfVertexTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getNeighborsOfVertex"); 39 | this.nextEdgeTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "nextEdge"); 40 | this.getOtherVertexFromEdgeTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getOtherVertexFromEdge"); 41 | this.getAllEdgesTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getAllEdges"); 42 | this.shortestPathTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "shortestPath"); 43 | 44 | this.dbStorageDirectory = dbStorageDirectory; 45 | if (!this.dbStorageDirectory.exists()) 46 | { 47 | this.dbStorageDirectory.mkdirs(); 48 | } 49 | } 50 | 51 | @Override 52 | public void findAllNodeNeighbours() { 53 | //get the iterator 54 | Object tx = null; 55 | if(GraphDatabaseType.NEO4J == type) { //TODO fix this 56 | tx = ((Neo4jGraphDatabase) 
this).neo4jGraph.beginTx(); 57 | } 58 | try { 59 | VertexIteratorType vertexIterator = this.getVertexIterator(); 60 | while(vertexIteratorHasNext(vertexIterator)) { 61 | VertexType vertex; 62 | Timer.Context ctxt = nextVertexTimes.time(); 63 | try { 64 | vertex = nextVertex(vertexIterator); 65 | } finally { 66 | ctxt.stop(); 67 | } 68 | 69 | final EdgeIteratorType edgeNeighborIterator; 70 | ctxt = getNeighborsOfVertexTimes.time(); 71 | try { 72 | edgeNeighborIterator = this.getNeighborsOfVertex(vertex); 73 | } finally { 74 | ctxt.stop(); 75 | } 76 | while(edgeIteratorHasNext(edgeNeighborIterator)) { 77 | EdgeType edge; 78 | ctxt = nextEdgeTimes.time(); 79 | try { 80 | edge = nextEdge(edgeNeighborIterator); 81 | } finally { 82 | ctxt.stop(); 83 | } 84 | @SuppressWarnings("unused") 85 | Object other; 86 | ctxt = getOtherVertexFromEdgeTimes.time(); 87 | try { 88 | other = getOtherVertexFromEdge(edge, vertex); 89 | } finally { 90 | ctxt.stop(); 91 | } 92 | } 93 | this.cleanupEdgeIterator(edgeNeighborIterator); 94 | } 95 | this.cleanupVertexIterator(vertexIterator); 96 | if(this instanceof Neo4jGraphDatabase) { 97 | ((Transaction) tx).success(); 98 | } 99 | } finally {//TODO fix this 100 | if(GraphDatabaseType.NEO4J == type) { 101 | ((Transaction) tx).finish(); 102 | } 103 | } 104 | } 105 | 106 | @Override 107 | public void findNodesOfAllEdges() { 108 | Object tx = null; 109 | if(GraphDatabaseType.NEO4J == type) {//TODO fix this 110 | tx = ((GraphDatabaseAPI) ((Neo4jGraphDatabase) this).neo4jGraph).tx().unforced().begin(); 111 | } 112 | try { 113 | 114 | EdgeIteratorType edgeIterator; 115 | Timer.Context ctxt = getAllEdgesTimes.time(); 116 | try { 117 | edgeIterator = this.getAllEdges(); 118 | } finally { 119 | ctxt.stop(); 120 | } 121 | 122 | while(edgeIteratorHasNext(edgeIterator)) { 123 | EdgeType edge; 124 | ctxt = nextEdgeTimes.time(); 125 | try { 126 | edge = nextEdge(edgeIterator); 127 | } finally { 128 | ctxt.stop(); 129 | } 130 | @SuppressWarnings("unused") 131 
| VertexType source = this.getSrcVertexFromEdge(edge); 132 | @SuppressWarnings("unused") 133 | VertexType destination = this.getDestVertexFromEdge(edge); 134 | } 135 | } finally {//TODO fix this 136 | if(GraphDatabaseType.NEO4J == type) { 137 | ((Transaction) tx).close(); 138 | } 139 | } 140 | } 141 | 142 | @Override 143 | public void shortestPaths(Set nodes) { 144 | Object tx = null; 145 | if(GraphDatabaseType.NEO4J == type) {//TODO fix this 146 | tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx(); 147 | } 148 | try { 149 | //TODO(amcp) change this to use 100+1 random node list and then to use a sublist instead of always choosing node # 1 150 | VertexType from = getVertex(1); 151 | Timer.Context ctxt; 152 | for(Integer i : nodes) { 153 | //time this 154 | ctxt = shortestPathTimes.time(); 155 | try { 156 | shortestPath(from, i); 157 | } finally { 158 | ctxt.stop(); 159 | } 160 | } 161 | if(this instanceof Neo4jGraphDatabase) { 162 | ((Transaction) tx).success(); 163 | } 164 | } finally {//TODO fix this 165 | if(GraphDatabaseType.NEO4J == type) { 166 | ((Transaction) tx).finish(); 167 | } 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | graphdb-benchmarks 2 | ================== 3 | The project graphdb-benchmarks is a benchmark between popular graph dataases. Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/), [OrientDB](http://www.orientechnologies.com/orientdb/), [Neo4j](http://neo4j.com/) and [Sparksee](http://www.sparsity-technologies.com/). The purpose of this benchmark is to examine the performance of each graph database in terms of execution time. The benchmark is composed of four workloads, Clustering, Massive Insertion, Single Insertion and Query Workload. Every workload has been designed to simulate common operations in graph database systems. 
4 | 5 | - *Clustering Workload (CW)*: CW consists of a well-known community detection algorithm for modularity optimization, the Louvain Method. We adapt the algorithm on top of the benchmarked graph databases and employ cache techniques to take advantage of both graph database capabilities and in-memory execution speed. We measure the time the algorithm needs to converge. 6 | - *Massive Insertion Workload (MIW)*: we create the graph database and configure it for massive loading, then we populate it with a particular dataset. We measure the time for the creation of the whole graph. 7 | - *Single Insertion Workload (SIW)*: we create the graph database and load it with a particular dataset. Every object insertion (node or edge) is committed directly and the graph is constructed incrementally. We measure the insertion time per block, which consists of one thousand edges and the nodes that appear during the insertion of these edges. 8 | - *Query Workload (QW)*: we execute three common queries: 9 | * FindNeighbours (FN): finds the neighbours of all nodes. 10 | * FindAdjacentNodes (FA): finds the adjacent nodes of all edges. 11 | * FindShortestPath (FS): finds the shortest path between the first node and 100 randomly picked nodes. 12 | 13 | Here we measure the execution time of each query. 14 | 15 | For our evaluation we use both synthetic and real data. More specifically, we execute MIW, SIW and QW with real data derived from the SNAP dataset collection ([Enron Dataset](http://snap.stanford.edu/data/email-Enron.html), [Amazon dataset](http://snap.stanford.edu/data/amazon0601.html), [Youtube dataset](http://snap.stanford.edu/data/com-Youtube.html) and [LiveJournal dataset](http://snap.stanford.edu/data/com-LiveJournal.html)). 
On the other hand, with the CW we use synthetic data generated with the [LFR-Benchmark generator](https://sites.google.com/site/andrealancichinetti/files) which produces networks with power-law degree distribution and implanted communities within the network. The synthetic data can be downloaded from [here](http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760). 16 | 17 | For further information about the study please refer to the [published paper](http://link.springer.com/chapter/10.1007/978-3-319-10518-5_1) on Springer site and the presentation on [Slideshare](http://www.slideshare.net/sympapadopoulos/adbis2014-presentation). 18 | 19 | **Note 1:** The published paper contains the experimental study of Titan, OrientDB and Neo4j. After the publication we included the Sparksee graph database. 20 | 21 | **Note 2:** After the very useful comments and contributions of OrientDB developers, we updated the benchmark implementations and re-ran the experiments. We have updated the initial presentation with the new results and uploaded a new version of the paper in the following [link](http://mklab.iti.gr/files/beis_adbis2014_corrected.pdf). 22 | 23 | **Note 3:** Alexander Patrikalakis, a software developer at Amazon Web Services, refactored the benchmark, added support for Blueprints 2.5 and added support for the DynamoDB Storage Backend for Titan. 24 | 25 | Instructions 26 | ------------ 27 | To run the project at first you have to choose one of the aforementioned datasets. Of course you can select any dataset, but because there is not any utility class to convert the dataset in the appropriate format (for now), the format of the data must be identical with the tested datasets. The input parameters are configured from the src/test/resources/input.properties file. Please follow the instructions in this file to select the correct parameters. Then, run `mvn dependency:copy-dependencies && mvn test -Pbench` to execute the benchmarking run. 
28 | 29 | Results 30 | ------- 31 | This section contains the results of each benchmark. All the measurements are in seconds. 32 | 33 | 34 | ####CW results 35 | Below we list the results of the CW for graphs with 1,000, 5,000, 10,0000, 20,000, 30,000, 40,000, 50,000 nodes. 36 | 37 | | Graph-Cache | Titan | OrientDB | Neo4j | 38 | | ----------- | ----- | -------- | ----- | 39 | |Graph1k-5% |2.39 |**0.92** |2.46 | 40 | |Graph1k-10% |1.45 |**0.59** |2.07 | 41 | |Graph1k-15% |1.30 |**0.58** |1.88 | 42 | |Graph1k-20% |1.25 |**0.55** |1.72 | 43 | |Graph1k-25% |1.19 |**0.49** |1.67 | 44 | |Graph1k-30% |1.15 |**0.48** |1.55 | 45 | | | 46 | |Graph5k-5% |16.01 |**5.88** |12.80 | 47 | |Graph5k-10% |15.10 |**5.67** |12.13 | 48 | |Graph5k-15% |14.63 |**4.81** |11.91 | 49 | |Graph5k-20% |14.16 |**4.62** |11.68 | 50 | |Graph5k-25% |13.76 |**4.51** |11.31 | 51 | |Graph5k-30% |13.38 |**4.45** |10.94 | 52 | | | 53 | |Graph10k-5% |46.06 |**18.20** |34.05 | 54 | |Graph10k-10% |44.59 |**17.92** |32.88 | 55 | |Graph10k-15% |43.68 |**17.31** |31.91 | 56 | |Graph10k-20% |42.48 |**16.88** |31.01 | 57 | |Graph10k-25% |41.32 |**16.58** |30.74 | 58 | |Graph10k-30% |39.98 |**16.34** |30.13 | 59 | | | 60 | |Graph20k-5% |140.46 |**54.01** |87.04 | 61 | |Graph20k-10% |138.10 |**52.51** |85.49 | 62 | |Graph20k-15% |137.25 |**52.12** |82.88 | 63 | |Graph20k-20% |133.11 |**51.68** |82.16 | 64 | |Graph20k-25% |122.48 |**50.79** |79.87 | 65 | |Graph20k-30% |120.94 |**50.49** |78.81 | 66 | | | 67 | |Graph30k-5% |310.25 |**96.38** |154.60 | 68 | |Graph30k-10% |301.80 |**94.98** |151.81 | 69 | |Graph30k-15% |299.27 |**94.85** |151.12 | 70 | |Graph30k-20% |296.43 |**94.67** |146.25 | 71 | |Graph30k-25% |294.33 |**92.62** |144.08 | 72 | |Graph30k-30% |288.50 |**90.13** |142.33 | 73 | | | 74 | |Graph40k-5% |533.29 |**201.19**|250.79 | 75 | |Graph40k-10% |505.91 |**199.18**|244.79 | 76 | |Graph40k-15% |490.39 |**194.34**|242.55 | 77 | |Graph40k-20% |478.31 |**183.14**|241.47 | 78 | |Graph40k-25% |467.18 
|**177.55**|237.29 | 79 | |Graph40k-30% |418.07 |**174.65**|229.65 | 80 | | | 81 | |Graph50k-5% |642.42 |**240.58**|348.33 | 82 | |Graph50k-10% |624.36 |**238.35**|344.06 | 83 | |Graph50k-15% |611.70 |**237.65**|340.20 | 84 | |Graph50k-20% |610.40 |**230.76**|337.36 | 85 | |Graph50k-25% |596.29 |**230.03**|332.01 | 86 | |Graph50k-30% |580.44 |**226.31**|325.88 | 87 | 88 | 89 | ####MIW & QW results 90 | Below we list the results of MIW and QW for each dataset. 91 | 92 | | Dataset | Workload | Titan | OrientDB | Neo4j | 93 | | ------- | -------- | ----- | -------- | ----- | 94 | | EN | MIW |9.36 |62.77 |**6.77** | 95 | | AM | MIW |34.00 |97.00 |**10.61** | 96 | | YT | MIW |104.27 |252.15 |**24.69** | 97 | | LJ | MIW |663.03 |9416.74 |**349.55**| 98 | | | 99 | | EN | QW-FN |1.87 |**0.56** |0.95 | 100 | | AM | QW-FN |6.47 |3.50 |**1.85** | 101 | | YT | QW-FN |20.71 |9.34 |**4.51** | 102 | | LJ | QW-FN |213.41 |303.09 |**47.07** | 103 | | | 104 | | EN | QW-FA |3.78 |0.71 |**0.16** | 105 | | AM | QW-FA |13.77 |2.30 |**0.36** | 106 | | YT | QW-FA |42.82 |6.15 |**1.46** | 107 | | LJ | QW-FA |460.25 |518.12 |**16.53** | 108 | | | 109 | | EN | QW-FS |1.63 |3.09 |**0.16** | 110 | | AM | QW-FS |0.12 |83.29 |**0.302** | 111 | | YT | QW-FS |24.87 |23.47 |**0.08** | 112 | | LJ | QW-FS |123.50 |86.87 |**18.13** | 113 | 114 | 115 | ####SIW results 116 | Below we list the results of SIW for each dataset. 117 | 118 | ![siw_benchmark_updated](https://cloud.githubusercontent.com/assets/8163869/12272282/62b1c9f4-b914-11e5-85be-efd3f58e1e05.png) 119 | 125 | 126 | Contact 127 | ------- 128 | For more information or support, please contact: sotbeis@iti.gr, sot.beis@gmail.com, papadop@iti.gr or amcp@me.com. 
129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/utils/Utils.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.utils; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | import java.util.ArrayList; 8 | import java.util.Arrays; 9 | import java.util.LinkedList; 10 | import java.util.List; 11 | import java.util.Map; 12 | import java.util.Map.Entry; 13 | import java.util.SortedMap; 14 | import java.util.TreeMap; 15 | 16 | import org.apache.commons.io.FileUtils; 17 | import org.apache.commons.io.LineIterator; 18 | import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; 19 | import org.apache.commons.math3.util.MathArrays; 20 | import org.apache.logging.log4j.LogManager; 21 | import org.apache.logging.log4j.Logger; 22 | 23 | import eu.socialsensor.graphdatabases.GraphDatabase; 24 | import eu.socialsensor.graphdatabases.Neo4jGraphDatabase; 25 | import eu.socialsensor.graphdatabases.OrientGraphDatabase; 26 
import eu.socialsensor.graphdatabases.SparkseeGraphDatabase;
import eu.socialsensor.graphdatabases.TitanGraphDatabase;
import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.BenchmarkingException;
import eu.socialsensor.main.GraphDatabaseType;

/**
 * This class contains all the required utility functions for the benchmark:
 * result-file IO, dataset parsing helpers, statistics aggregation, and factory
 * methods for the benchmarked graph database implementations.
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 *
 */
public class Utils
{
    public static final Logger logger = LogManager.getLogger();

    /**
     * Reads the intermediate result documents named docPath.1 .. docPath.scenarios,
     * one per benchmark scenario, each containing one double value per line.
     *
     * @param docPath base path of the intermediate documents
     * @param scenarios number of scenario files to read
     * @return one list of values per scenario, in scenario order
     * @throws IllegalStateException if any intermediate file is missing
     */
    public static List<List<Double>> getDocumentsAs2dList(String docPath, int scenarios)
    {
        List<List<Double>> data = new ArrayList<List<Double>>(scenarios);
        for (int i = 0; i < scenarios; i++)
        {
            File intermediateFile = new File(docPath + "." + (i + 1));
            if (!intermediateFile.exists())
            {
                throw new IllegalStateException("file " + intermediateFile.getAbsolutePath() + " does not exist");
            }
            data.add(getListFromTextDoc(intermediateFile));
        }
        return data;
    }

    /**
     * Reads all lines of a UTF-8 text file.
     *
     * @param file the file to read; must exist and be a readable regular file
     * @return the lines of the file, in order
     * @throws IllegalArgumentException if the file is null, missing, or unreadable
     * @throws BenchmarkingException if an IO error occurs while opening the file
     */
    public static final List<String> readlines(File file)
    {
        if (file == null || !file.exists())
        {
            // file may legitimately be null here, so do not dereference it in the message
            throw new IllegalArgumentException("file object must not be null and must exist: "
                + (file == null ? "null" : file.getAbsolutePath()));
        }
        if (!file.isFile() || !file.canRead())
        {
            throw new IllegalArgumentException("file object must be a readable file: " + file.getAbsolutePath());
        }
        LineIterator it;
        try
        {
            it = FileUtils.lineIterator(file, "UTF-8");
        }
        catch (IOException e)
        {
            throw new BenchmarkingException("Unable to read lines from file: " + file.getAbsolutePath(), e);
        }
        List<String> result = new LinkedList<String>();
        try
        {
            while (it.hasNext())
            {
                result.add(it.nextLine());
            }
        }
        finally
        {
            LineIterator.closeQuietly(it);
        }

        return result;
    }

    /**
     * Splits each line on tab characters, skipping the first numberOfLinesToSkip
     * lines (e.g. dataset comment headers).
     *
     * @throws IllegalArgumentException if numberOfLinesToSkip is out of range
     */
    public static final List<List<String>> parseTabulatedLines(List<String> lines, int numberOfLinesToSkip)
    {
        if (numberOfLinesToSkip < 0 || numberOfLinesToSkip > lines.size())
        {
            throw new IllegalArgumentException("can skip at least zero and at most lines.size lines");
        }
        // plain sequential loop: the previous parallelStream().forEachOrdered()
        // serialized the appends into the LinkedList anyway, so the parallel
        // pipeline added overhead without any concurrency benefit
        List<List<String>> result = new LinkedList<List<String>>();
        for (String line : lines.subList(numberOfLinesToSkip, lines.size()))
        {
            result.add(Arrays.asList(line.split("\t")));
        }
        return result;
    }

    /** Convenience composition of {@link #readlines(File)} and {@link #parseTabulatedLines(List, int)}. */
    public static final List<List<String>> readTabulatedLines(File file, int numberOfLinesToSkip)
    {
        return parseTabulatedLines(readlines(file), numberOfLinesToSkip);
    }

    /**
     * Deletes a file, or a directory and everything beneath it. A no-op when
     * the path does not exist.
     *
     * @throws RuntimeException if any path in the tree could not be deleted
     */
    public static void deleteRecursively(File file)
    {
        if (!file.exists())
        {
            return;
        }
        if (file.isDirectory())
        {
            for (File child : file.listFiles())
            {
                deleteRecursively(child);
            }
        }
        if (!file.delete())
        {
            throw new RuntimeException("Couldn't empty database.");
        }
    }

    /** Deletes the numbered intermediate files filePath.1 .. filePath.numberOfFiles. */
    public static void deleteMultipleFiles(String filePath, int numberOfFiles)
    {
        for (int i = 0; i < numberOfFiles; i++)
        {
            deleteRecursively(new File(filePath + "." + (i + 1)));
        }
    }

    /**
     * Writes one insertion time per line to outputPath.
     *
     * @throws BenchmarkingException if the file cannot be written
     */
    public static void writeTimes(List<Double> insertionTimes, File outputPath)
    {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(outputPath)))
        {
            for (Double insertionTime : insertionTimes)
            {
                out.write(insertionTime.toString());
                out.write("\n");
            }
        }
        catch (IOException e)
        {
            throw new BenchmarkingException("unable to write times to: " + outputPath.getAbsolutePath(), e);
        }
    }

    /** Parses a text file containing one double per line; surrounding whitespace is ignored. */
    public static List<Double> getListFromTextDoc(File file)
    {
        List<String> lines = readlines(file);
        List<Double> values = new ArrayList<Double>(lines.size());
        for (String line : lines)
        {
            values.add(Double.valueOf(line.trim()));
        }
        return values;
    }

    /**
     * Returns the first key mapped to the given value, or null when no entry
     * matches. Linear scan — intended for small maps only.
     */
    public static <T, E> T getKeyByValue(Map<T, E> map, E value)
    {
        for (Entry<T, E> entry : map.entrySet())
        {
            if (value.equals(entry.getValue()))
            {
                return entry.getKey();
            }
        }
        return null;
    }

    /** Storage directory for a database type: storageBaseDir/&lt;type shortname&gt;. */
    public static final File generateStorageDirectory(GraphDatabaseType type, File storageBaseDir)
    {
        return new File(storageBaseDir, type.getShortname());
    }

    /**
     * Factory for the {@link GraphDatabase} implementation matching the given type.
     *
     * @param config benchmark configuration handed to the database implementation
     * @param type the kind of database to instantiate
     * @throws IllegalArgumentException for an unsupported type
     */
    public static final GraphDatabase createDatabaseInstance(BenchmarkConfiguration config, GraphDatabaseType type)
    {
        final GraphDatabase graphDatabase;
        final File dbStorageDirectory = generateStorageDirectory(type, config.getDbStorageDirectory());
        if (GraphDatabaseType.TITAN_FLAVORS.contains(type))
        {
            graphDatabase = new TitanGraphDatabase(type, config, dbStorageDirectory);
        }
        else if (GraphDatabaseType.NEO4J == type)
        {
            graphDatabase = new Neo4jGraphDatabase(dbStorageDirectory);
        }
        else if (GraphDatabaseType.ORIENT_DB == type)
        {
            graphDatabase = new OrientGraphDatabase(config, dbStorageDirectory);
        }
        else if (GraphDatabaseType.SPARKSEE == type)
        {
            graphDatabase = new SparkseeGraphDatabase(config, dbStorageDirectory);
        }
        else
        {
            // BUG FIX: the ternary must be parenthesized. Without the parentheses
            // the string concatenation bound tighter than ==, so the null check
            // compared ("Unknown type: " + type) to null (always false) and a null
            // type produced an NPE from type.toString() instead of a clear message.
            throw new IllegalArgumentException("Unknown type: " + (type == null ? "null" : type.toString()));
        }

        return graphDatabase;
    }

    /** Creates the database, bulk-loads the configured dataset, and shuts it down. */
    public static void createMassiveLoadDatabase(GraphDatabaseType type, BenchmarkConfiguration config)
    {
        final GraphDatabase graphDatabase = createDatabaseInstance(config, type);
        graphDatabase.createGraphForMassiveLoad();
        graphDatabase.massiveModeLoading(config.getDataset());
        graphDatabase.shutdownMassiveGraph();
    }

    /**
     * Returns a graph database loaded with data in individual transactions.
     * Does not shut down the database after the data load.
     *
     * @param type
     *            database type
     * @param config
     *            for individual databases.
     * @return the open, loaded database
     */
    public static GraphDatabase createSingleLoadDatabase(GraphDatabaseType type, BenchmarkConfiguration config)
    {
        final GraphDatabase graphDatabase = createDatabaseInstance(config, type);
        graphDatabase.createGraphForSingleLoad();
        graphDatabase.singleModeLoading(config.getDataset(), null /* resultsPath */, 0);
        return graphDatabase;
    }

    /** Deletes the on-disk storage of the given database type. */
    public static void deleteDatabase(GraphDatabaseType type, BenchmarkConfiguration config)
    {
        logger.info(String.format("Deleting graph database %s . . . .", type.getShortname()));

        final GraphDatabase graphDatabase = createDatabaseInstance(config, type);
        graphDatabase.delete();
    }

    /** Unboxes a list of Doubles into a primitive array; null or empty yields an empty array. */
    public static double[] convert(List<Double> list)
    {
        if (list == null || list.isEmpty())
        {
            return new double[0];
        }
        double[] result = new double[list.size()];
        for (int i = 0; i < list.size(); i++)
        {
            result[i] = list.get(i).doubleValue();
        }

        return result;
    }

    /**
     * Writes per-database mean and standard deviation of the measured times as CSV.
     *
     * @param output
     *            CSV output file (must not be a directory)
     * @param times
     *            in milliseconds
     * @param benchmarkTitle
     *            label used in the CSV header
     * @throws BenchmarkingException if writing the output fails
     */
    public static void writeResults(File output, Map<GraphDatabaseType, List<Double>> times, String benchmarkTitle)
    {
        logger.info("Write results to " + output);
        if (output.isDirectory())
        {
            throw new IllegalArgumentException("output was a directory: " + output.getAbsolutePath());
        }

        SortedMap<GraphDatabaseType, Double> means = new TreeMap<GraphDatabaseType, Double>();
        SortedMap<GraphDatabaseType, Double> standardDeviations = new TreeMap<GraphDatabaseType, Double>();
        for (GraphDatabaseType type : times.keySet())
        {
            // scale milliseconds to seconds before aggregating
            final double[] scaledTimesArray = MathArrays.scale(0.001, convert(times.get(type)));
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (double val : scaledTimesArray)
            {
                stats.addValue(val);
            }
            means.put(type, stats.getMean());
            standardDeviations.put(type, stats.getStandardDeviation());
        }

        // use two passes so the compute is not interleaved with disk IO
        try (BufferedWriter out = new BufferedWriter(new FileWriter(output)))
        {
            // TODO(amcp) add other sample means p50 p90 p99 p99.9 etc
            out.write(String.format("DB,%s p100 Mean (s),Sample Size,Standard Deviation\n", benchmarkTitle));
            for (GraphDatabaseType type : times.keySet())
            {
                out.write(String.format("%s,%f,%d,%f\n", type.getShortname(), means.get(type), times.get(type).size(),
                    standardDeviations.get(type)));
            }
        }
        catch (IOException e)
        {
            // keep the cause attached so the underlying IO failure is not lost
            throw new BenchmarkingException(String.format("Exception thrown when writing output to %s: %s", output,
                e.getMessage()), e);
        }
    }

    /** Mean of each inner list; a null or empty outer list yields an empty result. */
    public static List<Double> calculateMeanList(List<List<Double>> lists)
    {
        if (lists == null || lists.isEmpty())
        {
            return new ArrayList<Double>(0);
        }
        List<Double> result = new ArrayList<Double>();
        for (List<Double> list : lists)
        {
            result.add(new DescriptiveStatistics(convert(list)).getMean());
        }
        return result;
    }
}
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java:
package eu.socialsensor.graphdatabases;

import com.google.common.collect.Iterables;
import com.orientechnologies.common.collection.OMultiCollectionIterator;
import com.orientechnologies.common.util.OCallable;
import com.orientechnologies.orient.core.command.OBasicCommandContext;
import com.orientechnologies.orient.core.config.OGlobalConfiguration;
import com.orientechnologies.orient.core.id.ORID;
import com.orientechnologies.orient.core.metadata.schema.OType;
import com.orientechnologies.orient.graph.sql.functions.OSQLFunctionShortestPath;
import com.tinkerpop.blueprints.Direction;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Parameter;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.blueprints.impls.orient.OrientBaseGraph;
import com.tinkerpop.blueprints.impls.orient.OrientEdgeType;
import com.tinkerpop.blueprints.impls.orient.OrientGraph;
import com.tinkerpop.blueprints.impls.orient.OrientGraphFactory;
import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx;
import com.tinkerpop.blueprints.impls.orient.OrientVertex;
import com.tinkerpop.blueprints.impls.orient.OrientVertexType;

import eu.socialsensor.insert.Insertion;
import eu.socialsensor.insert.OrientMassiveInsertion;
import eu.socialsensor.insert.OrientSingleInsertion;
import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * OrientDB graph database implementation.
 *
 * NOTE(review): all generic type parameters in this file were stripped by the
 * extraction (the raw dump shows e.g. "Set getNeighborsIds", "HashMap>");
 * they are restored below from how the values are used.
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 */
public class OrientGraphDatabase extends GraphDatabaseBase<Iterator<Vertex>, Iterator<Edge>, Vertex, Edge>
{

    private OrientGraph graph = null;
    // when true, edges without properties are stored inline in the vertex
    private boolean useLightWeightEdges;

    /**
     * Creates the database wrapper. Storage compression is disabled globally
     * so that on-disk size comparisons between engines are fair.
     */
    public OrientGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn)
    {
        super(GraphDatabaseType.ORIENT_DB, dbStorageDirectoryIn);
        OGlobalConfiguration.STORAGE_COMPRESSION_METHOD.setValue("nothing");
        // default to lightweight edges when the config leaves it unset
        this.useLightWeightEdges = config.orientLightweightEdges() == null ? true : config.orientLightweightEdges()
            .booleanValue();
    }

    @Override
    public void open()
    {
        graph = getGraph(dbStorageDirectory);
    }

    @SuppressWarnings("deprecation")
    @Override
    public void createGraphForSingleLoad()
    {
        OGlobalConfiguration.STORAGE_KEEP_OPEN.setValue(false);
        graph = getGraph(dbStorageDirectory);
        createSchema();
    }

    @SuppressWarnings("deprecation")
    @Override
    public void createGraphForMassiveLoad()
    {
        OGlobalConfiguration.STORAGE_KEEP_OPEN.setValue(false);
        graph = getGraph(dbStorageDirectory);
        createSchema();
    }

    @Override
    public void massiveModeLoading(File dataPath)
    {
        OrientMassiveInsertion orientMassiveInsertion = new OrientMassiveInsertion(this.graph.getRawGraph().getURL());
        orientMassiveInsertion.createGraph(dataPath, 0 /* scenarioNumber */);
    }

    @Override
    public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumber)
    {
        Insertion orientSingleInsertion = new OrientSingleInsertion(this.graph, resultsPath);
        orientSingleInsertion.createGraph(dataPath, scenarioNumber);
    }

    @Override
    public void shutdown()
    {
        if (graph == null)
        {
            return;
        }
        graph.shutdown();
        graph = null;
    }

    @Override
    public void delete()
    {
        OrientGraphNoTx g = new OrientGraphNoTx("plocal:" + dbStorageDirectory.getAbsolutePath());
        g.drop();

        Utils.deleteRecursively(dbStorageDirectory);
    }

    @Override
    public void shutdownMassiveGraph()
    {
        shutdown();
    }

    /**
     * Runs OrientDB's shortestPath() SQL function between v1 and the vertex
     * with node id i, following OUT edges, capped at 5 hops. The result is
     * only materialized (size()), not returned — this method exists for
     * timing.
     */
    @Override
    public void shortestPath(final Vertex v1, Integer i)
    {
        final OrientVertex v2 = (OrientVertex) getVertex(i);

        List<ORID> result = new OSQLFunctionShortestPath().execute(graph,
            null, null, new Object[] { ((OrientVertex) v1).getRecord(), v2.getRecord(), Direction.OUT, 5 },
            new OBasicCommandContext());

        result.size();
    }

    @Override
    public int getNodeCount()
    {
        return (int) graph.countVertices();
    }

    /**
     * Returns the node ids of all vertices with a SIMILAR edge pointing at
     * the given node (incoming neighbors).
     */
    @Override
    public Set<Integer> getNeighborsIds(int nodeId)
    {
        Set<Integer> neighbours = new HashSet<Integer>();
        Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
        for (Vertex v : vertex.getVertices(Direction.IN, SIMILAR))
        {
            Integer neighborId = v.getProperty(NODE_ID);
            neighbours.add(neighborId);
        }
        return neighbours;
    }

    /** Node weight is defined as its out-degree over SIMILAR edges. */
    @Override
    public double getNodeWeight(int nodeId)
    {
        Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
        double weight = getNodeOutDegree(vertex);
        return weight;
    }

    public double getNodeInDegree(Vertex vertex)
    {
        @SuppressWarnings("rawtypes")
        OMultiCollectionIterator result = (OMultiCollectionIterator) vertex.getVertices(Direction.IN, SIMILAR);
        return (double) result.size();
    }

    public double getNodeOutDegree(Vertex vertex)
    {
        @SuppressWarnings("rawtypes")
        OMultiCollectionIterator result = (OMultiCollectionIterator) vertex.getVertices(Direction.OUT, SIMILAR);
        return (double) result.size();
    }

    /** Assigns every vertex its own singleton community and node-community. */
    @Override
    public void initCommunityProperty()
    {
        int communityCounter = 0;
        for (Vertex v : graph.getVertices())
        {
            ((OrientVertex) v).setProperties(NODE_COMMUNITY, communityCounter, COMMUNITY, communityCounter);
            ((OrientVertex) v).save();
            communityCounter++;
        }
    }

    @Override
    public Set<Integer> getCommunitiesConnectedToNodeCommunities(int nodeCommunities)
    {
        Set<Integer> communities = new HashSet<Integer>();
        Iterable<Vertex> vertices = graph.getVertices(NODE_COMMUNITY, nodeCommunities);
        for (Vertex vertex : vertices)
        {
            for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR))
            {
                int community = v.getProperty(COMMUNITY);
                if (!communities.contains(community))
                {
                    communities.add(community);
                }
            }
        }
        return communities;
    }

    @Override
    public Set<Integer> getNodesFromCommunity(int community)
    {
        Set<Integer> nodes = new HashSet<Integer>();
        Iterable<Vertex> iter = graph.getVertices(COMMUNITY, community);
        for (Vertex v : iter)
        {
            Integer nodeId = v.getProperty(NODE_ID);
            nodes.add(nodeId);
        }
        return nodes;
    }

    @Override
    public Set<Integer> getNodesFromNodeCommunity(int nodeCommunity)
    {
        Set<Integer> nodes = new HashSet<Integer>();
        // use the shared NODE_COMMUNITY constant instead of a string literal,
        // consistent with every other query against this key in the class
        Iterable<Vertex> iter = graph.getVertices(NODE_COMMUNITY, nodeCommunity);
        for (Vertex v : iter)
        {
            Integer nodeId = v.getProperty(NODE_ID);
            nodes.add(nodeId);
        }
        return nodes;
    }

    /**
     * Counts SIMILAR edges from vertices in node-community vertexCommunity to
     * vertices in community communityVertices.
     * NOTE(review): Iterables.contains re-iterates comVertices for every
     * candidate edge (O(n*m)); acceptable for benchmark-sized communities.
     */
    @Override
    public double getEdgesInsideCommunity(int vertexCommunity, int communityVertices)
    {
        double edges = 0;
        Iterable<Vertex> vertices = graph.getVertices(NODE_COMMUNITY, vertexCommunity);
        Iterable<Vertex> comVertices = graph.getVertices(COMMUNITY, communityVertices);
        for (Vertex vertex : vertices)
        {
            for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR))
            {
                if (Iterables.contains(comVertices, v))
                {
                    edges++;
                }
            }
        }
        return edges;
    }

    /**
     * Sum of out-degrees of the community's members; zero by definition when
     * the community has at most one member.
     */
    @Override
    public double getCommunityWeight(int community)
    {
        double communityWeight = 0;
        Iterable<Vertex> iter = graph.getVertices(COMMUNITY, community);
        if (Iterables.size(iter) > 1)
        {
            for (Vertex vertex : iter)
            {
                communityWeight += getNodeOutDegree(vertex);
            }
        }
        return communityWeight;
    }

    @Override
    public double getNodeCommunityWeight(int nodeCommunity)
    {
        double nodeCommunityWeight = 0;
        Iterable<Vertex> iter = graph.getVertices(NODE_COMMUNITY, nodeCommunity);
        for (Vertex vertex : iter)
        {
            nodeCommunityWeight += getNodeOutDegree(vertex);
        }
        return nodeCommunityWeight;
    }

    /** Moves every vertex of the given node-community into toCommunity. */
    @Override
    public void moveNode(int nodeCommunity, int toCommunity)
    {
        Iterable<Vertex> fromIter = graph.getVertices(NODE_COMMUNITY, nodeCommunity);
        for (Vertex vertex : fromIter)
        {
            vertex.setProperty(COMMUNITY, toCommunity);
        }
    }

    @Override
    public double getGraphWeightSum()
    {
        long edges = 0;
        for (Vertex o : graph.getVertices())
        {
            edges += ((OrientVertex) o).countEdges(Direction.OUT, SIMILAR);
        }
        return (double) edges;
    }

    /**
     * Renumbers communities densely starting at 0 and resets each vertex's
     * node-community to its community.
     *
     * @return the number of distinct communities after renumbering
     */
    @Override
    public int reInitializeCommunities()
    {
        Map<Integer, Integer> initCommunities = new HashMap<Integer, Integer>();
        int communityCounter = 0;
        for (Vertex v : graph.getVertices())
        {
            int communityId = v.getProperty(COMMUNITY);
            if (!initCommunities.containsKey(communityId))
            {
                initCommunities.put(communityId, communityCounter);
                communityCounter++;
            }
            int newCommunityId = initCommunities.get(communityId);
            ((OrientVertex) v).setProperties(COMMUNITY, newCommunityId, NODE_COMMUNITY, newCommunityId);
            ((OrientVertex) v).save();
        }
        return communityCounter;
    }

    @Override
    public int getCommunity(int nodeCommunity)
    {
        final Iterator<Vertex> result = graph.getVertices(NODE_COMMUNITY, nodeCommunity).iterator();
        if (!result.hasNext())
            throw new IllegalArgumentException("node community not found: " + nodeCommunity);

        Vertex vertex = result.next();
        int community = vertex.getProperty(COMMUNITY);
        return community;
    }

    @Override
    public int getCommunityFromNode(int nodeId)
    {
        Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
        return vertex.getProperty(COMMUNITY);
    }

    /** Community "size" = number of distinct node-communities inside it. */
    @Override
    public int getCommunitySize(int community)
    {
        Iterable<Vertex> vertices = graph.getVertices(COMMUNITY, community);
        Set<Integer> nodeCommunities = new HashSet<Integer>();
        for (Vertex v : vertices)
        {
            int nodeCommunity = v.getProperty(NODE_COMMUNITY);
            if (!nodeCommunities.contains(nodeCommunity))
            {
                nodeCommunities.add(nodeCommunity);
            }
        }
        return nodeCommunities.size();
    }

    /** Maps community id -> list of member node ids, for ids [0, numberOfCommunities). */
    @Override
    public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities)
    {
        Map<Integer, List<Integer>> communities = new HashMap<Integer, List<Integer>>();
        for (int i = 0; i < numberOfCommunities; i++)
        {
            Iterator<Vertex> verticesIter = graph.getVertices(COMMUNITY, i).iterator();
            List<Integer> vertices = new ArrayList<Integer>();
            while (verticesIter.hasNext())
            {
                Integer nodeId = verticesIter.next().getProperty(NODE_ID);
                vertices.add(nodeId);
            }
            communities.put(i, vertices);
        }
        return communities;
    }

    /**
     * Creates the vertex/edge schema and key indexes outside of a
     * transaction. Idempotent: skipped when NODE_ID already exists.
     */
    protected void createSchema()
    {
        graph.executeOutsideTx(new OCallable<Object, OrientBaseGraph>() {
            @SuppressWarnings({ "unchecked", "rawtypes" })
            @Override
            public Object call(final OrientBaseGraph g)
            {
                OrientVertexType v = g.getVertexBaseType();
                if (!v.existsProperty(NODE_ID)) { // TODO fix schema detection hack later
                    v.createProperty(NODE_ID, OType.INTEGER);
                    g.createKeyIndex(NODE_ID, Vertex.class, new Parameter("type", "UNIQUE_HASH_INDEX"), new Parameter(
                        "keytype", "INTEGER"));

                    v.createEdgeProperty(Direction.OUT, SIMILAR, OType.LINKBAG);
                    v.createEdgeProperty(Direction.IN, SIMILAR, OType.LINKBAG);
                    OrientEdgeType similar = g.createEdgeType(SIMILAR);
                    similar.createProperty("out", OType.LINK, v);
                    similar.createProperty("in", OType.LINK, v);
                    g.createKeyIndex(COMMUNITY, Vertex.class, new Parameter("type", "NOTUNIQUE_HASH_INDEX"),
                        new Parameter("keytype", "INTEGER"));
                    g.createKeyIndex(NODE_COMMUNITY, Vertex.class, new Parameter("type", "NOTUNIQUE_HASH_INDEX"),
                        new Parameter("keytype", "INTEGER"));
                }

                return null;
            }
        });
    }

    private OrientGraph getGraph(final File dbPath)
    {
        OrientGraph g;
        OrientGraphFactory graphFactory = new OrientGraphFactory("plocal:" + dbPath.getAbsolutePath());
        g = graphFactory.getTx();
        g.setUseLightweightEdges(this.useLightWeightEdges);
        return g;
    }

    @Override
    public boolean nodeExists(int nodeId)
    {
        Iterable<Vertex> iter = graph.getVertices(NODE_ID, nodeId);
        return iter.iterator().hasNext();
    }

    @Override
    public Iterator<Vertex> getVertexIterator()
    {
        return graph.getVertices().iterator();
    }

    @Override
    public Iterator<Edge> getNeighborsOfVertex(Vertex v)
    {
        return v.getEdges(Direction.BOTH, SIMILAR).iterator();
    }

    @Override
    public void cleanupVertexIterator(Iterator<Vertex> it)
    {
        // NOOP for timing
    }

    @Override
    public Vertex getOtherVertexFromEdge(Edge edge, Vertex oneVertex)
    {
        return edge.getVertex(Direction.IN).equals(oneVertex) ? edge.getVertex(Direction.OUT) : edge
            .getVertex(Direction.IN);
    }

    @Override
    public Iterator<Edge> getAllEdges()
    {
        return graph.getEdges().iterator();
    }

    // NOTE(review): "src" maps to Direction.IN and "dest" to Direction.OUT
    // here, which is the reverse of the usual Blueprints convention
    // (OUT = tail/source). Preserved as-is since callers may rely on it —
    // TODO confirm against GraphDatabaseBase's contract.
    @Override
    public Vertex getSrcVertexFromEdge(Edge edge)
    {
        return edge.getVertex(Direction.IN);
    }

    @Override
    public Vertex getDestVertexFromEdge(Edge edge)
    {
        return edge.getVertex(Direction.OUT);
    }

    @Override
    public boolean edgeIteratorHasNext(Iterator<Edge> it)
    {
        return it.hasNext();
    }

    @Override
    public Edge nextEdge(Iterator<Edge> it)
    {
        return it.next();
    }

    @Override
    public void cleanupEdgeIterator(Iterator<Edge> it)
    {
        // NOOP
    }

    @Override
    public boolean vertexIteratorHasNext(Iterator<Vertex> it)
    {
        return it.hasNext();
    }

    @Override
    public Vertex nextVertex(Iterator<Vertex> it)
    {
        return it.next();
    }

    @Override
    public Vertex getVertex(Integer i)
    {
        return graph.getVertices(NODE_ID, i).iterator().next();
    }
}
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java:
--------------------------------------------------------------------------------
package eu.socialsensor.main;

import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.commons.configuration.Configuration;
import org.apache.commons.math3.util.CombinatoricsUtils;

import com.amazon.titan.diskstorage.dynamodb.BackendDataModel;
import com.amazon.titan.diskstorage.dynamodb.Constants;
import com.google.common.primitives.Ints;
import
com.thinkaurelius.titan.graphdb.configuration.GraphDatabaseConfiguration; 18 | 19 | import eu.socialsensor.dataset.DatasetFactory; 20 | 21 | /** 22 | * 23 | * @author Alexander Patrikalakis 24 | * 25 | */ 26 | public class BenchmarkConfiguration 27 | { 28 | // orientdb Configuration 29 | private static final String LIGHTWEIGHT_EDGES = "lightweight-edges"; 30 | 31 | // Sparksee / DEX configuration 32 | private static final String LICENSE_KEY = "license-key"; 33 | 34 | // Titan specific configuration 35 | private static final String TITAN = "titan"; 36 | private static final String BUFFER_SIZE = GraphDatabaseConfiguration.BUFFER_SIZE.getName(); 37 | private static final String IDS_BLOCKSIZE = GraphDatabaseConfiguration.IDS_BLOCK_SIZE.getName(); 38 | private static final String PAGE_SIZE = GraphDatabaseConfiguration.PAGE_SIZE.getName(); 39 | public static final String CSV_INTERVAL = GraphDatabaseConfiguration.METRICS_CSV_INTERVAL.getName(); 40 | public static final String CSV = GraphDatabaseConfiguration.METRICS_CSV_NS.getName(); 41 | private static final String CSV_DIR = GraphDatabaseConfiguration.METRICS_CSV_DIR.getName(); 42 | public static final String GRAPHITE = GraphDatabaseConfiguration.METRICS_GRAPHITE_NS.getName(); 43 | private static final String GRAPHITE_HOSTNAME = GraphDatabaseConfiguration.GRAPHITE_HOST.getName(); 44 | 45 | // DynamoDB Storage Backend for Titan specific configuration 46 | private static final String CONSTRUCTOR_ARGS = Constants.DYNAMODB_CREDENTIALS_CONSTRUCTOR_ARGS.getName(); 47 | private static final String CLASS_NAME = Constants.DYNAMODB_CREDENTIALS_CLASS_NAME.getName(); 48 | private static final String CONSISTENT_READ = Constants.DYNAMODB_FORCE_CONSISTENT_READ.getName(); 49 | private static final String TPS = "tps"; 50 | private static final String CREDENTIALS = Constants.DYNAMODB_CLIENT_CREDENTIALS_NAMESPACE.getName(); 51 | private static final String ENDPOINT = Constants.DYNAMODB_CLIENT_ENDPOINT.getName(); 52 | private static final 
String TABLE_PREFIX = Constants.DYNAMODB_TABLE_PREFIX.getName(); 53 | 54 | // benchmark configuration 55 | private static final String DATASET = "dataset"; 56 | private static final String DATABASE_STORAGE_DIRECTORY = "database-storage-directory"; 57 | private static final String ACTUAL_COMMUNITIES = "actual-communities"; 58 | private static final String NODES_COUNT = "nodes-count"; 59 | private static final String RANDOMIZE_CLUSTERING = "randomize-clustering"; 60 | private static final String CACHE_VALUES = "cache-values"; 61 | private static final String CACHE_INCREMENT_FACTOR = "cache-increment-factor"; 62 | private static final String CACHE_VALUES_COUNT = "cache-values-count"; 63 | private static final String PERMUTE_BENCHMARKS = "permute-benchmarks"; 64 | private static final String RANDOM_NODES = "shortest-path-random-nodes"; 65 | 66 | private static final Set metricsReporters = new HashSet(); 67 | static { 68 | metricsReporters.add(CSV); 69 | metricsReporters.add(GRAPHITE); 70 | } 71 | 72 | private final File dataset; 73 | private final List benchmarkTypes; 74 | private final SortedSet selectedDatabases; 75 | private final File resultsPath; 76 | 77 | // storage directory 78 | private final File dbStorageDirectory; 79 | 80 | // metrics (optional) 81 | private final long csvReportingInterval; 82 | private final File csvDir; 83 | private final String graphiteHostname; 84 | private final long graphiteReportingInterval; 85 | 86 | // storage backend specific settings 87 | private final long dynamodbTps; 88 | private final BackendDataModel dynamodbDataModel; 89 | private final boolean dynamodbConsistentRead; 90 | private final Boolean orientLightweightEdges; 91 | private final String sparkseeLicenseKey; 92 | 93 | // shortest path 94 | private final int randomNodes; 95 | 96 | // clustering 97 | private final Boolean randomizedClustering; 98 | private final Integer nodesCount; 99 | private final Integer cacheValuesCount; 100 | private final Double 
cacheIncrementFactor; 101 | private final List cacheValues; 102 | private final File actualCommunities; 103 | private final boolean permuteBenchmarks; 104 | private final int scenarios; 105 | private final String dynamodbCredentialsFqClassName; 106 | private final String dynamodbCredentialsCtorArguments; 107 | private final String dynamodbEndpoint; 108 | private final int bufferSize; 109 | private final int blocksize; 110 | private final int pageSize; 111 | private final int dynamodbWorkerThreads; 112 | private final boolean dynamodbPrecreateTables; 113 | private final String dynamodbTablePrefix; 114 | 115 | public String getDynamodbCredentialsFqClassName() 116 | { 117 | return dynamodbCredentialsFqClassName; 118 | } 119 | 120 | public String getDynamodbCredentialsCtorArguments() 121 | { 122 | return dynamodbCredentialsCtorArguments; 123 | } 124 | 125 | public String getDynamodbEndpoint() 126 | { 127 | return dynamodbEndpoint; 128 | } 129 | 130 | public BenchmarkConfiguration(Configuration appconfig) 131 | { 132 | if (appconfig == null) 133 | { 134 | throw new IllegalArgumentException("appconfig may not be null"); 135 | } 136 | 137 | Configuration eu = appconfig.subset("eu"); 138 | Configuration socialsensor = eu.subset("socialsensor"); 139 | 140 | //metrics 141 | final Configuration metrics = socialsensor.subset(GraphDatabaseConfiguration.METRICS_NS.getName()); 142 | 143 | final Configuration graphite = metrics.subset(GRAPHITE); 144 | this.graphiteHostname = graphite.getString(GRAPHITE_HOSTNAME, null); 145 | this.graphiteReportingInterval = graphite.getLong(GraphDatabaseConfiguration.GRAPHITE_INTERVAL.getName(), 1000 /*default 1sec*/); 146 | 147 | final Configuration csv = metrics.subset(CSV); 148 | this.csvReportingInterval = metrics.getLong(CSV_INTERVAL, 1000 /*ms*/); 149 | this.csvDir = csv.containsKey(CSV_DIR) ? 
new File(csv.getString(CSV_DIR, System.getProperty("user.dir") /*default*/)) : null; 150 | 151 | Configuration dynamodb = socialsensor.subset("dynamodb"); 152 | this.dynamodbWorkerThreads = dynamodb.getInt("workers", 25); 153 | Configuration credentials = dynamodb.subset(CREDENTIALS); 154 | this.dynamodbPrecreateTables = dynamodb.getBoolean("precreate-tables", Boolean.FALSE); 155 | this.dynamodbTps = Math.max(1, dynamodb.getLong(TPS, 750 /*default*/)); 156 | this.dynamodbConsistentRead = dynamodb.containsKey(CONSISTENT_READ) ? dynamodb.getBoolean(CONSISTENT_READ) 157 | : false; 158 | this.dynamodbDataModel = dynamodb.containsKey("data-model") ? BackendDataModel.valueOf(dynamodb 159 | .getString("data-model")) : null; 160 | this.dynamodbCredentialsFqClassName = credentials.containsKey(CLASS_NAME) ? credentials.getString(CLASS_NAME) 161 | : null; 162 | this.dynamodbCredentialsCtorArguments = credentials.containsKey(CONSTRUCTOR_ARGS) ? credentials 163 | .getString(CONSTRUCTOR_ARGS) : null; 164 | this.dynamodbEndpoint = dynamodb.containsKey(ENDPOINT) ? dynamodb.getString(ENDPOINT) : null; 165 | this.dynamodbTablePrefix = dynamodb.containsKey(TABLE_PREFIX) ? dynamodb.getString(TABLE_PREFIX) : Constants.DYNAMODB_TABLE_PREFIX.getDefaultValue(); 166 | 167 | Configuration orient = socialsensor.subset("orient"); 168 | orientLightweightEdges = orient.containsKey(LIGHTWEIGHT_EDGES) ? orient.getBoolean(LIGHTWEIGHT_EDGES) : null; 169 | 170 | Configuration sparksee = socialsensor.subset("sparksee"); 171 | sparkseeLicenseKey = sparksee.containsKey(LICENSE_KEY) ? 
sparksee.getString(LICENSE_KEY) : null; 172 | 173 | Configuration titan = socialsensor.subset(TITAN); //TODO(amcp) move dynamodb ns into titan 174 | bufferSize = titan.getInt(BUFFER_SIZE, GraphDatabaseConfiguration.BUFFER_SIZE.getDefaultValue()); 175 | blocksize = titan.getInt(IDS_BLOCKSIZE, GraphDatabaseConfiguration.IDS_BLOCK_SIZE.getDefaultValue()); 176 | pageSize = titan.getInt(PAGE_SIZE, GraphDatabaseConfiguration.PAGE_SIZE.getDefaultValue()); 177 | 178 | // database storage directory 179 | if (!socialsensor.containsKey(DATABASE_STORAGE_DIRECTORY)) 180 | { 181 | throw new IllegalArgumentException("configuration must specify database-storage-directory"); 182 | } 183 | dbStorageDirectory = new File(socialsensor.getString(DATABASE_STORAGE_DIRECTORY)); 184 | dataset = validateReadableFile(socialsensor.getString(DATASET), DATASET); 185 | 186 | // load the dataset 187 | DatasetFactory.getInstance().getDataset(dataset); 188 | 189 | if (!socialsensor.containsKey(PERMUTE_BENCHMARKS)) 190 | { 191 | throw new IllegalArgumentException("configuration must set permute-benchmarks to true or false"); 192 | } 193 | permuteBenchmarks = socialsensor.getBoolean(PERMUTE_BENCHMARKS); 194 | 195 | List benchmarkList = socialsensor.getList("benchmarks"); 196 | benchmarkTypes = new ArrayList(); 197 | for (Object str : benchmarkList) 198 | { 199 | benchmarkTypes.add(BenchmarkType.valueOf(str.toString())); 200 | } 201 | 202 | selectedDatabases = new TreeSet(); 203 | for (Object database : socialsensor.getList("databases")) 204 | { 205 | if (!GraphDatabaseType.STRING_REP_MAP.keySet().contains(database.toString())) 206 | { 207 | throw new IllegalArgumentException(String.format("selected database %s not supported", 208 | database.toString())); 209 | } 210 | selectedDatabases.add(GraphDatabaseType.STRING_REP_MAP.get(database)); 211 | } 212 | scenarios = permuteBenchmarks ? 
Ints.checkedCast(CombinatoricsUtils.factorial(selectedDatabases.size())) : 1; 213 | 214 | resultsPath = new File(System.getProperty("user.dir"), socialsensor.getString("results-path")); 215 | if (!resultsPath.exists() && !resultsPath.mkdirs()) 216 | { 217 | throw new IllegalArgumentException("unable to create results directory"); 218 | } 219 | if (!resultsPath.canWrite()) 220 | { 221 | throw new IllegalArgumentException("unable to write to results directory"); 222 | } 223 | 224 | randomNodes = socialsensor.getInteger(RANDOM_NODES, new Integer(100)); 225 | 226 | if (this.benchmarkTypes.contains(BenchmarkType.CLUSTERING)) 227 | { 228 | if (!socialsensor.containsKey(NODES_COUNT)) 229 | { 230 | throw new IllegalArgumentException("the CW benchmark requires nodes-count integer in config"); 231 | } 232 | nodesCount = socialsensor.getInt(NODES_COUNT); 233 | 234 | if (!socialsensor.containsKey(RANDOMIZE_CLUSTERING)) 235 | { 236 | throw new IllegalArgumentException("the CW benchmark requires randomize-clustering bool in config"); 237 | } 238 | randomizedClustering = socialsensor.getBoolean(RANDOMIZE_CLUSTERING); 239 | 240 | if (!socialsensor.containsKey(ACTUAL_COMMUNITIES)) 241 | { 242 | throw new IllegalArgumentException("the CW benchmark requires a file with actual communities"); 243 | } 244 | actualCommunities = validateReadableFile(socialsensor.getString(ACTUAL_COMMUNITIES), ACTUAL_COMMUNITIES); 245 | 246 | final boolean notGenerating = socialsensor.containsKey(CACHE_VALUES); 247 | if (notGenerating) 248 | { 249 | List objects = socialsensor.getList(CACHE_VALUES); 250 | cacheValues = new ArrayList(objects.size()); 251 | cacheValuesCount = null; 252 | cacheIncrementFactor = null; 253 | for (Object o : objects) 254 | { 255 | cacheValues.add(Integer.valueOf(o.toString())); 256 | } 257 | } 258 | else if (socialsensor.containsKey(CACHE_VALUES_COUNT) && socialsensor.containsKey(CACHE_INCREMENT_FACTOR)) 259 | { 260 | cacheValues = null; 261 | // generate the cache values with 
parameters 262 | if (!socialsensor.containsKey(CACHE_VALUES_COUNT)) 263 | { 264 | throw new IllegalArgumentException( 265 | "the CW benchmark requires cache-values-count int in config when cache-values not specified"); 266 | } 267 | cacheValuesCount = socialsensor.getInt(CACHE_VALUES_COUNT); 268 | 269 | if (!socialsensor.containsKey(CACHE_INCREMENT_FACTOR)) 270 | { 271 | throw new IllegalArgumentException( 272 | "the CW benchmark requires cache-increment-factor int in config when cache-values not specified"); 273 | } 274 | cacheIncrementFactor = socialsensor.getDouble(CACHE_INCREMENT_FACTOR); 275 | } 276 | else 277 | { 278 | throw new IllegalArgumentException( 279 | "when doing CW benchmark, must provide cache-values or parameters to generate them"); 280 | } 281 | } 282 | else 283 | { 284 | randomizedClustering = null; 285 | nodesCount = null; 286 | cacheValuesCount = null; 287 | cacheIncrementFactor = null; 288 | cacheValues = null; 289 | actualCommunities = null; 290 | } 291 | } 292 | 293 | public File getDataset() 294 | { 295 | return dataset; 296 | } 297 | 298 | public SortedSet getSelectedDatabases() 299 | { 300 | return selectedDatabases; 301 | } 302 | 303 | public File getDbStorageDirectory() 304 | { 305 | return dbStorageDirectory; 306 | } 307 | 308 | public File getResultsPath() 309 | { 310 | return resultsPath; 311 | } 312 | 313 | public long getDynamodbTps() 314 | { 315 | return dynamodbTps; 316 | } 317 | 318 | public boolean dynamodbConsistentRead() 319 | { 320 | return dynamodbConsistentRead; 321 | } 322 | 323 | public BackendDataModel getDynamodbDataModel() 324 | { 325 | return dynamodbDataModel; 326 | } 327 | 328 | public List getBenchmarkTypes() 329 | { 330 | return benchmarkTypes; 331 | } 332 | 333 | public Boolean randomizedClustering() 334 | { 335 | return randomizedClustering; 336 | } 337 | 338 | public Integer getNodesCount() 339 | { 340 | return nodesCount; 341 | } 342 | 343 | public Integer getCacheValuesCount() 344 | { 345 | return 
cacheValuesCount; 346 | } 347 | 348 | public Double getCacheIncrementFactor() 349 | { 350 | return cacheIncrementFactor; 351 | } 352 | 353 | public List getCacheValues() 354 | { 355 | return cacheValues; 356 | } 357 | 358 | public File getActualCommunitiesFile() 359 | { 360 | return actualCommunities; 361 | } 362 | 363 | public Boolean orientLightweightEdges() 364 | { 365 | return orientLightweightEdges; 366 | } 367 | 368 | public String getSparkseeLicenseKey() 369 | { 370 | return sparkseeLicenseKey; 371 | } 372 | 373 | public boolean permuteBenchmarks() 374 | { 375 | return permuteBenchmarks; 376 | } 377 | 378 | public int getScenarios() 379 | { 380 | return scenarios; 381 | } 382 | 383 | private static final File validateReadableFile(String fileName, String fileType) 384 | { 385 | File file = new File(fileName); 386 | if (!file.exists()) 387 | { 388 | throw new IllegalArgumentException(String.format("the %s does not exist", fileType)); 389 | } 390 | 391 | if (!(file.isFile() && file.canRead())) 392 | { 393 | throw new IllegalArgumentException(String.format("the %s must be a file that this user can read", fileType)); 394 | } 395 | return file; 396 | } 397 | 398 | public int getRandomNodes() 399 | { 400 | return randomNodes; 401 | } 402 | 403 | public long getCsvReportingInterval() 404 | { 405 | return csvReportingInterval; 406 | } 407 | 408 | public long getGraphiteReportingInterval() 409 | { 410 | return graphiteReportingInterval; 411 | } 412 | 413 | public File getCsvDir() 414 | { 415 | return csvDir; 416 | } 417 | 418 | public String getGraphiteHostname() 419 | { 420 | return graphiteHostname; 421 | } 422 | 423 | public int getTitanBufferSize() 424 | { 425 | return bufferSize; 426 | } 427 | 428 | public int getTitanIdsBlocksize() 429 | { 430 | return blocksize; 431 | } 432 | 433 | public int getTitanPageSize() 434 | { 435 | return pageSize; 436 | } 437 | 438 | public int getDynamodbWorkerThreads() 439 | { 440 | return dynamodbWorkerThreads; 441 | } 442 | 443 | 
public boolean getDynamodbPrecreateTables() 444 | { 445 | return dynamodbPrecreateTables; 446 | } 447 | 448 | public String getDynamodbTablePrefix() 449 | { 450 | return dynamodbTablePrefix; 451 | } 452 | 453 | public boolean publishCsvMetrics() 454 | { 455 | return csvDir != null; 456 | } 457 | 458 | public boolean publishGraphiteMetrics() 459 | { 460 | return graphiteHostname != null && !graphiteHostname.isEmpty(); 461 | } 462 | } 463 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | eu.socialsensor 6 | graphdb-benchmarks 7 | 1.0 8 | 9 | graphdb-benchmarks 10 | https://github.com/socialsensor/graphdb-benchmarks 11 | Performance benchmark between popular graph databases. 12 | 13 | 14 | org.sonatype.oss 15 | oss-parent 16 | 7 17 | 18 | 19 | 20 | SocialSensor 21 | http://www.socialsensor.eu/ 22 | 23 | 24 | 25 | 26 | sarovios 27 | Sotiris Beis 28 | sotbeis@iti.gr 29 | 30 | 31 | amcp 32 | Alexander Patrikalakis 33 | amcp@me.com 34 | 35 | 36 | 37 | 38 | 39 | The Apache Software License, Version 2.0 40 | http://www.apache.org/licenses/LICENSE-2.0.txt 41 | repo 42 | 43 | 44 | 45 | 46 | scm:git:git@github.com:socialsensor/graphdb-benchmarks.git 47 | scm:git:git@github.com:socialsensor/graphdb-benchmarks.git 48 | git@github.com:socialsensor/graphdb-benchmarks.git 49 | graphdb-benchmarks-1.0 50 | 51 | 52 | 53 | 2.6.0 54 | 2.2.5 55 | 0.5.4 56 | 0.98.8-hadoop2 57 | 2.0.1 58 | 1.0.0 59 | 2.1 60 | 2.18.1 61 | 1.8 62 | 3.0.0-BETA3 63 | 64 | 65 | 66 | 67 | org.antlr 68 | antlr-runtime 69 | 3.2 70 | 71 | 72 | com.google.guava 73 | guava 74 | 14.0.1 75 | 76 | 77 | colt 78 | colt 79 | 1.2.0 80 | 81 | 82 | commons-codec 83 | commons-codec 84 | 1.7 85 | 86 | 87 | org.apache.commons 88 | commons-collections4 89 | 4.0 90 | 91 | 92 | org.apache.commons 93 | commons-math3 94 | 3.4.1 95 | 96 | 97 | commons-configuration 98 | commons-configuration 
99 | 1.6 100 | 101 | 102 | org.apache.commons 103 | commons-lang3 104 | 3.3.2 105 | 106 | 107 | org.apache.logging.log4j 108 | log4j-api 109 | ${log4j2.version} 110 | 111 | 112 | org.apache.logging.log4j 113 | log4j-core 114 | ${log4j2.version} 115 | 116 | 117 | org.apache.geronimo.specs 118 | geronimo-jta_1.1_spec 119 | 1.1.1 120 | 121 | 122 | com.tinkerpop.gremlin 123 | gremlin-groovy 124 | ${blueprints.version} 125 | 126 | 127 | com.tinkerpop.gremlin 128 | gremlin-java 129 | ${blueprints.version} 130 | 131 | 132 | com.github.stephenc.high-scale-lib 133 | high-scale-lib 134 | 1.1.2 135 | 136 | 137 | com.carrotsearch 138 | hppc 139 | 0.4.2 140 | 141 | 142 | com.sleepycat 143 | je 144 | 5.0.73 145 | 146 | 147 | net.java.dev.jna 148 | jna 149 | 4.0.0 150 | 151 | 152 | org.apache.lucene 153 | lucene-core 154 | 3.6.2 155 | 156 | 157 | org.neo4j 158 | neo4j-cypher 159 | ${neo4j.version} 160 | 161 | 162 | org.neo4j 163 | neo4j 164 | ${neo4j.version} 165 | 166 | 167 | com.tinkerpop.blueprints 168 | blueprints-neo4j2-graph 169 | ${blueprints.version} 170 | 171 | 172 | ch.qos.logback 173 | logback-classic 174 | 175 | 176 | 177 | 178 | com.orientechnologies 179 | orientdb-graphdb 180 | ${orientdb.version} 181 | 182 | 183 | com.tinkerpop 184 | pipes 185 | ${blueprints.version} 186 | 187 | 188 | org.slf4j 189 | slf4j-api 190 | 1.7.5 191 | 192 | 193 | org.slf4j 194 | slf4j-log4j12 195 | 1.7.5 196 | 197 | 198 | org.iq80.snappy 199 | snappy 200 | 0.3 201 | 202 | 203 | com.spatial4j 204 | spatial4j 205 | 0.3 206 | 207 | 208 | com.thinkaurelius.titan 209 | titan-berkeleyje 210 | ${titan.version} 211 | 212 | 213 | com.thinkaurelius.titan 214 | titan-cassandra 215 | ${titan.version} 216 | 224 | 225 | 226 | com.thinkaurelius.titan 227 | titan-hbase 228 | ${titan.version} 229 | 230 | 231 | com.thinkaurelius.titan 232 | titan-core 233 | ${titan.version} 234 | 235 | 236 | org.apache.hbase 237 | hbase-client 238 | ${hbase.version} 239 | 240 | 241 | com.amazonaws 242 | 
dynamodb-titan054-storage-backend 243 | ${dynamodb.titan.version} 244 | 245 | 246 | com.sparsity 247 | sparkseejava 248 | 5.0.0 249 | 250 | 251 | com.tinkerpop.blueprints 252 | blueprints-sparksee-graph 253 | ${blueprints.version} 254 | 255 | 256 | junit 257 | junit 258 | 4.11 259 | test 260 | 261 | 262 | com.codahale.metrics 263 | metrics-core 264 | ${metrics.version} 265 | 266 | 267 | 268 | 269 | 270 | 271 | org.apache.maven.plugins 272 | maven-release-plugin 273 | 2.5 274 | 275 | false 276 | release 277 | deploy 278 | 279 | 280 | 281 | maven-clean-plugin 282 | 2.6.1 283 | 284 | 285 | 286 | ${basedir} 287 | 288 | **/storage 289 | **/results 290 | 291 | false 292 | 293 | 294 | 295 | 296 | 297 | org.sonatype.plugins 298 | nexus-staging-maven-plugin 299 | 1.6.6 300 | true 301 | 302 | sonatype-nexus-staging 303 | https://oss.sonatype.org/ 304 | true 305 | 306 | 307 | 308 | org.apache.maven.plugins 309 | maven-compiler-plugin 310 | 3.2 311 | 312 | ${jdk.version} 313 | ${jdk.version} 314 | 315 | 316 | 317 | org.apache.maven.plugins 318 | maven-source-plugin 319 | 2.4 320 | 321 | 322 | attach-sources 323 | 324 | jar-no-fork 325 | 326 | 327 | 328 | 329 | 330 | org.apache.maven.plugins 331 | maven-javadoc-plugin 332 | 2.10.1 333 | 334 | 335 | attach-javadocs 336 | 337 | jar 338 | 339 | 340 | 341 | 342 | 343 | org.apache.maven.plugins 344 | maven-dependency-plugin 345 | 2.2 346 | 347 | 348 | copy-dependencies 349 | package 350 | 351 | copy-dependencies 352 | 353 | 354 | ${project.build.directory}/dependency 355 | false 356 | false 357 | true 358 | 359 | 360 | 361 | 362 | 363 | org.apache.maven.plugins 364 | maven-surefire-plugin 365 | ${maven.surefire.version} 366 | 367 | true 368 | 369 | 370 | 371 | org.apache.maven.plugins 372 | maven-gpg-plugin 373 | 1.5 374 | 375 | 376 | sign-artifacts 377 | verify 378 | 379 | sign 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | share 390 | 391 | 392 | src/assembly/component.xml 393 | 394 | 395 | 396 | 397 | 398 | 
maven-assembly-plugin 399 | 2.5.3 400 | 401 | 402 | src/assembly/component.xml 403 | 404 | 405 | 406 | 407 | make-assembly 408 | package 409 | 410 | single 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | bench 420 | 421 | 422 | 423 | maven-surefire-plugin 424 | ${maven.surefire.version} 425 | 426 | 427 | test 428 | 429 | test 430 | 431 | 432 | 433 | **/GraphDatabaseBenchmarkTest.java 434 | 435 | false 436 | 437 | ${basedir}/src/test/resources/META-INF/log4j2.xml 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | org.apache.httpcomponents 451 | httpclient 452 | 4.3.6 453 | 454 | 455 | org.apache.httpcomponents 456 | httpcore 457 | 4.3.3 458 | 459 | 460 | joda-time 461 | joda-time 462 | 2.8.1 463 | 464 | 465 | 466 | 467 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/graphdatabases/SparkseeGraphDatabase.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.graphdatabases; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.util.ArrayList; 6 | import java.util.HashMap; 7 | import java.util.HashSet; 8 | import java.util.List; 9 | import java.util.Map; 10 | import java.util.Set; 11 | 12 | import com.sparsity.sparksee.algorithms.SinglePairShortestPathBFS; 13 | import com.sparsity.sparksee.gdb.AttributeKind; 14 | import com.sparsity.sparksee.gdb.Condition; 15 | import com.sparsity.sparksee.gdb.DataType; 16 | import com.sparsity.sparksee.gdb.Database; 17 | import com.sparsity.sparksee.gdb.EdgeData; 18 | import com.sparsity.sparksee.gdb.EdgesDirection; 19 | import com.sparsity.sparksee.gdb.Graph; 20 | import com.sparsity.sparksee.gdb.Objects; 21 | import com.sparsity.sparksee.gdb.ObjectsIterator; 22 | import com.sparsity.sparksee.gdb.Session; 23 | import com.sparsity.sparksee.gdb.Sparksee; 24 | import com.sparsity.sparksee.gdb.SparkseeConfig; 25 | import 
com.sparsity.sparksee.gdb.Value;

import eu.socialsensor.insert.Insertion;
import eu.socialsensor.insert.SparkseeMassiveInsertion;
import eu.socialsensor.insert.SparkseeSingleInsertion;
import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.BenchmarkingException;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

/**
 * Sparksee graph database implementation.
 *
 * <p>Vertex and edge iterators are Sparksee-native {@link ObjectsIterator}s, and
 * vertices/edges are addressed by their Sparksee OID ({@code Long}), hence the
 * type parameters on {@link GraphDatabaseBase}.
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 */
public class SparkseeGraphDatabase extends GraphDatabaseBase<ObjectsIterator, ObjectsIterator, Long, Long>
{
    public static final String NODE = "node";

    public static final String INSERTION_TIMES_OUTPUT_PATH = "data/sparksee.insertion.times";

    private final String sparkseeLicenseKey;

    // open() always opens read-write today; kept as a field so a read-only mode
    // would be a one-line change.
    private boolean readOnly = false;

    double totalWeight;

    private SparkseeConfig sparkseeConfig;
    private Sparksee sparksee;
    private Database database;
    private Session session;
    private Graph sparkseeGraph;

    // Sparksee schema handles, populated by createSchema(). NOTE(review): public
    // static presumably so the insertion helpers can reference them — confirm usage.
    public static int NODE_ATTRIBUTE;
    public static int COMMUNITY_ATTRIBUTE;
    public static int NODE_COMMUNITY_ATTRIBUTE;

    public static int NODE_TYPE;

    public static int EDGE_TYPE;

    // Scratch Value reused by every lookup to avoid per-call allocation; makes
    // this class non-thread-safe.
    Value value = new Value();

    public SparkseeGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn)
    {
        super(GraphDatabaseType.SPARKSEE, dbStorageDirectoryIn);
        this.sparkseeLicenseKey = config.getSparkseeLicenseKey();
    }

    /**
     * Opens an existing SparkseeDB in the storage directory and initializes the
     * session, graph handle and schema attribute ids.
     *
     * @throws BenchmarkingException if the database file cannot be opened
     */
    @Override
    public void open()
    {
        sparkseeConfig = new SparkseeConfig();
        sparkseeConfig.setLicense(sparkseeLicenseKey);
        sparksee = new Sparksee(sparkseeConfig);
        try
        {
            this.database = sparksee.open(getDbFile(dbStorageDirectory), readOnly);
        }
        catch (FileNotFoundException e)
        {
            throw new BenchmarkingException("unable to open the db storage directory for sparksee", e);
        }
        this.session = database.newSession();
        this.sparkseeGraph = session.getGraph();
        createSchema();
    }

    private String getDbFile(File dbPath)
    {
        return new File(dbPath, "SparkseeDB.gdb").getAbsolutePath();
    }

    @Override
    public void createGraphForSingleLoad()
    {
        createDatabase();
    }

    @Override
    public void createGraphForMassiveLoad()
    {
        // maybe some more configuration?
        createDatabase();
    }

    /**
     * Creates a fresh SparkseeDB under the storage directory and initializes the
     * session, graph and schema. Shared by single- and massive-load creation,
     * which previously duplicated this logic.
     *
     * @throws BenchmarkingException if the database cannot be created. The
     *         original code only printed the stack trace, which left
     *         session/database null and deferred the failure to a later NPE.
     */
    private void createDatabase()
    {
        try
        {
            dbStorageDirectory.mkdirs();
            sparkseeConfig = new SparkseeConfig();
            sparkseeConfig.setLicense(sparkseeLicenseKey);
            sparksee = new Sparksee(sparkseeConfig);
            database = sparksee.create(getDbFile(dbStorageDirectory), "SparkseeDB");
            session = database.newSession();
            sparkseeGraph = session.getGraph();
            createSchema();
        }
        catch (FileNotFoundException e)
        {
            throw new BenchmarkingException("unable to create the db storage directory for sparksee", e);
        }
    }

    /**
     * Declares the node/edge types and the id/community attributes. The node id
     * attribute is a unique String; community attributes are indexed Integers.
     */
    private void createSchema()
    {
        NODE_TYPE = sparkseeGraph.newNodeType(NODE);
        NODE_ATTRIBUTE = sparkseeGraph.newAttribute(NODE_TYPE, NODE_ID, DataType.String, AttributeKind.Unique);
        EDGE_TYPE = sparkseeGraph.newEdgeType(SIMILAR, true, false);
        COMMUNITY_ATTRIBUTE = sparkseeGraph.newAttribute(NODE_TYPE, COMMUNITY, DataType.Integer,
            AttributeKind.Indexed);
        NODE_COMMUNITY_ATTRIBUTE = sparkseeGraph.newAttribute(NODE_TYPE, NODE_COMMUNITY, DataType.Integer,
            AttributeKind.Indexed);
    }

    @Override
    public void massiveModeLoading(File dataPath)
    {
        Insertion sparkseeMassiveInsertion = new SparkseeMassiveInsertion(session);
        sparkseeMassiveInsertion.createGraph(dataPath, 0 /* scenarioNumber */);
    }

    @Override
    public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumber)
    {
        Insertion sparkseeSingleInsertion = new SparkseeSingleInsertion(this.session, resultsPath);
        sparkseeSingleInsertion.createGraph(dataPath, scenarioNumber);
    }

    /** Closes session, database and Sparksee handles in that order; idempotent. */
    @Override
    public void shutdown()
    {
        if (session != null)
        {
            session.close();
            session = null;
            database.close();
            database = null;
            sparksee.close();
            sparksee = null;
        }

    }

    @Override
    public void shutdownMassiveGraph()
    {
        shutdown();
    }

    @Override
    public void delete()
    {
        Utils.deleteRecursively(dbStorageDirectory);
    }

    /**
     * Runs a single-pair BFS shortest path (max 4 hops, outgoing edges only)
     * from the given source OID to the i-th benchmark vertex. The result is
     * discarded; only the traversal cost is being benchmarked.
     */
    @Override
    public void shortestPath(final Long srcNodeID, Integer i)
    {
        int nodeType = sparkseeGraph.findType(NODE);
        int edgeType = sparkseeGraph.findType(SIMILAR);

        long dstNodeID = getVertex(i);
        SinglePairShortestPathBFS shortestPathBFS = new SinglePairShortestPathBFS(session, srcNodeID, dstNodeID);
        shortestPathBFS.addNodeType(nodeType);
        shortestPathBFS.addEdgeType(edgeType, EdgesDirection.Outgoing);
        shortestPathBFS.setMaximumHops(4);
        shortestPathBFS.run();
        shortestPathBFS.close();
    }

    @Override
    public int getNodeCount()
    {
        return (int) sparkseeGraph.countNodes();
    }

    /** @return benchmark ids of the outgoing neighbors of the node with the given id */
    @Override
    public Set<Integer> getNeighborsIds(int nodeId)
    {
        Set<Integer> neighbors = new HashSet<Integer>();
        long nodeID = sparkseeGraph.findObject(NODE_ATTRIBUTE, value.setString(String.valueOf(nodeId)));
        Objects neighborsObjects = sparkseeGraph.neighbors(nodeID, EDGE_TYPE, EdgesDirection.Outgoing);
        ObjectsIterator neighborsIter = neighborsObjects.iterator();
        while (neighborsIter.hasNext())
        {
            long neighborID = neighborsIter.next();
            Value neighborNodeID = sparkseeGraph.getAttribute(neighborID, NODE_ATTRIBUTE);
            neighbors.add(Integer.valueOf(neighborNodeID.getString()));
        }
        neighborsIter.close();
        neighborsObjects.close();
        return neighbors;
    }

    /** Node weight is defined as its out-degree over SIMILAR edges. */
    @Override
    public double getNodeWeight(int nodeId)
    {
        long nodeID = sparkseeGraph.findObject(NODE_ATTRIBUTE, value.setString(String.valueOf(nodeId)));
        return getNodeOutDegree(nodeID);
    }

    public double getNodeInDegree(long node)
    {
        long inDegree = sparkseeGraph.degree(node, EDGE_TYPE, EdgesDirection.Ingoing);
        return (double) inDegree;
    }

    public double getNodeOutDegree(long node)
    {
        long outDegree = sparkseeGraph.degree(node, EDGE_TYPE, EdgesDirection.Outgoing);
        return (double) outDegree;
    }

    /** Assigns every node its own community and node-community id (0..n-1). */
    @Override
    public void initCommunityProperty()
    {
        int communityCounter = 0;
        // basic or indexed attribute?
        Objects nodes = sparkseeGraph.select(NODE_TYPE);
        ObjectsIterator nodesIter = nodes.iterator();
        while (nodesIter.hasNext())
        {
            long nodeID = nodesIter.next();
            sparkseeGraph.setAttribute(nodeID, COMMUNITY_ATTRIBUTE, value.setInteger(communityCounter));
            sparkseeGraph.setAttribute(nodeID, NODE_COMMUNITY_ATTRIBUTE, value.setInteger(communityCounter));
            communityCounter++;
        }
        nodesIter.close();
        nodes.close();
    }

    /**
     * @return the community ids reachable over outgoing edges from any node in
     *         the given node-community
     */
    @Override
    public Set<Integer> getCommunitiesConnectedToNodeCommunities(int nodeCommunities)
    {
        Set<Integer> communities = new HashSet<Integer>();
        Objects nodes = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(nodeCommunities));
        ObjectsIterator nodesIter = nodes.iterator();
        while (nodesIter.hasNext())
        {
            long nodeID = nodesIter.next();
            Objects neighbors = sparkseeGraph.neighbors(nodeID, EDGE_TYPE, EdgesDirection.Outgoing);
            ObjectsIterator neighborsIter = neighbors.iterator();
            while (neighborsIter.hasNext())
            {
                long neighborID = neighborsIter.next();
                Value community = sparkseeGraph.getAttribute(neighborID, COMMUNITY_ATTRIBUTE);
                communities.add(community.getInteger());
            }
            neighborsIter.close();
            neighbors.close();
        }
        nodesIter.close();
        nodes.close();
        return communities;
    }

    @Override
    public Set<Integer> getNodesFromCommunity(int community)
    {
        Set<Integer> nodesFromCommunity = new HashSet<Integer>();
        Objects nodes = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal, value.setInteger(community));
        ObjectsIterator nodesIter = nodes.iterator();
        while (nodesIter.hasNext())
        {
            Value nodeId = sparkseeGraph.getAttribute(nodesIter.next(), NODE_ATTRIBUTE);
            nodesFromCommunity.add(Integer.valueOf(nodeId.getString()));
        }
        nodesIter.close();
        nodes.close();
        return nodesFromCommunity;
    }

    @Override
    public Set<Integer> getNodesFromNodeCommunity(int nodeCommunity)
    {
        Set<Integer> nodesFromNodeCommunity = new HashSet<Integer>();
        Objects nodes = sparkseeGraph
            .select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal, value.setInteger(nodeCommunity));
        ObjectsIterator nodesIter = nodes.iterator();
        while (nodesIter.hasNext())
        {
            Value nodeId = sparkseeGraph.getAttribute(nodesIter.next(), NODE_ATTRIBUTE);
            nodesFromNodeCommunity.add(Integer.valueOf(nodeId.getString()));
        }
        nodesIter.close();
        nodes.close();
        return nodesFromNodeCommunity;
    }

    /**
     * Counts outgoing edges that start in the given node-community and end in
     * the given community.
     */
    @Override
    public double getEdgesInsideCommunity(int nodeCommunity, int communityNode)
    {
        double edges = 0;
        Objects nodesFromNodeCommunitiy = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(nodeCommunity));
        Objects nodesFromCommunity = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(communityNode));
        ObjectsIterator nodesFromNodeCommunityIter = nodesFromNodeCommunitiy.iterator();
        while (nodesFromNodeCommunityIter.hasNext())
        {
            long nodeID = nodesFromNodeCommunityIter.next();
            Objects neighbors = sparkseeGraph.neighbors(nodeID, EDGE_TYPE, EdgesDirection.Outgoing);
            ObjectsIterator neighborsIter = neighbors.iterator();
            while (neighborsIter.hasNext())
            {
                if (nodesFromCommunity.contains(neighborsIter.next()))
                {
                    edges++;
                }
            }
            neighborsIter.close();
            neighbors.close();
        }
        nodesFromNodeCommunityIter.close();
        nodesFromCommunity.close();
        nodesFromNodeCommunitiy.close();
        return edges;
    }

    /**
     * Sum of out-degrees over all nodes in the community.
     * NOTE(review): a community of size 1 contributes weight 0 by design of the
     * size() > 1 guard — confirm this matches the other database implementations.
     */
    @Override
    public double getCommunityWeight(int community)
    {
        double communityWeight = 0;
        Objects nodesFromCommunity = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(community));
        ObjectsIterator nodesFromCommunityIter = nodesFromCommunity.iterator();
        if (nodesFromCommunity.size() > 1)
        {
            while (nodesFromCommunityIter.hasNext())
            {
                communityWeight += getNodeOutDegree(nodesFromCommunityIter.next());
            }
        }
        nodesFromCommunityIter.close();
        nodesFromCommunity.close();
        return communityWeight;
    }

    /** Same as {@link #getCommunityWeight(int)} but over node-community membership. */
    @Override
    public double getNodeCommunityWeight(int nodeCommunity)
    {
        double nodeCommunityWeight = 0;
        Objects nodesFromNodeCommunity = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(nodeCommunity));
        ObjectsIterator nodesFromNodeCommunityIter = nodesFromNodeCommunity.iterator();
        if (nodesFromNodeCommunity.size() > 1)
        {
            while (nodesFromNodeCommunityIter.hasNext())
            {
                nodeCommunityWeight += getNodeOutDegree(nodesFromNodeCommunityIter.next());
            }
        }
        nodesFromNodeCommunityIter.close();
        nodesFromNodeCommunity.close();
        return nodeCommunityWeight;
    }

    /** Reassigns every node of the given node-community to the target community. */
    @Override
    public void moveNode(int nodeCommunity, int toCommunity)
    {
        Objects fromNodes = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(nodeCommunity));
        ObjectsIterator fromNodesIter = fromNodes.iterator();
        while (fromNodesIter.hasNext())
        {
            sparkseeGraph.setAttribute(fromNodesIter.next(), COMMUNITY_ATTRIBUTE, value.setInteger(toCommunity));
        }
        fromNodesIter.close();
        fromNodes.close();
    }

    /** Total weight of the graph, defined here as the edge count. */
    @Override
    public double getGraphWeightSum()
    {
        return (double) sparkseeGraph.countEdges();
    }

    /**
     * Renumbers community ids to a dense range starting at 0, copying the new id
     * into both community attributes.
     *
     * @return the number of distinct communities after renumbering
     */
    @Override
    public int reInitializeCommunities()
    {
        Map<Integer, Integer> initCommunities = new HashMap<Integer, Integer>();
        int communityCounter = 0;
        Objects nodes = sparkseeGraph.select(NODE_TYPE);
        ObjectsIterator nodesIter = nodes.iterator();
        while (nodesIter.hasNext())
        {
            long nodeID = nodesIter.next();
            Value communityId = sparkseeGraph.getAttribute(nodeID, COMMUNITY_ATTRIBUTE);
            if (!initCommunities.containsKey(communityId.getInteger()))
            {
                initCommunities.put(communityId.getInteger(), communityCounter);
                communityCounter++;
            }
            int newCommunityId = initCommunities.get(communityId.getInteger());
            sparkseeGraph.setAttribute(nodeID, COMMUNITY_ATTRIBUTE, value.setInteger(newCommunityId));
            sparkseeGraph.setAttribute(nodeID, NODE_COMMUNITY_ATTRIBUTE, value.setInteger(newCommunityId));
        }
        nodesIter.close();
        nodes.close();
        return communityCounter;
    }

    /** @return the community id of (an arbitrary node of) the given node-community */
    @Override
    public int getCommunity(int nodeCommunity)
    {
        long nodeID = sparkseeGraph.findObject(NODE_COMMUNITY_ATTRIBUTE, value.setInteger(nodeCommunity));
        Value communityId = sparkseeGraph.getAttribute(nodeID, COMMUNITY_ATTRIBUTE);
        return communityId.getInteger();
    }

    @Override
    public int getCommunityFromNode(int nodeId)
    {
        long nodeID = sparkseeGraph.findObject(NODE_ATTRIBUTE, value.setString(String.valueOf(nodeId)));
        Value communityId = sparkseeGraph.getAttribute(nodeID, COMMUNITY_ATTRIBUTE);
        return communityId.getInteger();
    }

    /** Community "size" is the number of distinct node-communities it contains. */
    @Override
    public int getCommunitySize(int community)
    {
        Objects nodesFromCommunities = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(community));
        ObjectsIterator nodesFromCommunitiesIter = nodesFromCommunities.iterator();
        Set<Integer> nodeCommunities = new HashSet<Integer>();
        while (nodesFromCommunitiesIter.hasNext())
        {
            Value nodeCommunityId = sparkseeGraph.getAttribute(nodesFromCommunitiesIter.next(),
                NODE_COMMUNITY_ATTRIBUTE);
            nodeCommunities.add(nodeCommunityId.getInteger());
        }
        nodesFromCommunitiesIter.close();
        nodesFromCommunities.close();
        return nodeCommunities.size();
    }

    /** @return map of community id (0..numberOfCommunities-1) to its member node ids */
    @Override
    public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities)
    {
        Map<Integer, List<Integer>> communities = new HashMap<Integer, List<Integer>>();
        for (int i = 0; i < numberOfCommunities; i++)
        {
            Objects nodesFromCommunity = sparkseeGraph
                .select(COMMUNITY_ATTRIBUTE, Condition.Equal, value.setInteger(i));
            ObjectsIterator nodesFromCommunityIter = nodesFromCommunity.iterator();
            List<Integer> nodes = new ArrayList<Integer>();
            while (nodesFromCommunityIter.hasNext())
            {
                Value nodeId = sparkseeGraph.getAttribute(nodesFromCommunityIter.next(), NODE_ATTRIBUTE);
                nodes.add(Integer.valueOf(nodeId.getString()));
            }
            communities.put(i, nodes);
            nodesFromCommunityIter.close();
            nodesFromCommunity.close();
        }
        return communities;
    }

    @Override
    public boolean nodeExists(int nodeId)
    {
        // NODE_ATTRIBUTE is declared DataType.String in createSchema(); the lookup
        // previously used setInteger(nodeId), which mismatches the attribute type
        // (every other id lookup in this class uses setString).
        Objects nodes = sparkseeGraph.select(NODE_ATTRIBUTE, Condition.Equal,
            value.setString(String.valueOf(nodeId)));
        ObjectsIterator nodesIter = nodes.iterator();
        boolean exists = nodesIter.hasNext();
        nodesIter.close();
        nodes.close();
        return exists;
    }

    @Override
    public ObjectsIterator getVertexIterator()
    {
        final int nodeType = sparkseeGraph.findType(NODE);
        final Objects objects = sparkseeGraph.select(nodeType);
        return objects.iterator();
    }

    @Override
    public ObjectsIterator getNeighborsOfVertex(Long v)
    {
        final int edgeType = sparkseeGraph.findType(SIMILAR);
        final Objects neighbors = sparkseeGraph.neighbors(v, edgeType, EdgesDirection.Any);
        return neighbors.iterator();
    }

    @Override
    public void cleanupVertexIterator(ObjectsIterator it)
    {
        it.close();
    }

    @Override
    public Long getOtherVertexFromEdge(Long r, Long oneVertex)
    {
        return r; //pass through
    }

    @Override
    public ObjectsIterator getAllEdges()
    {
        int edgeType = sparkseeGraph.findType(SIMILAR);
        Objects objects = sparkseeGraph.select(edgeType);
        return objects.iterator();
    }

    @Override
    public Long getSrcVertexFromEdge(Long edge)
    {
        EdgeData edgeData = sparkseeGraph.getEdgeData(edge);
        return edgeData.getTail();
    }

    @Override
    public Long getDestVertexFromEdge(Long edge)
    {
        EdgeData edgeData = sparkseeGraph.getEdgeData(edge);
        return edgeData.getHead();
    }

    @Override
    public boolean edgeIteratorHasNext(ObjectsIterator it)
    {
        return it.hasNext();
    }

    @Override
    public Long nextEdge(ObjectsIterator it)
    {
        return it.next();
    }

    @Override
    public void cleanupEdgeIterator(ObjectsIterator it)
    {
        it.close();
    }

    @Override
    public boolean vertexIteratorHasNext(ObjectsIterator it)
    {
        return it.hasNext();
    }

    @Override
    public Long nextVertex(ObjectsIterator it)
    {
        return it.next();
    }

    @Override
    public Long getVertex(Integer i)
    {
        int nodeType = sparkseeGraph.findType(NODE);
        int nodeAttribute = sparkseeGraph.findAttribute(nodeType, NODE_ID);
        // The NODE_ID attribute is stored as a String (see createSchema); the
        // previous setInteger(i) queried a String attribute with an Integer value.
        return sparkseeGraph.findObject(nodeAttribute, value.setString(String.valueOf(i)));
    }
}
--------------------------------------------------------------------------------