├── src ├── main │ └── java │ │ └── eu │ │ └── socialsensor │ │ ├── benchmarks │ │ ├── InsertsGraphData.java │ │ ├── RequiresGraphData.java │ │ ├── Benchmark.java │ │ ├── DeleteGraphBenchmark.java │ │ ├── FindNodesOfAllEdgesBenchmark.java │ │ ├── FindNeighboursOfAllNodesBenchmark.java │ │ ├── FindShortestPathBenchmark.java │ │ ├── MassiveInsertionBenchmark.java │ │ ├── BenchmarkBase.java │ │ ├── SingleInsertionBenchmark.java │ │ ├── PermutingBenchmarkBase.java │ │ └── ClusteringBenchmark.java │ │ ├── insert │ │ ├── Insertion.java │ │ ├── SparkseeSingleInsertion.java │ │ ├── SparkseeMassiveInsertion.java │ │ ├── Neo4jMassiveInsertion.java │ │ ├── TitanMassiveInsertion.java │ │ ├── TitanSingleInsertion.java │ │ ├── OrientMassiveInsertion.java │ │ ├── OrientSingleInsertion.java │ │ ├── OrientAbstractInsertion.java │ │ ├── Neo4jSingleInsertion.java │ │ └── InsertionBase.java │ │ ├── main │ │ ├── BenchmarkingException.java │ │ ├── BenchmarkType.java │ │ ├── GraphDatabaseType.java │ │ ├── GraphDatabaseBenchmark.java │ │ └── BenchmarkConfiguration.java │ │ ├── dataset │ │ ├── DatasetFactory.java │ │ └── Dataset.java │ │ ├── utils │ │ ├── PermuteMethod.java │ │ ├── Metrics.java │ │ └── Utils.java │ │ ├── clustering │ │ ├── LouvainMethod.java │ │ └── Cache.java │ │ └── graphdatabases │ │ ├── GraphDatabase.java │ │ ├── GraphDatabaseBase.java │ │ ├── OrientGraphDatabase.java │ │ └── SparkseeGraphDatabase.java └── test │ ├── resources │ └── META-INF │ │ ├── log4j2.xml │ │ ├── log4j.properties │ │ └── input.properties │ └── java │ └── eu │ └── socialsensor │ └── main │ └── GraphDatabaseBenchmarkTest.java ├── .gitignore ├── NOTICE ├── README.md ├── LICENSE └── pom.xml /src/main/java/eu/socialsensor/benchmarks/InsertsGraphData.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | public interface InsertsGraphData extends Benchmark 4 | { 5 | 6 | } 7 | 
-------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/RequiresGraphData.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | public interface RequiresGraphData extends Benchmark 4 | { 5 | 6 | } 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/* 2 | *.class 3 | /target 4 | /data 5 | /.project 6 | /.gitignore 7 | /.settings 8 | /.classpath 9 | .idea/ 10 | graphdb-benchmarks.iml 11 | metrics/ 12 | results/ 13 | storage/ 14 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/Benchmark.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | /** 4 | * Represents the benchmarks 5 | * 6 | * @author sotbeis 7 | * @email sotbeis@iti.gr 8 | */ 9 | public interface Benchmark 10 | { 11 | 12 | /** 13 | * Start the selected benchmark 14 | */ 15 | public void startBenchmark(); 16 | 17 | } 18 | -------------------------------------------------------------------------------- /src/test/resources/META-INF/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | This product includes software developed by Information Technologies Institute 2 | (CERTH, 57001, Thermi, Greece), and the following individuals: 3 | * Sotiris Beis 4 | * Alexander Patrikalakis 5 | 6 | It also includes software from other open source projects including, 7 | but not limited to (check pom.xml for complete 
package eu.socialsensor.insert;

import java.io.File;

/**
 * Represents the insertion of data in each graph database
 *
 * @author sotbeis, sotbeis@iti.gr
 */
public interface Insertion
{

    /**
     * Loads the data in each graph database
     *
     * @param dataset the dataset file to load (javadoc previously named a
     *            non-existent {@code datasetDir} parameter)
     * @param scenarioNumber ordinal of the benchmark scenario (permutation)
     *            being executed; used to distinguish result files per run
     */
    void createGraph(File dataset, int scenarioNumber);

}
9 | log4j.appender.stdout.layout.ConversionPattern=%d (%t) [%5p] (%F:%L) - %m%n 10 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/main/BenchmarkingException.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.main; 2 | 3 | public class BenchmarkingException extends RuntimeException 4 | { 5 | 6 | /** 7 | * 8 | */ 9 | private static final long serialVersionUID = -4165548376731455231L; 10 | 11 | public BenchmarkingException(String message) 12 | { 13 | super(message); 14 | } 15 | 16 | public BenchmarkingException(String message, Throwable cause) 17 | { 18 | super(message, cause); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /src/test/java/eu/socialsensor/main/GraphDatabaseBenchmarkTest.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.main; 2 | 3 | import static org.junit.Assert.fail; 4 | import org.junit.Test; 5 | 6 | public class GraphDatabaseBenchmarkTest 7 | { 8 | @Test 9 | public void testGraphDatabaseBenchmark() 10 | { 11 | GraphDatabaseBenchmark bench = new GraphDatabaseBenchmark(null /* inputPath */); 12 | try 13 | { 14 | bench.run(); 15 | } 16 | catch (Exception e) 17 | { 18 | e.printStackTrace(); 19 | fail("Got unexpected exception: " + e.getMessage()); 20 | } 21 | 22 | //bench.cleanup(); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/dataset/DatasetFactory.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.dataset; 2 | 3 | import java.io.File; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | /** 8 | * 9 | * @author Alexander Patrikalakis 10 | * 11 | */ 12 | public class DatasetFactory 13 | { 14 | private static DatasetFactory theInstance = null; 15 
package eu.socialsensor.benchmarks;

import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;

import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.BenchmarkType;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

/**
 * Benchmark that measures the time required to delete a graph.
 *
 * @author Alexander Patrikalakis
 */
public class DeleteGraphBenchmark extends PermutingBenchmarkBase implements RequiresGraphData
{
    public DeleteGraphBenchmark(BenchmarkConfiguration bench)
    {
        super(bench, BenchmarkType.DELETION);
    }

    @Override
    public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
    {
        // Time only the database deletion itself.
        final Stopwatch timer = new Stopwatch();
        timer.start();
        Utils.deleteDatabase(type, bench);
        times.get(type).add((double) timer.elapsed(TimeUnit.MILLISECONDS));
    }
}
package eu.socialsensor.benchmarks;

import java.util.concurrent.TimeUnit;

import com.google.common.base.Stopwatch;

import eu.socialsensor.graphdatabases.GraphDatabase;
import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.BenchmarkType;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

/**
 * FindNodesOfAllEdgesBenchmark implementation
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 */
public class FindNodesOfAllEdgesBenchmark extends PermutingBenchmarkBase implements RequiresGraphData
{
    public FindNodesOfAllEdgesBenchmark(BenchmarkConfiguration config)
    {
        super(config, BenchmarkType.FIND_ADJACENT_NODES);
    }

    @Override
    public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
    {
        final GraphDatabase db = Utils.createDatabaseInstance(bench, type);
        db.open();
        final Stopwatch timer = new Stopwatch();
        timer.start();
        db.findNodesOfAllEdges();
        // NOTE(review): shutdown happens before the timer is read, so shutdown
        // cost is included in the reported figure - confirm this is intended.
        db.shutdown();
        times.get(type).add((double) timer.elapsed(TimeUnit.MILLISECONDS));
    }
}
package eu.socialsensor.insert;

import java.io.File;

import com.sparsity.sparksee.gdb.Graph;
import com.sparsity.sparksee.gdb.Session;
import com.sparsity.sparksee.gdb.Value;

import eu.socialsensor.graphdatabases.SparkseeGraphDatabase;
import eu.socialsensor.main.GraphDatabaseType;

/**
 * Implementation of single insertion in the Sparksee graph database. Each
 * edge creation is committed in its own transaction.
 */
public class SparkseeSingleInsertion extends InsertionBase<Long>
{
    private final Session session;
    private final Graph sparkseeGraph;

    // Removed an unused instance field (Value value) and a copy-pasted
    // "massive load" comment that did not apply to single insertion.
    public SparkseeSingleInsertion(Session session, File resultsPath)
    {
        super(GraphDatabaseType.SPARKSEE, resultsPath);
        this.session = session;
        this.sparkseeGraph = session.getGraph();
    }

    /**
     * Finds the node whose id attribute equals {@code value}, creating it if
     * it does not exist yet.
     */
    @Override
    public Long getOrCreate(String value)
    {
        Value sparkseeValue = new Value();
        return sparkseeGraph.findOrCreateObject(SparkseeGraphDatabase.NODE_ATTRIBUTE, sparkseeValue.setString(value));
    }

    /**
     * Creates an edge from {@code src} to {@code dest} inside a dedicated
     * transaction.
     */
    @Override
    public void relateNodes(Long src, Long dest)
    {
        session.begin();
        sparkseeGraph.newEdge(SparkseeGraphDatabase.EDGE_TYPE, src, dest);
        session.commit();
    }

}
package eu.socialsensor.main;

import java.util.EnumSet;
import java.util.Set;

/**
 * Enumerates the benchmark workloads; each constant carries a human-readable
 * name and the prefix of the CSV file its results are written to.
 *
 * @author Alexander Patrikalakis
 */
public enum BenchmarkType
{
    MASSIVE_INSERTION("Massive Insertion", "MassiveInsertion"),
    SINGLE_INSERTION("Single Insertion", "SingleInsertion"),
    DELETION("Delete Graph", "DeleteGraph"),
    FIND_NEIGHBOURS("Find Neighbours of All Nodes", "FindNeighbours"),
    FIND_ADJACENT_NODES("Find Adjacent Nodes of All Edges", "FindAdjacent"),
    FIND_SHORTEST_PATH("Find Shortest Path", "FindShortest"),
    CLUSTERING("Clustering", "Clustering");

    /** Benchmarks that load graph data into the database under test. */
    public static final Set<BenchmarkType> INSERTING_BENCHMARK_SET =
        EnumSet.of(MASSIVE_INSERTION, SINGLE_INSERTION);

    private final String longname;
    private final String filenamePrefix;

    private BenchmarkType(String longName, String filenamePrefix)
    {
        this.longname = longName;
        this.filenamePrefix = filenamePrefix;
    }

    /** @return the human-readable benchmark name. */
    public String longname()
    {
        return longname;
    }

    /** @return the name of the CSV file results of this benchmark go to. */
    public String getResultsFileName()
    {
        return filenamePrefix + ".csv";
    }
}
com.sparsity.sparksee.gdb.Session; 5 | import com.sparsity.sparksee.gdb.Value; 6 | 7 | import eu.socialsensor.graphdatabases.SparkseeGraphDatabase; 8 | import eu.socialsensor.main.GraphDatabaseType; 9 | 10 | public class SparkseeMassiveInsertion extends InsertionBase implements Insertion 11 | { 12 | private final Session session; 13 | private final Graph sparkseeGraph; 14 | private int operations; 15 | 16 | public SparkseeMassiveInsertion(Session session) 17 | { 18 | super(GraphDatabaseType.SPARKSEE, null /* resultsPath */); 19 | this.session = session; 20 | this.sparkseeGraph = session.getGraph(); 21 | this.operations = 0; 22 | } 23 | 24 | @Override 25 | public Long getOrCreate(String value) 26 | { 27 | Value sparkseeValue = new Value(); 28 | return sparkseeGraph.findOrCreateObject(SparkseeGraphDatabase.NODE_ATTRIBUTE, sparkseeValue.setString(value)); 29 | } 30 | 31 | @Override 32 | public void relateNodes(Long src, Long dest) 33 | { 34 | sparkseeGraph.newEdge(SparkseeGraphDatabase.EDGE_TYPE, src, dest); 35 | operations++; 36 | if (operations == 10000) 37 | { 38 | session.commit(); 39 | session.begin(); 40 | operations = 0; 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/insert/Neo4jMassiveInsertion.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.insert; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import org.neo4j.helpers.collection.MapUtil; 6 | import org.neo4j.unsafe.batchinsert.BatchInserter; 7 | 8 | import eu.socialsensor.graphdatabases.Neo4jGraphDatabase; 9 | import eu.socialsensor.main.GraphDatabaseType; 10 | 11 | /** 12 | * Implementation of massive Insertion in Neo4j graph database 13 | * 14 | * @author sotbeis, sotbeis@iti.gr 15 | * @author Alexander Patrikalakis 16 | * 17 | */ 18 | public final class Neo4jMassiveInsertion extends InsertionBase 19 | { 20 | private final 
package eu.socialsensor.insert;

import java.util.HashMap;
import java.util.Map;
import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.unsafe.batchinsert.BatchInserter;

import eu.socialsensor.graphdatabases.Neo4jGraphDatabase;
import eu.socialsensor.main.GraphDatabaseType;

/**
 * Implementation of massive Insertion in Neo4j graph database
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 *
 */
public final class Neo4jMassiveInsertion extends InsertionBase<Long>
{
    private final BatchInserter inserter;
    // Maps dataset node ids to the Neo4j node ids the batch inserter assigned.
    private final Map<Long, Long> cache = new HashMap<Long, Long>();

    public Neo4jMassiveInsertion(BatchInserter inserter)
    {
        super(GraphDatabaseType.NEO4J, null /* resultsPath */);
        this.inserter = inserter;
    }

    /**
     * Returns the Neo4j node id for the given dataset node id, creating the
     * node on first sight. Parses the id once instead of twice.
     */
    @Override
    protected Long getOrCreate(String value)
    {
        final Long key = Long.valueOf(value);
        Long id = cache.get(key);
        if (id == null)
        {
            Map<String, Object> properties = MapUtil.map("nodeId", value);
            id = inserter.createNode(properties, Neo4jGraphDatabase.NODE_LABEL);
            cache.put(key, id);
        }
        return id;
    }

    /** Creates a SIMILAR relationship between the two nodes. */
    @Override
    protected void relateNodes(Long src, Long dest)
    {
        inserter.createRelationship(src, dest, Neo4jGraphDatabase.RelTypes.SIMILAR, null);
    }
}
Vertex vertex = batchGraph.getVertex(titanVertexId); 34 | if (vertex == null) 35 | { 36 | vertex = batchGraph.addVertex(titanVertexId); 37 | vertex.setProperty("nodeId", intVal); 38 | } 39 | return vertex; 40 | } 41 | 42 | @Override 43 | public void relateNodes(Vertex src, Vertex dest) 44 | { 45 | src.addEdge("similar", dest); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | import eu.socialsensor.dataset.DatasetFactory; 4 | import eu.socialsensor.graphdatabases.GraphDatabase; 5 | import eu.socialsensor.main.BenchmarkConfiguration; 6 | import eu.socialsensor.main.BenchmarkType; 7 | import eu.socialsensor.main.GraphDatabaseType; 8 | import eu.socialsensor.utils.Utils; 9 | 10 | import java.util.Set; 11 | import java.util.concurrent.TimeUnit; 12 | 13 | import com.google.common.base.Stopwatch; 14 | 15 | /** 16 | * FindShortestPathBenchmark implementation 17 | * 18 | * @author sotbeis, sotbeis@iti.gr 19 | * @author Alexander Patrikalakis 20 | */ 21 | public class FindShortestPathBenchmark extends PermutingBenchmarkBase implements RequiresGraphData 22 | { 23 | 24 | private final Set generatedNodes; 25 | 26 | public FindShortestPathBenchmark(BenchmarkConfiguration config) 27 | { 28 | super(config, BenchmarkType.FIND_SHORTEST_PATH); 29 | generatedNodes = DatasetFactory.getInstance().getDataset(config.getDataset()) 30 | .generateRandomNodes(config.getRandomNodes()); 31 | } 32 | 33 | @Override 34 | public void benchmarkOne(GraphDatabaseType type, int scenarioNumber) 35 | { 36 | GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type); 37 | graphDatabase.open(); 38 | Stopwatch watch = new Stopwatch(); 39 | watch.start(); 40 | graphDatabase.shortestPaths(generatedNodes); 41 | graphDatabase.shutdown(); 42 | 
package eu.socialsensor.dataset;

import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.commons.math3.util.MathArrays;

import eu.socialsensor.utils.Utils;

/**
 * In-memory representation of a tab-separated edge-list dataset.
 *
 * @author Alexander Patrikalakis
 */
public class Dataset implements Iterable<List<String>>
{
    private final List<List<String>> data;

    public Dataset(File datasetFile)
    {
        data = Utils.readTabulatedLines(datasetFile, 4 /* numberOfLinesToSkip */);
    }

    /**
     * Selects {@code numRandomNodes} distinct node ids from the dataset,
     * uniformly at random.
     *
     * @param numRandomNodes number of distinct node ids to pick
     * @return the selected node ids
     * @throws IllegalArgumentException if more nodes are requested than the
     *             dataset contains (previously an ArrayIndexOutOfBoundsException)
     */
    public Set<Integer> generateRandomNodes(int numRandomNodes)
    {
        // NOTE(review): the constructor already skips 4 header lines, so the
        // subList(4, ...) below additionally drops the first 4 data lines from
        // the candidate pool - confirm this double skip is intended.
        Set<String> nodes = new HashSet<String>();
        for (List<String> line : data.subList(4, data.size()))
        {
            for (String nodeId : line)
            {
                nodes.add(nodeId.trim());
            }
        }

        List<String> nodeList = new ArrayList<String>(nodes);
        if (numRandomNodes > nodeList.size())
        {
            throw new IllegalArgumentException("requested " + numRandomNodes
                + " random nodes but dataset only has " + nodeList.size());
        }

        // Shuffle index positions rather than the node list itself.
        int[] nodeIndexList = new int[nodeList.size()];
        for (int i = 0; i < nodeList.size(); i++)
        {
            nodeIndexList[i] = i;
        }
        MathArrays.shuffle(nodeIndexList);

        Set<Integer> generatedNodes = new HashSet<Integer>();
        for (int i = 0; i < numRandomNodes; i++)
        {
            generatedNodes.add(Integer.valueOf(nodeList.get(nodeIndexList[i])));
        }
        return generatedNodes;
    }

    @Override
    public Iterator<List<String>> iterator()
    {
        return data.iterator();
    }
}
package eu.socialsensor.insert;

import java.io.File;
import java.util.Iterator;

import com.thinkaurelius.titan.core.TitanGraph;
import com.thinkaurelius.titan.core.util.TitanId;
import com.tinkerpop.blueprints.Compare;
import com.tinkerpop.blueprints.Vertex;

import eu.socialsensor.main.GraphDatabaseType;

/**
 * Implementation of single Insertion in Titan graph database
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 *
 */
public class TitanSingleInsertion extends InsertionBase<Vertex>
{
    private final TitanGraph titanGraph;

    public TitanSingleInsertion(TitanGraph titanGraph, GraphDatabaseType type, File resultsPath)
    {
        super(type, resultsPath);
        this.titanGraph = titanGraph;
    }

    /**
     * Returns the vertex with the given "nodeId" property, creating it (and
     * committing) when it does not exist.
     */
    @Override
    public Vertex getOrCreate(String value)
    {
        final Integer intValue = Integer.valueOf(value);
        // Run the index lookup once; the original issued the same query twice
        // (once for hasNext and once for next).
        final Iterator<Vertex> hits =
            titanGraph.query().has("nodeId", Compare.EQUAL, intValue).vertices().iterator();
        if (hits.hasNext())
        {
            return hits.next();
        }
        final long titanVertexId = TitanId.toVertexId(intValue);
        final Vertex v = titanGraph.addVertex(titanVertexId);
        v.setProperty("nodeId", intValue);
        titanGraph.commit();
        return v;
    }

    /**
     * Adds a "similar" edge and commits; on failure the transaction is rolled
     * back and the edge is silently dropped.
     */
    @Override
    public void relateNodes(Vertex src, Vertex dest)
    {
        try
        {
            titanGraph.addEdge(null, src, dest, "similar");
            titanGraph.commit();
        }
        catch (Exception e)
        {
            titanGraph.rollback(); //TODO(amcp) why can this happen? doesn't this indicate illegal state?
        }
    }
}
package eu.socialsensor.benchmarks;

import java.util.concurrent.TimeUnit;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.google.common.base.Stopwatch;

import eu.socialsensor.graphdatabases.GraphDatabase;
import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.BenchmarkType;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

/**
 * MassiveInsertionBenchmark implementation
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 */
public class MassiveInsertionBenchmark extends PermutingBenchmarkBase implements InsertsGraphData
{
    private static final Logger logger = LogManager.getLogger();

    public MassiveInsertionBenchmark(BenchmarkConfiguration config)
    {
        super(config, BenchmarkType.MASSIVE_INSERTION);
    }

    @Override
    public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
    {
        logger.debug("Creating database instance for type " + type.getShortname());
        final GraphDatabase db = Utils.createDatabaseInstance(bench, type);

        logger.debug("Prepare database instance for type {} for massive loading", type.getShortname());
        // the following step includes provisioning in managed database
        // services. do not measure this time as
        // it is not related to the action of inserting.
        db.createGraphForMassiveLoad();

        logger.debug("Massive load graph in database type {}", type.getShortname());
        final Stopwatch timer = new Stopwatch();
        timer.start();
        db.massiveModeLoading(bench.getDataset());

        logger.debug("Shutdown massive graph in database type {}", type.getShortname());
        db.shutdownMassiveGraph();
        times.get(type).add((double) timer.elapsed(TimeUnit.MILLISECONDS));
    }
}
package eu.socialsensor.utils;

import java.lang.reflect.Method;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * Iterates over all permutations of an array of {@link Method}s using the
 * classic lexicographic next-permutation algorithm. The returned array is a
 * reused buffer: callers must copy it if they need to keep a permutation.
 */
public class PermuteMethod implements Iterator<Method[]> {
    private final int size;
    private final Method[] elements; // copy of original 0 .. size-1
    private final Method[] ar; // reused output buffer, 0 .. size-1
    private final int[] permutation; // perm of nums 1..size, perm[0]=0

    private boolean next = true;

    public PermuteMethod(Method[] e) {
        size = e.length;
        elements = new Method[size];
        System.arraycopy(e, 0, elements, 0, size);
        ar = new Method[size];
        System.arraycopy(e, 0, ar, 0, size);
        permutation = new int[size + 1];
        for (int i = 0; i < size + 1; i++) {
            permutation[i] = i;
        }
    }

    /** Materializes the current permutation into the output buffer. */
    private void formNextPermutation() {
        for (int i = 0; i < size; i++) {
            // Direct array store; the original used reflective Array.set on a
            // plain Method[], which is needless reflection overhead.
            ar[i] = elements[permutation[i + 1] - 1];
        }
    }

    @Override
    public boolean hasNext() {
        return next;
    }

    @Override
    public void remove() throws UnsupportedOperationException {
        throw new UnsupportedOperationException();
    }

    private void swap(final int i, final int j) {
        final int x = permutation[i];
        permutation[i] = permutation[j];
        permutation[j] = x;
    }

    @Override
    public Method[] next() throws NoSuchElementException {
        if (!next) {
            // Honor the Iterator contract; the original declared this
            // exception but never threw it when exhausted.
            throw new NoSuchElementException();
        }
        formNextPermutation(); // copy original elements
        int i = size - 1;
        while (permutation[i] > permutation[i + 1])
            i--;
        if (i == 0) {
            // Last permutation reached: reset state for reuse and stop.
            next = false;
            for (int j = 0; j < size + 1; j++) {
                permutation[j] = j;
            }
            return ar;
        }
        int j = size;
        while (permutation[i] > permutation[j])
            j--;
        swap(i, j);
        // Reverse the suffix to obtain the lexicographically next permutation.
        int r = size;
        int s = i + 1;
        while (r > s) {
            swap(r, s);
            r--;
            s++;
        }
        return ar;
    }
}
package eu.socialsensor.main;

import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Enum containing constants that correspond to each database.
 *
 * @author Alexander Patrikalakis
 */
public enum GraphDatabaseType
{
    TITAN_BERKELEYDB("Titan", "berkeleyje", "tbdb"),
    TITAN_DYNAMODB("Titan", "com.amazon.titan.diskstorage.dynamodb.DynamoDBStoreManager", "tddb"),
    TITAN_CASSANDRA("Titan", "cassandra", "tc"),
    TITAN_CASSANDRA_EMBEDDED("TitanEmbedded", "embeddedcassandra", "tce"),
    TITAN_HBASE("Titan", "hbase", "thb"),
    TITAN_PERSISTIT("TitanEmbedded", "persistit", "tp"),
    ORIENT_DB("OrientDB", null, "orient"),
    NEO4J("Neo4j", null, "neo4j"),
    SPARKSEE("Sparksee", null, "sparksee");

    private final String backend;
    private final String api;
    private final String shortname;

    /** Maps each short name (CLI/config token) back to its constant. */
    public static final Map<String, GraphDatabaseType> STRING_REP_MAP =
        new HashMap<String, GraphDatabaseType>();
    /** All Titan-backed database flavors. */
    public static final Set<GraphDatabaseType> TITAN_FLAVORS = EnumSet.of(
        TITAN_BERKELEYDB, TITAN_DYNAMODB, TITAN_CASSANDRA,
        TITAN_CASSANDRA_EMBEDDED, TITAN_HBASE, TITAN_PERSISTIT);
    static
    {
        for (GraphDatabaseType db : values())
        {
            STRING_REP_MAP.put(db.getShortname(), db);
        }
    }

    private GraphDatabaseType(String api, String backend, String shortname)
    {
        this.api = api;
        this.backend = backend;
        this.shortname = shortname;
    }

    /** @return the storage backend identifier, or null for embedded engines. */
    public String getBackend()
    {
        return backend;
    }

    /** @return the API family this database is driven through. */
    public String getApi()
    {
        return api;
    }

    /** @return the short name used in configuration to select this database. */
    public String getShortname()
    {
        return shortname;
    }
}
com.orientechnologies.orient.graph.batch.OGraphBatchInsertBasic; 5 | import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx; 6 | 7 | import eu.socialsensor.main.GraphDatabaseType; 8 | 9 | /** 10 | * Implementation of massive Insertion in OrientDB graph database 11 | * 12 | * @author sotbeis, sotbeis@iti.gr 13 | * @author Alexander Patrikalakis 14 | * 15 | */ 16 | public class OrientMassiveInsertion extends InsertionBase implements Insertion 17 | { 18 | private static final int ESTIMATED_ENTRIES = 1000000; 19 | private static final int AVERAGE_NUMBER_OF_EDGES_PER_NODE = 40; 20 | private static final int NUMBER_OF_ORIENT_CLUSTERS = 16; 21 | private final OGraphBatchInsertBasic graph; 22 | 23 | public OrientMassiveInsertion(final String url) 24 | { 25 | super(GraphDatabaseType.ORIENT_DB, null /* resultsPath */); 26 | OGlobalConfiguration.ENVIRONMENT_CONCURRENT.setValue(false); 27 | OrientGraphNoTx transactionlessGraph = new OrientGraphNoTx(url); 28 | for (int i = 0; i < NUMBER_OF_ORIENT_CLUSTERS; ++i) 29 | { 30 | transactionlessGraph.getVertexBaseType().addCluster("v_" + i); 31 | transactionlessGraph.getEdgeBaseType().addCluster("e_" + i); 32 | } 33 | transactionlessGraph.shutdown(); 34 | 35 | graph = new OGraphBatchInsertBasic(url); 36 | graph.setAverageEdgeNumberPerNode(AVERAGE_NUMBER_OF_EDGES_PER_NODE); 37 | graph.setEstimatedEntries(ESTIMATED_ENTRIES); 38 | graph.setIdPropertyName("nodeId"); 39 | graph.begin(); 40 | } 41 | 42 | @Override 43 | protected void post() { 44 | graph.end(); 45 | } 46 | 47 | @Override 48 | protected Long getOrCreate(String value) 49 | { 50 | final long v = Long.parseLong(value); 51 | graph.createVertex(v); 52 | return v; 53 | } 54 | 55 | @Override 56 | protected void relateNodes(Long src, Long dest) 57 | { 58 | graph.createEdge(src, dest); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/BenchmarkBase.java: 
-------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | import java.io.File; 4 | 5 | import org.apache.logging.log4j.LogManager; 6 | import org.apache.logging.log4j.Logger; 7 | 8 | import eu.socialsensor.main.BenchmarkConfiguration; 9 | import eu.socialsensor.main.BenchmarkType; 10 | import eu.socialsensor.main.GraphDatabaseType; 11 | import eu.socialsensor.utils.Utils; 12 | 13 | /** 14 | * Base class for benchmarks. 15 | * 16 | * @author Alexander Patrikalakis 17 | */ 18 | public abstract class BenchmarkBase implements Benchmark 19 | { 20 | private static final Logger logger = LogManager.getLogger(); 21 | protected final BenchmarkConfiguration bench; 22 | protected final File outputFile; 23 | protected final BenchmarkType type; 24 | 25 | protected BenchmarkBase(BenchmarkConfiguration bench, BenchmarkType type) 26 | { 27 | this.bench = bench; 28 | this.outputFile = new File(bench.getResultsPath(), type.getResultsFileName()); 29 | this.type = type; 30 | } 31 | 32 | @Override 33 | public final void startBenchmark() 34 | { 35 | startBenchmarkInternal(); 36 | } 37 | 38 | public abstract void startBenchmarkInternal(); 39 | 40 | protected final void createDatabases() 41 | { 42 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 43 | { 44 | logger.info(String.format("creating %s database from %s dataset", type.getShortname(), bench.getDataset() 45 | .getName())); 46 | File dbpath = Utils.generateStorageDirectory(type, bench.getDbStorageDirectory()); 47 | if (dbpath.exists()) 48 | { 49 | throw new IllegalStateException(String.format( 50 | "Database from a previous run exist: %s; clean up and try again.", dbpath.getAbsolutePath())); 51 | } 52 | Utils.createMassiveLoadDatabase(type, bench); 53 | } 54 | } 55 | 56 | protected final void deleteDatabases() 57 | { 58 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 59 | { 60 | Utils.deleteDatabase(type, bench); 61 | } 62 | } 63 | } 64 
| -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/SingleInsertionBenchmark.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | import eu.socialsensor.graphdatabases.GraphDatabase; 4 | import eu.socialsensor.main.BenchmarkConfiguration; 5 | import eu.socialsensor.main.BenchmarkType; 6 | import eu.socialsensor.main.GraphDatabaseType; 7 | import eu.socialsensor.utils.Utils; 8 | 9 | import java.io.File; 10 | import java.util.List; 11 | 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | 15 | /** 16 | * SingleInsertionBenchmak implementation 17 | * 18 | * @author sotbeis, sotbeis@iti.gr 19 | * @author Alexander Patrikalakis 20 | */ 21 | public class SingleInsertionBenchmark extends PermutingBenchmarkBase implements InsertsGraphData 22 | { 23 | public static final String INSERTION_TIMES_OUTPUT_FILE_NAME_BASE = "SINGLE_INSERTIONResults"; 24 | private static final Logger LOG = LogManager.getLogger(); 25 | 26 | public SingleInsertionBenchmark(BenchmarkConfiguration bench) 27 | { 28 | super(bench, BenchmarkType.SINGLE_INSERTION); 29 | } 30 | 31 | @Override 32 | public void post() 33 | { 34 | LOG.info("Write results to " + outputFile.getAbsolutePath()); 35 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 36 | { 37 | String prefix = outputFile.getParentFile().getAbsolutePath() + File.separator 38 | + INSERTION_TIMES_OUTPUT_FILE_NAME_BASE + "." 
+ type.getShortname(); 39 | List> insertionTimesOfEachScenario = Utils.getDocumentsAs2dList(prefix, bench.getScenarios()); 40 | times.put(type, Utils.calculateMeanList(insertionTimesOfEachScenario)); 41 | Utils.deleteMultipleFiles(prefix, bench.getScenarios()); 42 | } 43 | // use the logic of the superclass method after populating the times map 44 | super.post(); 45 | } 46 | 47 | @Override 48 | public void benchmarkOne(GraphDatabaseType type, int scenarioNumber) 49 | { 50 | GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type); 51 | graphDatabase.createGraphForSingleLoad(); 52 | graphDatabase.singleModeLoading(bench.getDataset(), bench.getResultsPath(), scenarioNumber); 53 | graphDatabase.shutdown(); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.insert; 2 | 3 | import java.io.File; 4 | 5 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 6 | import com.orientechnologies.orient.core.index.OIndex; 7 | import com.tinkerpop.blueprints.TransactionalGraph; 8 | import com.tinkerpop.blueprints.Vertex; 9 | import com.tinkerpop.blueprints.impls.orient.OrientGraph; 10 | 11 | import eu.socialsensor.main.GraphDatabaseType; 12 | 13 | /** 14 | * Implementation of single Insertion in OrientDB graph database 15 | * 16 | * @author sotbeis, sotbeis@iti.gr 17 | * @author Alexander Patrikalakis 18 | * 19 | */ 20 | public final class OrientSingleInsertion extends InsertionBase 21 | { 22 | protected final OrientGraph orientGraph; 23 | protected final OIndex index; 24 | 25 | public OrientSingleInsertion(OrientGraph orientGraph, File resultsPath) 26 | { 27 | super(GraphDatabaseType.ORIENT_DB, resultsPath); 28 | this.orientGraph = orientGraph; 29 | this.index = 
this.orientGraph.getRawGraph().getMetadata().getIndexManager().getIndex("V.nodeId"); 30 | } 31 | 32 | @Override 33 | protected void relateNodes(Vertex src, Vertex dest) 34 | { 35 | orientGraph.addEdge(null, src, dest, "similar"); 36 | 37 | // TODO why commit twice? is this a nested transaction? 38 | if (orientGraph instanceof TransactionalGraph) 39 | { 40 | orientGraph.commit(); 41 | orientGraph.commit(); 42 | } 43 | } 44 | 45 | @Override 46 | protected Vertex getOrCreate(final String value) 47 | { 48 | final int key = Integer.parseInt(value); 49 | 50 | Vertex v; 51 | final OIdentifiable rec = (OIdentifiable) index.get(key); 52 | if (rec != null) 53 | { 54 | return orientGraph.getVertex(rec); 55 | } 56 | 57 | v = orientGraph.addVertex(key, "nodeId", key); 58 | 59 | if (orientGraph instanceof TransactionalGraph) 60 | { 61 | orientGraph.commit(); 62 | } 63 | 64 | return v; 65 | } 66 | 67 | @Override 68 | protected void post() 69 | { 70 | super.post(); 71 | if (orientGraph instanceof TransactionalGraph) 72 | { 73 | orientGraph.commit(); 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/insert/OrientAbstractInsertion.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * * 17 | * * For more information: http://www.orientechnologies.com 18 | * 19 | */ 20 | 21 | package eu.socialsensor.insert; 22 | 23 | import org.apache.log4j.Logger; 24 | 25 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 26 | import com.orientechnologies.orient.core.index.OIndex; 27 | import com.tinkerpop.blueprints.TransactionalGraph; 28 | import com.tinkerpop.blueprints.Vertex; 29 | import com.tinkerpop.blueprints.impls.orient.OrientExtendedGraph; 30 | 31 | /** 32 | * Implementation of single Insertion in OrientDB graph database 33 | * 34 | * @author sotbeis 35 | * @email sotbeis@iti.gr 36 | * 37 | */ 38 | public abstract class OrientAbstractInsertion implements Insertion { 39 | 40 | public static String INSERTION_TIMES_OUTPUT_PATH = null; 41 | 42 | protected OrientExtendedGraph orientGraph = null; 43 | protected Logger logger = Logger.getLogger(OrientAbstractInsertion.class); 44 | 45 | protected OIndex index; 46 | 47 | public OrientAbstractInsertion(OrientExtendedGraph orientGraph) { 48 | this.orientGraph = orientGraph; 49 | } 50 | 51 | protected Vertex getOrCreate(final String value) { 52 | final int key = Integer.parseInt(value); 53 | 54 | Vertex v; 55 | if (index == null) { 56 | index = orientGraph.getRawGraph().getMetadata().getIndexManager().getIndex("V.nodeId"); 57 | } 58 | 59 | final OIdentifiable rec = (OIdentifiable) index.get(key); 60 | if (rec != null) { 61 | return orientGraph.getVertex(rec); 62 | } 63 | 64 | v = orientGraph.addVertex(key, "nodeId", key); 65 | 66 | if (orientGraph instanceof TransactionalGraph) { 67 | ((TransactionalGraph) orientGraph).commit(); 68 | } 69 | 70 | return v; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java: 
-------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.HashMap; 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import org.apache.commons.collections4.iterators.PermutationIterator; 10 | import org.apache.logging.log4j.Logger; 11 | import org.apache.logging.log4j.LogManager; 12 | 13 | import eu.socialsensor.main.BenchmarkConfiguration; 14 | import eu.socialsensor.main.BenchmarkType; 15 | import eu.socialsensor.main.GraphDatabaseType; 16 | import eu.socialsensor.utils.Utils; 17 | 18 | /** 19 | * Base class abstracting the logic of permutations 20 | * 21 | * @author Alexander Patrikalakis 22 | */ 23 | public abstract class PermutingBenchmarkBase extends BenchmarkBase 24 | { 25 | protected final Map> times; 26 | private static final Logger LOG = LogManager.getLogger(); 27 | 28 | protected PermutingBenchmarkBase(BenchmarkConfiguration bench, BenchmarkType typeIn) 29 | { 30 | super(bench, typeIn); 31 | times = new HashMap>(); 32 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 33 | { 34 | times.put(type, new ArrayList(bench.getScenarios())); 35 | } 36 | } 37 | 38 | @Override 39 | public void startBenchmarkInternal() 40 | { 41 | LOG.info(String.format("Executing %s Benchmark . . . 
.", type.longname())); 42 | 43 | if (bench.permuteBenchmarks()) 44 | { 45 | PermutationIterator iter = new PermutationIterator( 46 | bench.getSelectedDatabases()); 47 | int cntPermutations = 1; 48 | while (iter.hasNext()) 49 | { 50 | LOG.info("Scenario " + cntPermutations); 51 | startBenchmarkInternalOnePermutation(iter.next(), cntPermutations); 52 | cntPermutations++; 53 | } 54 | } 55 | else 56 | { 57 | startBenchmarkInternalOnePermutation(bench.getSelectedDatabases(), 1); 58 | } 59 | 60 | LOG.info(String.format("%s Benchmark finished", type.longname())); 61 | post(); 62 | } 63 | 64 | private void startBenchmarkInternalOnePermutation(Collection types, int cntPermutations) 65 | { 66 | for (GraphDatabaseType type : types) 67 | { 68 | benchmarkOne(type, cntPermutations); 69 | } 70 | } 71 | 72 | public abstract void benchmarkOne(GraphDatabaseType type, int scenarioNumber); 73 | 74 | public void post() 75 | { 76 | Utils.writeResults(outputFile, times, type.longname()); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/insert/Neo4jSingleInsertion.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.insert; 2 | 3 | import java.io.File; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | import org.neo4j.cypher.javacompat.ExecutionEngine; 8 | import org.neo4j.graphdb.GraphDatabaseService; 9 | import org.neo4j.graphdb.Node; 10 | import org.neo4j.graphdb.ResourceIterator; 11 | import org.neo4j.graphdb.Transaction; 12 | import org.neo4j.kernel.GraphDatabaseAPI; 13 | 14 | import eu.socialsensor.graphdatabases.Neo4jGraphDatabase; 15 | import eu.socialsensor.main.BenchmarkingException; 16 | import eu.socialsensor.main.GraphDatabaseType; 17 | 18 | /** 19 | * Implementation of single Insertion in Neo4j graph database 20 | * 21 | * @author sotbeis, sotbeis@iti.gr 22 | * @author Alexander Patrikalakis 23 | * 24 | */ 25 | 
@SuppressWarnings("deprecation") 26 | public class Neo4jSingleInsertion extends InsertionBase 27 | { 28 | private final GraphDatabaseService neo4jGraph; 29 | private final ExecutionEngine engine; 30 | 31 | public Neo4jSingleInsertion(GraphDatabaseService neo4jGraph, File resultsPath) 32 | { 33 | super(GraphDatabaseType.NEO4J, resultsPath); 34 | this.neo4jGraph = neo4jGraph; 35 | engine = new ExecutionEngine(this.neo4jGraph); 36 | } 37 | 38 | public Node getOrCreate(String nodeId) 39 | { 40 | Node result = null; 41 | 42 | try(final Transaction tx = ((GraphDatabaseAPI) neo4jGraph).tx().unforced().begin()) 43 | { 44 | try 45 | { 46 | String queryString = "MERGE (n:Node {nodeId: {nodeId}}) RETURN n"; 47 | Map parameters = new HashMap(); 48 | parameters.put("nodeId", nodeId); 49 | ResourceIterator resultIterator = engine.execute(queryString, parameters).columnAs("n"); 50 | result = resultIterator.next(); 51 | tx.success(); 52 | } 53 | catch (Exception e) 54 | { 55 | tx.failure(); 56 | throw new BenchmarkingException("unable to get or create node " + nodeId, e); 57 | } 58 | } 59 | 60 | return result; 61 | } 62 | 63 | @Override 64 | public void relateNodes(Node src, Node dest) 65 | { 66 | try (final Transaction tx = ((GraphDatabaseAPI) neo4jGraph).tx().unforced().begin()) 67 | { 68 | try 69 | { 70 | src.createRelationshipTo(dest, Neo4jGraphDatabase.RelTypes.SIMILAR); 71 | tx.success(); 72 | } 73 | catch (Exception e) 74 | { 75 | tx.failure(); 76 | throw new BenchmarkingException("unable to relate nodes", e); 77 | } 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/utils/Metrics.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.utils; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | 6 | /** 7 | * This class implements the metrics we use for the evaluation of the predicted 8 | * clustering. 
For now we use only the NMI 9 | * 10 | * @author sbeis 11 | * @email sotbeis@gmail.com 12 | * 13 | */ 14 | public class Metrics 15 | { 16 | public double normalizedMutualInformation(int numberOfNodes, Map> actualPartitions, 17 | Map> predictedPartitions) 18 | { 19 | double nmi; 20 | double numOfNodes = (double) numberOfNodes; 21 | int[][] confusionMatrix = confusionMatrix(actualPartitions, predictedPartitions); 22 | int[] confusionMatrixActual = new int[actualPartitions.size()]; 23 | int[] confusionMatrixPredicted = new int[predictedPartitions.size()]; 24 | for (int i = 0; i < confusionMatrixActual.length; i++) 25 | { 26 | int sum = 0; 27 | for (int j = 0; j < confusionMatrixPredicted.length; j++) 28 | { 29 | sum = sum + confusionMatrix[i][j]; 30 | } 31 | confusionMatrixActual[i] = sum; 32 | } 33 | for (int j = 0; j < confusionMatrixPredicted.length; j++) 34 | { 35 | int sum = 0; 36 | for (int i = 0; i < confusionMatrixActual.length; i++) 37 | { 38 | sum = sum + confusionMatrix[i][j]; 39 | } 40 | confusionMatrixPredicted[j] = sum; 41 | } 42 | 43 | double term1 = 0; 44 | for (int i = 0; i < confusionMatrixActual.length; i++) 45 | { 46 | for (int j = 0; j < confusionMatrixPredicted.length; j++) 47 | { 48 | if (confusionMatrix[i][j] > 0) 49 | { 50 | term1 += -2.0 51 | * confusionMatrix[i][j] 52 | * Math.log((confusionMatrix[i][j] * numOfNodes) 53 | / (confusionMatrixActual[i] * confusionMatrixPredicted[j])); 54 | } 55 | } 56 | } 57 | double term2 = 0; 58 | for (int i = 0; i < confusionMatrixActual.length; i++) 59 | { 60 | term2 += confusionMatrixActual[i] * Math.log(confusionMatrixActual[i] / numOfNodes); 61 | } 62 | double term3 = 0; 63 | for (int j = 0; j < confusionMatrixPredicted.length; j++) 64 | { 65 | term3 += confusionMatrixPredicted[j] * Math.log(confusionMatrixPredicted[j] / numOfNodes); 66 | } 67 | nmi = term1 / (term2 + term3); 68 | return nmi; 69 | } 70 | 71 | private int[][] confusionMatrix(Map> actualPartitions, 72 | Map> predictedPartitions) 73 | { 74 
| int actualPartitionsSize = actualPartitions.size(); 75 | int predictedPartitionsSize = predictedPartitions.size(); 76 | int[][] confusionMatrix = new int[actualPartitionsSize][]; 77 | int actualPartitionsKeys[] = new int[actualPartitionsSize]; 78 | int predictedPartitionsKeys[] = new int[predictedPartitionsSize]; 79 | 80 | int actualPartitionsIndex = 0; 81 | for (int key : actualPartitions.keySet()) 82 | { 83 | actualPartitionsKeys[actualPartitionsIndex] = key; 84 | actualPartitionsIndex++; 85 | } 86 | int predictedPartitionsIndex = 0; 87 | for (int key : predictedPartitions.keySet()) 88 | { 89 | predictedPartitionsKeys[predictedPartitionsIndex] = key; 90 | predictedPartitionsIndex++; 91 | } 92 | 93 | for (int i = 0; i < actualPartitionsSize; i++) 94 | { 95 | confusionMatrix[i] = new int[predictedPartitionsSize]; 96 | for (int j = 0; j < predictedPartitionsSize; j++) 97 | { 98 | int commonNodes = 0; 99 | for (int node : predictedPartitions.get(predictedPartitionsKeys[j])) 100 | { 101 | if (actualPartitions.get(actualPartitionsKeys[i]).contains(node)) 102 | { 103 | commonNodes++; 104 | } 105 | } 106 | confusionMatrix[i][j] = commonNodes; 107 | } 108 | } 109 | return confusionMatrix; 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/insert/InsertionBase.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.insert; 2 | 3 | import java.io.File; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | import java.util.concurrent.TimeUnit; 7 | 8 | import org.apache.logging.log4j.LogManager; 9 | import org.apache.logging.log4j.Logger; 10 | 11 | import com.codahale.metrics.Timer; 12 | import com.google.common.base.Stopwatch; 13 | 14 | import eu.socialsensor.benchmarks.SingleInsertionBenchmark; 15 | import eu.socialsensor.dataset.Dataset; 16 | import eu.socialsensor.dataset.DatasetFactory; 17 | import 
eu.socialsensor.main.GraphDatabaseBenchmark; 18 | import eu.socialsensor.main.GraphDatabaseType; 19 | import eu.socialsensor.utils.Utils; 20 | 21 | /** 22 | * Base class for business logic of insertion workloads 23 | * 24 | * @author Alexander Patrikalakis 25 | * 26 | * @param 27 | * the Type of vertexes (graph database vendor specific) 28 | */ 29 | public abstract class InsertionBase implements Insertion 30 | { 31 | private static final Logger logger = LogManager.getLogger(); 32 | public static final String INSERTION_CONTEXT = ".eu.socialsensor.insertion."; 33 | private final Timer getOrCreateTimes; 34 | private final Timer relateNodesTimes; 35 | 36 | protected final GraphDatabaseType type; 37 | protected final List insertionTimes; 38 | private final boolean single; 39 | 40 | // to write intermediate times for SingleInsertion subclasses 41 | protected final File resultsPath; 42 | 43 | protected InsertionBase(GraphDatabaseType type, File resultsPath) 44 | { 45 | this.type = type; 46 | this.insertionTimes = new ArrayList(); 47 | this.resultsPath = resultsPath; 48 | this.single = resultsPath != null; 49 | final String insertionTypeCtxt = type.getShortname() + INSERTION_CONTEXT + (single ? "adhoc." : "batch."); 50 | this.getOrCreateTimes = GraphDatabaseBenchmark.metrics.timer(insertionTypeCtxt + "getOrCreate"); 51 | this.relateNodesTimes = GraphDatabaseBenchmark.metrics.timer(insertionTypeCtxt + "relateNodes"); 52 | } 53 | 54 | /** 55 | * Gets or creates a vertex 56 | * 57 | * @param value 58 | * the identifier of the vertex 59 | * @return the id of the created vertex 60 | */ 61 | protected abstract T getOrCreate(final String value); 62 | 63 | /** 64 | * 65 | * @param src 66 | * @param dest 67 | */ 68 | protected abstract void relateNodes(final T src, final T dest); 69 | 70 | /** 71 | * sometimes a transaction needs to be committed at the end of a batch run. 72 | * this is the hook. 
73 | */ 74 | protected void post() 75 | { 76 | // NOOP 77 | } 78 | 79 | public final void createGraph(File datasetFile, int scenarioNumber) 80 | { 81 | logger.info("Loading data in {} mode in {} database . . . .", single ? "single" : "massive", 82 | type.name()); 83 | Dataset dataset = DatasetFactory.getInstance().getDataset(datasetFile); 84 | 85 | T srcNode, dstNode; 86 | Stopwatch thousandWatch = new Stopwatch(), watch = new Stopwatch(); 87 | thousandWatch.start(); 88 | watch.start(); 89 | int i = 4; 90 | for (List line : dataset) 91 | { 92 | final Timer.Context contextSrc = getOrCreateTimes.time(); 93 | try { 94 | srcNode = getOrCreate(line.get(0)); 95 | } finally { 96 | contextSrc.stop(); 97 | } 98 | 99 | final Timer.Context contextDest = getOrCreateTimes.time(); 100 | try { 101 | dstNode = getOrCreate(line.get(1)); 102 | } finally { 103 | contextDest.stop(); 104 | } 105 | 106 | final Timer.Context contextRelate = relateNodesTimes.time(); 107 | try { 108 | relateNodes(srcNode, dstNode); 109 | } finally { 110 | contextRelate.stop(); 111 | } 112 | 113 | if (i % 1000 == 0) 114 | { 115 | insertionTimes.add((double) thousandWatch.elapsed(TimeUnit.MILLISECONDS)); 116 | thousandWatch.stop(); 117 | thousandWatch = new Stopwatch(); 118 | thousandWatch.start(); 119 | } 120 | i++; 121 | } 122 | post(); 123 | insertionTimes.add((double) watch.elapsed(TimeUnit.MILLISECONDS)); 124 | 125 | if (single) 126 | { 127 | Utils.writeTimes(insertionTimes, new File(resultsPath, 128 | SingleInsertionBenchmark.INSERTION_TIMES_OUTPUT_FILE_NAME_BASE + "." + type.getShortname() + "." 129 | + Integer.toString(scenarioNumber))); 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/test/resources/META-INF/input.properties: -------------------------------------------------------------------------------- 1 | # Choose which data sets you want to include in the benchmark by removing the contents. 
2 | #eu.socialsensor.dataset=data/Email-Enron.txt 3 | #eu.socialsensor.dataset=data/com-youtube.ungraph.txt 4 | #eu.socialsensor.dataset=data/Amazon0601.txt 5 | #eu.socialsensor.dataset=data/com-lj.ungraph.txt 6 | #can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000 7 | eu.socialsensor.dataset=data/network1000.dat 8 | eu.socialsensor.actual-communities=data/community1000.dat 9 | 10 | eu.socialsensor.database-storage-directory=storage 11 | # Sample meters this frequently (milliseconds) 12 | eu.socialsensor.metrics.csv.interval=1000 13 | # for the csv reporter 14 | eu.socialsensor.metrics.csv.directory=metrics 15 | # for the graphite reporter 16 | #eu.socialsensor.metrics.graphite.hostname=192.168.59.103 17 | 18 | # Choose which databases you want to in the benchmark by removing the comments. 19 | # Available dbs are: 20 | eu.socialsensor.databases=tbdb 21 | eu.socialsensor.databases=tddb 22 | #eu.socialsensor.databases=tc 23 | #eu.socialsensor.databases=thb 24 | #eu.socialsensor.databases=tce 25 | #eu.socialsensor.databases=tp 26 | #eu.socialsensor.databases=orient 27 | #eu.socialsensor.databases=neo4j 28 | #eu.socialsensor.databases=sparksee 29 | 30 | # Database specific options 31 | # Titan options 32 | # page-size - Number of results to pull when iterating over a storage backend (default 100) 33 | eu.socialsensor.titan.page-size=100 34 | # to disable buffering on mutations, set to zero. Default 1024. This will set the queue size as well 35 | eu.socialsensor.titan.buffer-size=10000 36 | # id block size default 10000 37 | eu.socialsensor.titan.ids.block-size=10000 38 | # Titan DynamoDB options 39 | # when warm-tables is set to true, the benchmark will create tables of the specified data model in parallel 40 | eu.socialsensor.dynamodb.precreate-tables=true 41 | # DynamoDBDelegate worker thread pool size. 
should not be larger than the number of HTTP connections 42 | # assuming a round trip time of 10ms for writes, one thread can do 50tps. When using the MULTI data 43 | # model, items are usually small, so the round trip time is close to that. 44 | eu.socialsensor.dynamodb.workers=15 45 | # TPS (both read and write to set per table). 750 R and W TPS = 1 DynamoDB table partition 46 | # http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/GuidelinesForTables.html#GuidelinesForTables.Partitions 47 | eu.socialsensor.dynamodb.tps=750 48 | # Data model for the Titan DynamoDB plugin. Can be SINGLE or MULTI 49 | eu.socialsensor.dynamodb.data-model=MULTI 50 | # Whether to allow eventually consistent reads or not 51 | # (allowing eventually consistent reads allows queries to happen faster) 52 | eu.socialsensor.dynamodb.force-consistent-read=true 53 | # Credentials. You can set credentials to any value when running against DynamoDBLocal 54 | # Needs to be the fully qualified class name of a class implementing 55 | # com.amazonaws.auth.AWSCredentials or com.amazonaws.auth.AWSCredentialsProvider. 56 | eu.socialsensor.dynamodb.credentials.class-name=com.amazonaws.auth.DefaultAWSCredentialsProviderChain 57 | # Comma separated list of strings to pass to the constructor of the class specified above. 58 | eu.socialsensor.dynamodb.credentials.constructor-args= 59 | # Endpoint. the titan-dynamodb database option above requires you to specify an endpoint. 60 | # This endpoint could be DynamoDBLocal running in a separate process (eg, http://127.0.0.1:4567), 61 | # or the https endpoint of a production region of the DynamoDB service. 
62 | eu.socialsensor.dynamodb.endpoint=http://127.0.0.1:4567 63 | #eu.socialsensor.dynamodb.endpoint=https://dynamodb.us-east-1.amazonaws.com 64 | 65 | # OrientDB options 66 | eu.socialsensor.orient.lightweight-edges=true 67 | 68 | # Sparksee options 69 | eu.socialsensor.sparksee.license-key=DEADBEEF 70 | 71 | # The following five benchmarks are permutable (that is, the suite can run them 72 | # many times in different database order). To turn on permutations, set 73 | # eu.socialsensor.permute-benchmarks=true 74 | eu.socialsensor.permute-benchmarks=false 75 | 76 | # Choose which benchmark you want to run by removing the comments. Choose one Insertion 77 | # workload and then query/clustering workloads afterward. 78 | eu.socialsensor.benchmarks=MASSIVE_INSERTION 79 | #eu.socialsensor.benchmarks=SINGLE_INSERTION 80 | eu.socialsensor.benchmarks=FIND_NEIGHBOURS 81 | eu.socialsensor.benchmarks=FIND_ADJACENT_NODES 82 | eu.socialsensor.benchmarks=FIND_SHORTEST_PATH 83 | eu.socialsensor.shortest-path-random-nodes=100 84 | 85 | # The clustering benchmark is not permutable even if eu.socialsensor.permute-benchmarks=true 86 | #eu.socialsensor.benchmarks=CLUSTERING 87 | eu.socialsensor.randomize-clustering=false 88 | eu.socialsensor.nodes-count=1000 89 | 90 | # Choose the cache values you want run the CW benchmark, or have them generated. To choose: 91 | eu.socialsensor.cache-values=25 92 | eu.socialsensor.cache-values=50 93 | eu.socialsensor.cache-values=75 94 | eu.socialsensor.cache-values=100 95 | eu.socialsensor.cache-values=125 96 | eu.socialsensor.cache-values=150 97 | 98 | # To have the cache values generated for the CW benchmark. 
99 | #eu.socialsensor.cache-increment-factor=1 100 | #eu.socialsensor.cache-values-count=6 101 | 102 | # This benchmark measures the time it takes to delete the database 103 | #eu.socialsensor.benchmarks=DELETION 104 | 105 | # Results folder path 106 | eu.socialsensor.results-path=results 107 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/clustering/LouvainMethod.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.clustering; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Random; 6 | import java.util.Set; 7 | import java.util.concurrent.ExecutionException; 8 | 9 | import eu.socialsensor.graphdatabases.GraphDatabase; 10 | 11 | /** 12 | * Implementation of Louvain Method on top of graph databases. Gephi Toolkit 13 | * (https://gephi.org/toolkit/) java implementation was used as guide. 14 | * 15 | * @author sotbeis 16 | * @email sotbeis@iti.gr 17 | */ 18 | public class LouvainMethod 19 | { 20 | boolean isRandomized; 21 | private double resolution = 1.0; 22 | private double graphWeightSum; 23 | private int N; 24 | private List communityWeights; 25 | private boolean communityUpdate = false; 26 | 27 | GraphDatabase graphDatabase; 28 | Cache cache; 29 | 30 | public LouvainMethod(GraphDatabase graphDatabase, int cacheSize, boolean isRandomized) throws ExecutionException 31 | { 32 | this.graphDatabase = graphDatabase; 33 | this.isRandomized = isRandomized; 34 | initialize(); 35 | cache = new Cache(graphDatabase, cacheSize); 36 | } 37 | 38 | private void initialize() 39 | { 40 | this.N = this.graphDatabase.getNodeCount();// this step takes a long 41 | // time on dynamodb. 
42 | this.graphWeightSum = this.graphDatabase.getGraphWeightSum() / 2; 43 | 44 | this.communityWeights = new ArrayList(this.N); 45 | for (int i = 0; i < this.N; i++) 46 | { 47 | this.communityWeights.add(0.0); 48 | } 49 | 50 | this.graphDatabase.initCommunityProperty(); 51 | } 52 | 53 | public void computeModularity() throws ExecutionException 54 | { 55 | Random rand = new Random(); 56 | boolean someChange = true; 57 | while (someChange) 58 | { 59 | someChange = false; 60 | boolean localChange = true; 61 | while (localChange) 62 | { 63 | localChange = false; 64 | int start = 0; 65 | if (this.isRandomized) 66 | { 67 | start = Math.abs(rand.nextInt()) % this.N; 68 | } 69 | int step = 0; 70 | for (int i = start; step < this.N; i = (i + 1) % this.N) 71 | { 72 | step++; 73 | int bestCommunity = updateBestCommunity(i); 74 | if ((this.cache.getCommunity(i) != bestCommunity) && (this.communityUpdate)) 75 | { 76 | 77 | this.cache.moveNodeCommunity(i, bestCommunity); 78 | this.graphDatabase.moveNode(i, bestCommunity); 79 | 80 | double bestCommunityWeight = this.communityWeights.get(bestCommunity); 81 | 82 | bestCommunityWeight += cache.getNodeCommunityWeight(i); 83 | this.communityWeights.set(bestCommunity, bestCommunityWeight); 84 | localChange = true; 85 | } 86 | 87 | this.communityUpdate = false; 88 | } 89 | someChange = localChange || someChange; 90 | } 91 | if (someChange) 92 | { 93 | zoomOut(); 94 | } 95 | } 96 | } 97 | 98 | private int updateBestCommunity(int node) throws ExecutionException 99 | { 100 | int bestCommunity = 0; 101 | double best = 0; 102 | Set communities = this.cache.getCommunitiesConnectedToNodeCommunities(node); 103 | for (int community : communities) 104 | { 105 | double qValue = q(node, community); 106 | if (qValue > best) 107 | { 108 | best = qValue; 109 | bestCommunity = community; 110 | this.communityUpdate = true; 111 | } 112 | } 113 | return bestCommunity; 114 | } 115 | 116 | private double q(int nodeCommunity, int community) throws 
ExecutionException 117 | { 118 | double edgesInCommunity = this.cache.getEdgesInsideCommunity(nodeCommunity, community); 119 | double communityWeight = this.communityWeights.get(community); 120 | double nodeWeight = this.cache.getNodeCommunityWeight(nodeCommunity); 121 | double qValue = this.resolution * edgesInCommunity - (nodeWeight * communityWeight) 122 | / (2.0 * this.graphWeightSum); 123 | int actualNodeCom = this.cache.getCommunity(nodeCommunity); 124 | int communitySize = this.cache.getCommunitySize(community); 125 | 126 | if ((actualNodeCom == community) && (communitySize > 1)) 127 | { 128 | qValue = this.resolution * edgesInCommunity - (nodeWeight * (communityWeight - nodeWeight)) 129 | / (2.0 * this.graphWeightSum); 130 | } 131 | if ((actualNodeCom == community) && (communitySize == 1)) 132 | { 133 | qValue = 0.; 134 | } 135 | return qValue; 136 | } 137 | 138 | public void zoomOut() 139 | { 140 | this.N = this.graphDatabase.reInitializeCommunities(); 141 | this.cache.reInitializeCommunities(); 142 | this.communityWeights = new ArrayList(this.N); 143 | for (int i = 0; i < this.N; i++) 144 | { 145 | this.communityWeights.add(graphDatabase.getCommunityWeight(i)); 146 | } 147 | } 148 | 149 | public int getN() 150 | { 151 | return this.N; 152 | } 153 | 154 | } -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.benchmarks; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.FileWriter; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | import java.util.List; 9 | import java.util.Map; 10 | import java.util.SortedMap; 11 | import java.util.TreeMap; 12 | import java.util.concurrent.ExecutionException; 13 | import java.util.concurrent.TimeUnit; 14 | 15 | import org.apache.logging.log4j.Logger; 16 | import 
org.apache.logging.log4j.LogManager; 17 | 18 | import com.google.common.base.Stopwatch; 19 | 20 | import eu.socialsensor.clustering.LouvainMethod; 21 | import eu.socialsensor.graphdatabases.GraphDatabase; 22 | import eu.socialsensor.main.BenchmarkConfiguration; 23 | import eu.socialsensor.main.BenchmarkType; 24 | import eu.socialsensor.main.BenchmarkingException; 25 | import eu.socialsensor.main.GraphDatabaseType; 26 | import eu.socialsensor.utils.Metrics; 27 | import eu.socialsensor.utils.Utils; 28 | 29 | /** 30 | * ClusteringBenchmark implementation 31 | * 32 | * @author sotbeis, sotbeis@iti.gr 33 | * @author Alexander Patrikalakis 34 | */ 35 | public class ClusteringBenchmark extends BenchmarkBase implements RequiresGraphData 36 | { 37 | private static final Logger LOG = LogManager.getLogger(); 38 | private final List cacheValues; 39 | 40 | public ClusteringBenchmark(BenchmarkConfiguration config) 41 | { 42 | super(config, BenchmarkType.CLUSTERING); 43 | this.cacheValues = new ArrayList(); 44 | if (config.getCacheValues() == null) 45 | { 46 | // multiply before truncating: a fractional increment factor (e.g. 0.05 for 5% cache steps) would be truncated to 0 by intValue() 47 | int cacheValueMultiplier = (int) (config.getCacheIncrementFactor().doubleValue() * config.getNodesCount()); 47 | for (int i = 1; i <= config.getCacheValuesCount(); i++) 48 | { 49 | cacheValues.add(i * cacheValueMultiplier); 50 | } 51 | } 52 | else 53 | { 54 | cacheValues.addAll(config.getCacheValues()); 55 | } 56 | } 57 | 58 | @Override 59 | public void startBenchmarkInternal() 60 | { 61 | LOG.info("Executing Clustering Benchmark . . . 
."); 62 | SortedMap> typeTimesMap = new TreeMap>(); 63 | try 64 | { 65 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 66 | { 67 | typeTimesMap.put(type, clusteringBenchmark(type)); 68 | } 69 | } 70 | catch (ExecutionException e) 71 | { 72 | throw new BenchmarkingException("Unable to run clustering benchmark: " + e.getMessage(), e); 73 | } 74 | 75 | try (BufferedWriter out = new BufferedWriter(new FileWriter(outputFile))) 76 | { 77 | out.write("DB,Cache Size (measured in nodes),Clustering Benchmark Time (s)\n"); 78 | for (GraphDatabaseType type : bench.getSelectedDatabases()) 79 | { 80 | for (Integer cacheSize : typeTimesMap.get(type).keySet()) 81 | { 82 | out.write(String.format("%s,%d,%f\n", type.getShortname(), cacheSize, 83 | typeTimesMap.get(type).get(cacheSize))); 84 | } 85 | } 86 | } 87 | catch (IOException e) 88 | { 89 | throw new BenchmarkingException("Unable to write clustering results to file", e); // keep the root cause, consistent with the catch above 90 | } 91 | LOG.info("Clustering Benchmark finished"); 92 | } 93 | 94 | private SortedMap clusteringBenchmark(GraphDatabaseType type) throws ExecutionException 95 | { 96 | GraphDatabase graphDatabase = Utils.createDatabaseInstance(bench, type); 97 | graphDatabase.open(); 98 | 99 | SortedMap timeMap = new TreeMap(); 100 | for (int cacheSize : cacheValues) 101 | { 102 | LOG.info("Graph Database: " + type.getShortname() + ", Dataset: " + bench.getDataset().getName() 103 | + ", Cache Size: " + cacheSize); 104 | 105 | Stopwatch watch = new Stopwatch(); 106 | watch.start(); 107 | LouvainMethod louvainMethodCache = new LouvainMethod(graphDatabase, cacheSize, bench.randomizedClustering()); 108 | louvainMethodCache.computeModularity(); 109 | timeMap.put(cacheSize, watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0); 110 | 111 | // evaluation with NMI 112 | Map> predictedCommunities = graphDatabase.mapCommunities(louvainMethodCache.getN()); 113 | Map> actualCommunities = mapNodesToCommunities(Utils.readTabulatedLines( 114 | bench.getActualCommunitiesFile(), 4 /* 
numberOfLinesToSkip */)); 115 | Metrics metrics = new Metrics(); 116 | double NMI = metrics.normalizedMutualInformation(bench.getNodesCount(), actualCommunities, 117 | predictedCommunities); 118 | LOG.info("NMI value: " + NMI); 119 | } 120 | graphDatabase.shutdown(); 121 | return timeMap; 122 | } 123 | 124 | private static Map> mapNodesToCommunities(List> tabulatedLines) 125 | { 126 | Map> communities = new HashMap>(); 127 | // http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760 128 | // the format of the communityNNNN.dat files have node and community 129 | // number separated by a tab. 130 | // community number starts at 1 and not zero. 131 | for (List line : tabulatedLines) 132 | { 133 | int node = Integer.valueOf(line.get(0)); 134 | int community = Integer.valueOf(line.get(1).trim()) - 1; 135 | if (!communities.containsKey(community)) 136 | { 137 | communities.put(community, new ArrayList()); 138 | } 139 | communities.get(community).add(node); 140 | } 141 | return communities; 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/clustering/Cache.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.clustering; 2 | 3 | import java.util.HashSet; 4 | import java.util.Set; 5 | import java.util.concurrent.ExecutionException; 6 | 7 | import com.google.common.cache.CacheBuilder; 8 | import com.google.common.cache.CacheLoader; 9 | import com.google.common.cache.LoadingCache; 10 | 11 | import eu.socialsensor.graphdatabases.GraphDatabase; 12 | 13 | /** 14 | * Cache implementation for the temporary storage of required information of the 15 | * graph databases in order to execute the Louvain Method 16 | * 17 | * @author sotbeis 18 | * @email sotbeis@iti.gr 19 | */ 20 | public class Cache 21 | { 22 | 23 | LoadingCache> nodeCommunitiesMap; // key=nodeCommunity 24 | // value=nodeIds 25 | // contained in 26 | // 
nodeCommunityC 27 | LoadingCache> communitiesMap; // key=community 28 | // value=nodeIds 29 | // contained in 30 | // community 31 | LoadingCache nodeCommunitiesToCommunities; // key=nodeCommunity 32 | // value=community 33 | LoadingCache> nodeNeighbours; // key=nodeId 34 | // value=nodeId 35 | // neighbors 36 | LoadingCache nodeToCommunityMap; // key=nodeId 37 | // value=communityId 38 | 39 | public Cache(final GraphDatabase graphDatabase, int cacheSize) throws ExecutionException 40 | { 41 | nodeNeighbours = CacheBuilder.newBuilder().maximumSize(cacheSize) 42 | .build(new CacheLoader>() { 43 | public Set load(Integer nodeId) 44 | { 45 | return graphDatabase.getNeighborsIds(nodeId); 46 | } 47 | }); 48 | 49 | nodeCommunitiesMap = CacheBuilder.newBuilder().maximumSize(cacheSize) 50 | .build(new CacheLoader>() { 51 | public Set load(Integer nodeCommunityId) 52 | { 53 | return graphDatabase.getNodesFromNodeCommunity(nodeCommunityId); 54 | } 55 | }); 56 | 57 | communitiesMap = CacheBuilder.newBuilder().maximumSize(cacheSize) 58 | .build(new CacheLoader>() { 59 | public Set load(Integer communityId) 60 | { 61 | return graphDatabase.getNodesFromCommunity(communityId); 62 | } 63 | }); 64 | 65 | nodeToCommunityMap = CacheBuilder.newBuilder().maximumSize(cacheSize) 66 | .build(new CacheLoader() { 67 | public Integer load(Integer nodeId) 68 | { 69 | return graphDatabase.getCommunityFromNode(nodeId); 70 | } 71 | }); 72 | 73 | nodeCommunitiesToCommunities = CacheBuilder.newBuilder().maximumSize(cacheSize) 74 | .build(new CacheLoader() { 75 | public Integer load(Integer nodeCommunity) 76 | { 77 | return graphDatabase.getCommunity(nodeCommunity); 78 | } 79 | }); 80 | } 81 | 82 | public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunity) throws ExecutionException 83 | { 84 | Set nodesFromNodeCommunity = nodeCommunitiesMap.get(nodeCommunity); 85 | Set communities = new HashSet(); 86 | for (int nodeFromNodeCommunity : nodesFromNodeCommunity) 87 | { 88 | Set neighbors = 
nodeNeighbours.get(nodeFromNodeCommunity); 89 | for (int neighbor : neighbors) 90 | { 91 | communities.add(nodeToCommunityMap.get(neighbor)); 92 | } 93 | } 94 | return communities; 95 | } 96 | 97 | public void moveNodeCommunity(int nodeCommunity, int toCommunity) throws ExecutionException 98 | { 99 | int fromCommunity = nodeCommunitiesToCommunities.get(nodeCommunity); 100 | nodeCommunitiesToCommunities.put(nodeCommunity, toCommunity); 101 | Set nodesFromCommunity = communitiesMap.get(fromCommunity); 102 | communitiesMap.invalidate(fromCommunity); 103 | communitiesMap.get(toCommunity).addAll(nodesFromCommunity); 104 | Set nodesFromNodeCommunity = nodeCommunitiesMap.get(nodeCommunity); 105 | for (int nodeFromCommunity : nodesFromNodeCommunity) 106 | { 107 | nodeToCommunityMap.put(nodeFromCommunity, toCommunity); 108 | } 109 | } 110 | 111 | public double getNodeCommunityWeight(int nodeCommunity) throws ExecutionException 112 | { 113 | Set nodes = nodeCommunitiesMap.get(nodeCommunity); 114 | double weight = 0; 115 | for (int node : nodes) 116 | { 117 | weight += nodeNeighbours.get(node).size(); 118 | } 119 | return weight; 120 | } 121 | 122 | public int getCommunity(int community) throws ExecutionException 123 | { 124 | return nodeCommunitiesToCommunities.get(community); 125 | } 126 | 127 | public int getCommunitySize(int community) throws ExecutionException 128 | { 129 | return communitiesMap.get(community).size(); 130 | } 131 | 132 | public double getEdgesInsideCommunity(int nodeCommunity, int community) throws ExecutionException 133 | { 134 | Set nodeCommunityNodes = nodeCommunitiesMap.get(nodeCommunity); 135 | Set communityNodes = communitiesMap.get(community); 136 | double edges = 0; 137 | for (int nodeCommunityNode : nodeCommunityNodes) 138 | { 139 | for (int communityNode : communityNodes) 140 | { 141 | if (nodeNeighbours.get(nodeCommunityNode).contains(communityNode)) 142 | { 143 | edges++; 144 | } 145 | } 146 | } 147 | return edges; 148 | } 149 | 150 | public 
void reInitializeCommunities() 151 | { 152 | nodeCommunitiesMap.invalidateAll(); 153 | communitiesMap.invalidateAll(); 154 | nodeToCommunityMap.invalidateAll(); 155 | nodeCommunitiesToCommunities.invalidateAll(); 156 | } 157 | 158 | } 159 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.graphdatabases; 2 | 3 | import java.io.File; 4 | import java.util.List; 5 | import java.util.Map; 6 | import java.util.Set; 7 | 8 | /** 9 | * Represents a graph database 10 | * 11 | * @author sotbeis, sotbeis@iti.gr 12 | * @author Alexander Patrikalakis 13 | */ 14 | public interface GraphDatabase 15 | { 16 | //edge and vertex operations 17 | public VertexType getOtherVertexFromEdge(EdgeType r, VertexType oneVertex); 18 | public VertexType getSrcVertexFromEdge(EdgeType edge); 19 | public VertexType getDestVertexFromEdge(EdgeType edge); 20 | public VertexType getVertex(Integer i); 21 | 22 | //edge iterators 23 | public EdgeIteratorType getAllEdges(); 24 | public EdgeIteratorType getNeighborsOfVertex(VertexType v); 25 | public boolean edgeIteratorHasNext(EdgeIteratorType it); 26 | public EdgeType nextEdge(EdgeIteratorType it); 27 | public void cleanupEdgeIterator(EdgeIteratorType it); 28 | 29 | //vertex iterators 30 | public VertexIteratorType getVertexIterator(); 31 | public boolean vertexIteratorHasNext(VertexIteratorType it); 32 | public VertexType nextVertex(VertexIteratorType it); 33 | public void cleanupVertexIterator(VertexIteratorType it); 34 | 35 | //benchmarks 36 | public void findAllNodeNeighbours(); 37 | public void findNodesOfAllEdges(); 38 | 39 | /** 40 | * Opens the graph database 41 | * 42 | * @param dbPath 43 | * - database path 44 | */ 45 | public void open(); 46 | 47 | /** 48 | * Creates a graph database and configures for single data insertion 49 | * 50 | * 
@param dbPath 51 | * - database path 52 | */ 53 | public void createGraphForSingleLoad(); 54 | 55 | /** 56 | * Inserts data in massive mode 57 | * 58 | * @param dataPath 59 | * - dataset path 60 | */ 61 | public void massiveModeLoading(File dataPath); 62 | 63 | /** 64 | * Inserts data in single mode 65 | * 66 | * @param dataPath 67 | * - dataset path 68 | * @param resultsPath 69 | * @param scenarioNumber 70 | */ 71 | public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumber); 72 | 73 | /** 74 | * Creates a graph database and configures for bulk data insertion 75 | * 76 | * @param dataPath 77 | * - dataset path 78 | */ 79 | public void createGraphForMassiveLoad(); 80 | 81 | /** 82 | * Shut down the graph database 83 | */ 84 | public void shutdown(); 85 | 86 | /** 87 | * Delete the graph database 88 | */ 89 | public void delete(); 90 | 91 | /** 92 | * Shutdown the graph database, which configuration is for massive insertion 93 | * of data 94 | */ 95 | public void shutdownMassiveGraph(); 96 | 97 | /** 98 | * Find the shortest path between vertex 1 and each of the vertexes in the list 99 | * 100 | * @param nodes 101 | * any number of random nodes 102 | */ 103 | public void shortestPaths(Set nodes); 104 | 105 | /** 106 | * Execute findShortestPaths query from the Query interface 107 | * 108 | * @param nodes 109 | * any number of random nodes 110 | */ 111 | public void shortestPath(final VertexType fromNode, Integer node); 112 | 113 | /** 114 | * @return the number of nodes 115 | */ 116 | public int getNodeCount(); 117 | 118 | /** 119 | * @param nodeId 120 | * @return the neighbours of a particular node 121 | */ 122 | public Set getNeighborsIds(int nodeId); 123 | 124 | /** 125 | * @param nodeId 126 | * @return the node degree 127 | */ 128 | public double getNodeWeight(int nodeId); 129 | 130 | /** 131 | * Initializes the community and nodeCommunity property in each database 132 | */ 133 | public void initCommunityProperty(); 134 | 135 | /** 136 | * 
@param nodeCommunities 137 | * @return the communities (communityId) that are connected with a 138 | * particular nodeCommunity 139 | */ 140 | public Set getCommunitiesConnectedToNodeCommunities(int nodeCommunities); 141 | 142 | /** 143 | * @param community 144 | * @return the nodes a particular community contains 145 | */ 146 | public Set getNodesFromCommunity(int community); 147 | 148 | /** 149 | * @param nodeCommunity 150 | * @return the nodes a particular nodeCommunity contains 151 | */ 152 | public Set getNodesFromNodeCommunity(int nodeCommunity); 153 | 154 | /** 155 | * @param nodeCommunity 156 | * @param communityNodes 157 | * @return the number of edges between a community and a nodeCommunity 158 | */ 159 | public double getEdgesInsideCommunity(int nodeCommunity, int communityNodes); 160 | 161 | /** 162 | * @param community 163 | * @return the sum of node degrees 164 | */ 165 | public double getCommunityWeight(int community); 166 | 167 | /** 168 | * @param nodeCommunity 169 | * @return the sum of node degrees 170 | */ 171 | public double getNodeCommunityWeight(int nodeCommunity); 172 | 173 | /** 174 | * Moves a node from a community to another 175 | * 176 | * @param from 177 | * @param to 178 | */ 179 | public void moveNode(int from, int to); 180 | 181 | /** 182 | * @return the number of edges of the graph database 183 | */ 184 | public double getGraphWeightSum(); 185 | 186 | /** 187 | * Reinitializes the community and nodeCommunity property 188 | * 189 | * @return the number of communities 190 | */ 191 | public int reInitializeCommunities(); 192 | 193 | /** 194 | * @param nodeId 195 | * @return in which community a particular node belongs 196 | */ 197 | public int getCommunityFromNode(int nodeId); 198 | 199 | /** 200 | * @param nodeCommunity 201 | * @return in which community a particular nodeCommunity belongs 202 | */ 203 | public int getCommunity(int nodeCommunity); 204 | 205 | /** 206 | * @param community 207 | * @return the number of nodeCommunities a 
particular community contains 208 | */ 209 | public int getCommunitySize(int community); 210 | 211 | /** 212 | * @param numberOfCommunities 213 | * @return a map where the key is the community id and the value is the 214 | * nodes each community has. 215 | */ 216 | public Map> mapCommunities(int numberOfCommunities); 217 | 218 | /** 219 | * 220 | * @param nodeId 221 | * @return return true if node exist, false if not 222 | */ 223 | public boolean nodeExists(int nodeId); 224 | } 225 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/main/GraphDatabaseBenchmark.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.main; 2 | 3 | import eu.socialsensor.benchmarks.Benchmark; 4 | import eu.socialsensor.benchmarks.ClusteringBenchmark; 5 | import eu.socialsensor.benchmarks.DeleteGraphBenchmark; 6 | import eu.socialsensor.benchmarks.FindNeighboursOfAllNodesBenchmark; 7 | import eu.socialsensor.benchmarks.FindNodesOfAllEdgesBenchmark; 8 | import eu.socialsensor.benchmarks.FindShortestPathBenchmark; 9 | import eu.socialsensor.benchmarks.MassiveInsertionBenchmark; 10 | import eu.socialsensor.benchmarks.SingleInsertionBenchmark; 11 | 12 | import org.apache.commons.configuration.Configuration; 13 | import org.apache.commons.configuration.ConfigurationException; 14 | import org.apache.commons.configuration.PropertiesConfiguration; 15 | import org.apache.commons.io.FileDeleteStrategy; 16 | import org.apache.logging.log4j.Logger; 17 | import org.apache.logging.log4j.LogManager; 18 | 19 | import com.codahale.metrics.CsvReporter; 20 | import com.codahale.metrics.MetricFilter; 21 | import com.codahale.metrics.MetricRegistry; 22 | import com.codahale.metrics.graphite.Graphite; 23 | import com.codahale.metrics.graphite.GraphiteReporter; 24 | 25 | import java.io.File; 26 | import java.io.IOException; 27 | import java.net.InetSocketAddress; 28 | import java.net.URL; 
29 | import java.util.Locale; 30 | import java.util.concurrent.ExecutionException; 31 | import java.util.concurrent.TimeUnit; 32 | 33 | /** 34 | * Main class for the execution of GraphDatabaseBenchmark. 35 | * 36 | * @author sotbeis, sotbeis@iti.gr 37 | * @author Alexander Patrikalakis 38 | */ 39 | public class GraphDatabaseBenchmark 40 | { 41 | public static final Logger logger = LogManager.getLogger(); 42 | public static final MetricRegistry metrics = new MetricRegistry(); 43 | public static final String DEFAULT_INPUT_PROPERTIES = "META-INF/input.properties"; 44 | private final BenchmarkConfiguration config; 45 | 46 | public static final Configuration getAppconfigFromClasspath() 47 | { 48 | Configuration appconfig; 49 | try 50 | { 51 | ClassLoader classLoader = GraphDatabaseBenchmark.class.getClassLoader(); 52 | URL resource = classLoader.getResource(DEFAULT_INPUT_PROPERTIES); 53 | appconfig = new PropertiesConfiguration(resource); 54 | } 55 | catch (ConfigurationException e) 56 | { 57 | throw new IllegalArgumentException(String.format( 58 | "Unable to load properties file from classpath because %s", e.getMessage())); 59 | } 60 | return appconfig; 61 | } 62 | 63 | public GraphDatabaseBenchmark(String inputPath) throws IllegalArgumentException 64 | { 65 | final Configuration appconfig; 66 | try 67 | { 68 | appconfig = inputPath == null ? 
getAppconfigFromClasspath() : new PropertiesConfiguration(new File( 69 | inputPath)); 70 | } 71 | catch (ConfigurationException e) 72 | { 73 | throw new IllegalArgumentException(String.format("Unable to load properties file %s because %s", inputPath, 74 | e.getMessage())); 75 | } 76 | config = new BenchmarkConfiguration(appconfig); 77 | if(config.publishCsvMetrics()) { 78 | final CsvReporter reporter = CsvReporter.forRegistry(metrics) 79 | .formatFor(Locale.US) 80 | .convertRatesTo(TimeUnit.SECONDS) 81 | .convertDurationsTo(TimeUnit.MILLISECONDS) 82 | .build(config.getCsvDir()); 83 | reporter.start(config.getCsvReportingInterval(), TimeUnit.MILLISECONDS); 84 | } 85 | if(config.publishGraphiteMetrics()) { 86 | final Graphite graphite = new Graphite(new InetSocketAddress(config.getGraphiteHostname(), 80 /*port*/)); 87 | final GraphiteReporter reporter = GraphiteReporter.forRegistry(metrics) 88 | .convertRatesTo(TimeUnit.SECONDS) 89 | .convertDurationsTo(TimeUnit.MILLISECONDS) 90 | .filter(MetricFilter.ALL) 91 | .build(graphite); 92 | reporter.start(config.getGraphiteReportingInterval(), TimeUnit.MILLISECONDS); 93 | } 94 | } 95 | 96 | public void run() 97 | { 98 | //MetricRegistry registry = MetricRegistry.name(klass, names) 99 | for (BenchmarkType type : config.getBenchmarkTypes()) 100 | { 101 | runBenchmark(type); 102 | } 103 | } 104 | 105 | private final void runBenchmark(BenchmarkType type) 106 | { 107 | final Benchmark benchmark; 108 | logger.info(type.longname() + " Benchmark Selected"); 109 | switch (type) 110 | { 111 | case MASSIVE_INSERTION: 112 | benchmark = new MassiveInsertionBenchmark(config); 113 | break; 114 | case SINGLE_INSERTION: 115 | benchmark = new SingleInsertionBenchmark(config); 116 | break; 117 | case FIND_ADJACENT_NODES: 118 | benchmark = new FindNodesOfAllEdgesBenchmark(config); 119 | break; 120 | case CLUSTERING: 121 | benchmark = new ClusteringBenchmark(config); 122 | break; 123 | case FIND_NEIGHBOURS: 124 | benchmark = new 
FindNeighboursOfAllNodesBenchmark(config); 125 | break; 126 | case FIND_SHORTEST_PATH: 127 | benchmark = new FindShortestPathBenchmark(config); 128 | break; 129 | case DELETION: 130 | benchmark = new DeleteGraphBenchmark(config); 131 | break; 132 | default: 133 | // parenthesize the conditional: '+' binds tighter than '?:', so the old form compared the concatenated string to null (always false) and NPE'd on a null type 133 | throw new UnsupportedOperationException("unsupported benchmark " + (type == null ? "null" 134 | : type.toString())); 135 | } 136 | benchmark.startBenchmark(); 137 | } 138 | 139 | /** 140 | * This is the main function. Set the proper property file and run 141 | * 142 | * @throws ExecutionException 143 | */ 144 | public static void main(String[] args) throws ExecutionException 145 | { 146 | final String inputPath = args.length != 1 ? null : args[0]; 147 | GraphDatabaseBenchmark benchmarks = new GraphDatabaseBenchmark(inputPath); 148 | try 149 | { 150 | benchmarks.run(); 151 | } 152 | catch (Throwable t) 153 | { 154 | logger.fatal(t.getMessage(), t); // log the stack trace, not just the message 155 | System.exit(1); 156 | } 157 | System.exit(0); 158 | } 159 | 160 | public void cleanup() 161 | { 162 | try 163 | { 164 | FileDeleteStrategy.FORCE.delete(config.getDbStorageDirectory()); 165 | } 166 | catch (IOException e) 167 | { 168 | logger.fatal("Unable to clean up db storage directory: " + e.getMessage()); 169 | System.exit(1); 170 | } 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.graphdatabases; 2 | 3 | import java.io.File; 4 | import java.util.Set; 5 | 6 | import org.neo4j.graphdb.Transaction; 7 | import org.neo4j.kernel.GraphDatabaseAPI; 8 | 9 | import com.codahale.metrics.MetricRegistry; 10 | import com.codahale.metrics.Timer; 11 | 12 | import eu.socialsensor.main.GraphDatabaseBenchmark; 13 | import eu.socialsensor.main.GraphDatabaseType; 14 | 15 | @SuppressWarnings("deprecation") 16 | public abstract class 
GraphDatabaseBase implements GraphDatabase 17 | { 18 | public static final String SIMILAR = "similar"; 19 | public static final String QUERY_CONTEXT = ".eu.socialsensor.query."; 20 | public static final String NODE_ID = "nodeId"; 21 | public static final String NODE_COMMUNITY = "nodeCommunity"; 22 | public static final String COMMUNITY = "community"; 23 | protected final File dbStorageDirectory; 24 | protected final MetricRegistry metrics = new MetricRegistry(); 25 | protected final GraphDatabaseType type; 26 | private final Timer nextVertexTimes; 27 | private final Timer getNeighborsOfVertexTimes; 28 | private final Timer nextEdgeTimes; 29 | private final Timer getOtherVertexFromEdgeTimes; 30 | private final Timer getAllEdgesTimes; 31 | private final Timer shortestPathTimes; 32 | 33 | protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory) 34 | { 35 | this.type = type; 36 | final String queryTypeContext = type.getShortname() + QUERY_CONTEXT; 37 | this.nextVertexTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "nextVertex"); 38 | this.getNeighborsOfVertexTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getNeighborsOfVertex"); 39 | this.nextEdgeTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "nextEdge"); 40 | this.getOtherVertexFromEdgeTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getOtherVertexFromEdge"); 41 | this.getAllEdgesTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getAllEdges"); 42 | this.shortestPathTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "shortestPath"); 43 | 44 | this.dbStorageDirectory = dbStorageDirectory; 45 | if (!this.dbStorageDirectory.exists()) 46 | { 47 | this.dbStorageDirectory.mkdirs(); 48 | } 49 | } 50 | 51 | @Override 52 | public void findAllNodeNeighbours() { 53 | //get the iterator 54 | Object tx = null; 55 | if(GraphDatabaseType.NEO4J == type) { //TODO fix this 56 | tx = ((Neo4jGraphDatabase) 
this).neo4jGraph.beginTx(); 57 | } 58 | try { 59 | VertexIteratorType vertexIterator = this.getVertexIterator(); 60 | while(vertexIteratorHasNext(vertexIterator)) { 61 | VertexType vertex; 62 | Timer.Context ctxt = nextVertexTimes.time(); 63 | try { 64 | vertex = nextVertex(vertexIterator); 65 | } finally { 66 | ctxt.stop(); 67 | } 68 | 69 | final EdgeIteratorType edgeNeighborIterator; 70 | ctxt = getNeighborsOfVertexTimes.time(); 71 | try { 72 | edgeNeighborIterator = this.getNeighborsOfVertex(vertex); 73 | } finally { 74 | ctxt.stop(); 75 | } 76 | while(edgeIteratorHasNext(edgeNeighborIterator)) { 77 | EdgeType edge; 78 | ctxt = nextEdgeTimes.time(); 79 | try { 80 | edge = nextEdge(edgeNeighborIterator); 81 | } finally { 82 | ctxt.stop(); 83 | } 84 | @SuppressWarnings("unused") 85 | Object other; 86 | ctxt = getOtherVertexFromEdgeTimes.time(); 87 | try { 88 | other = getOtherVertexFromEdge(edge, vertex); 89 | } finally { 90 | ctxt.stop(); 91 | } 92 | } 93 | this.cleanupEdgeIterator(edgeNeighborIterator); 94 | } 95 | this.cleanupVertexIterator(vertexIterator); 96 | if(this instanceof Neo4jGraphDatabase) { 97 | ((Transaction) tx).success(); 98 | } 99 | } finally {//TODO fix this 100 | if(GraphDatabaseType.NEO4J == type) { 101 | ((Transaction) tx).finish(); 102 | } 103 | } 104 | } 105 | 106 | @Override 107 | public void findNodesOfAllEdges() { 108 | Object tx = null; 109 | if(GraphDatabaseType.NEO4J == type) {//TODO fix this 110 | tx = ((GraphDatabaseAPI) ((Neo4jGraphDatabase) this).neo4jGraph).tx().unforced().begin(); 111 | } 112 | try { 113 | 114 | EdgeIteratorType edgeIterator; 115 | Timer.Context ctxt = getAllEdgesTimes.time(); 116 | try { 117 | edgeIterator = this.getAllEdges(); 118 | } finally { 119 | ctxt.stop(); 120 | } 121 | 122 | while(edgeIteratorHasNext(edgeIterator)) { 123 | EdgeType edge; 124 | ctxt = nextEdgeTimes.time(); 125 | try { 126 | edge = nextEdge(edgeIterator); 127 | } finally { 128 | ctxt.stop(); 129 | } 130 | @SuppressWarnings("unused") 131 
| VertexType source = this.getSrcVertexFromEdge(edge); 132 | @SuppressWarnings("unused") 133 | VertexType destination = this.getDestVertexFromEdge(edge); 134 | } 135 | } finally {//TODO fix this 136 | if(GraphDatabaseType.NEO4J == type) { 137 | ((Transaction) tx).close(); 138 | } 139 | } 140 | } 141 | 142 | @Override 143 | public void shortestPaths(Set nodes) { 144 | Object tx = null; 145 | if(GraphDatabaseType.NEO4J == type) {//TODO fix this 146 | tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx(); 147 | } 148 | try { 149 | //TODO(amcp) change this to use 100+1 random node list and then to use a sublist instead of always choosing node # 1 150 | VertexType from = getVertex(1); 151 | Timer.Context ctxt; 152 | for(Integer i : nodes) { 153 | //time this 154 | ctxt = shortestPathTimes.time(); 155 | try { 156 | shortestPath(from, i); 157 | } finally { 158 | ctxt.stop(); 159 | } 160 | } 161 | if(this instanceof Neo4jGraphDatabase) { 162 | ((Transaction) tx).success(); 163 | } 164 | } finally {//TODO fix this 165 | if(GraphDatabaseType.NEO4J == type) { 166 | ((Transaction) tx).finish(); 167 | } 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | graphdb-benchmarks 2 | ================== 3 | The project graphdb-benchmarks is a benchmark between popular graph dataases. Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/), [OrientDB](http://www.orientechnologies.com/orientdb/), [Neo4j](http://neo4j.com/) and [Sparksee](http://www.sparsity-technologies.com/). The purpose of this benchmark is to examine the performance of each graph database in terms of execution time. The benchmark is composed of four workloads, Clustering, Massive Insertion, Single Insertion and Query Workload. Every workload has been designed to simulate common operations in graph database systems. 
4 | 5 | - *Clustering Workload (CW)*: CW consists of a well-known community detection algorithm for modularity optimization, the Louvain Method. We adapt the algorithm on top of the benchmarked graph databases and employ cache techniques to take advantage of both graph database capabilities and in-memory execution speed. We measure the time the algorithm needs to converge. 6 | - *Massive Insertion Workload (MIW)*: we create the graph database and configure it for massive loading, then we populate it with a particular dataset. We measure the time for the creation of the whole graph. 7 | - *Single Insertion Workload (SIW)*: we create the graph database and load it with a particular dataset. Every object insertion (node or edge) is committed directly and the graph is constructed incrementally. We measure the insertion time per block, which consists of one thousand edges and the nodes that appear during the insertion of these edges. 8 | - *Query Workload (QW)*: we execute three common queries: 9 | * FindNeighbours (FN): finds the neighbours of all nodes. 10 | * FindAdjacentNodes (FA): finds the adjacent nodes of all edges. 11 | * FindShortestPath (FS): finds the shortest path between the first node and 100 randomly picked nodes. 12 | 13 | Here we measure the execution time of each query. 14 | 15 | For our evaluation we use both synthetic and real data. More specifically, we execute MIW, SIW and QW with real data derived from the SNAP dataset collection ([Enron Dataset](http://snap.stanford.edu/data/email-Enron.html), [Amazon dataset](http://snap.stanford.edu/data/amazon0601.html), [Youtube dataset](http://snap.stanford.edu/data/com-Youtube.html) and [LiveJournal dataset](http://snap.stanford.edu/data/com-LiveJournal.html)). 
On the other hand, with the CW we use synthetic data generated with the [LFR-Benchmark generator](https://sites.google.com/site/andrealancichinetti/files) which produces networks with power-law degree distribution and implanted communities within the network. The synthetic data can be downloaded from [here](http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760). 16 | 17 | For further information about the study please refer to the [published paper](http://link.springer.com/chapter/10.1007/978-3-319-10518-5_1) on Springer site and the presentation on [Slideshare](http://www.slideshare.net/sympapadopoulos/adbis2014-presentation). 18 | 19 | **Note 1:** The published paper contains the experimental study of Titan, OrientDB and Neo4j. After the publication we included the Sparksee graph database. 20 | 21 | **Note 2:** After the very useful comments and contributions of OrientDB developers, we updated the benchmark implementations and re-ran the experiments. We have updated the initial presentation with the new results and uploaded a new version of the paper in the following [link](http://mklab.iti.gr/files/beis_adbis2014_corrected.pdf). 22 | 23 | **Note 3:** Alexander Patrikalakis, a software developer at Amazon Web Services, refactored the benchmark, added support for Blueprints 2.5 and added support for the DynamoDB Storage Backend for Titan. 24 | 25 | Instructions 26 | ------------ 27 | To run the project at first you have to choose one of the aforementioned datasets. Of course you can select any dataset, but because there is not any utility class to convert the dataset in the appropriate format (for now), the format of the data must be identical with the tested datasets. The input parameters are configured from the src/test/resources/input.properties file. Please follow the instructions in this file to select the correct parameters. Then, run `mvn dependency:copy-dependencies && mvn test -Pbench` to execute the benchmarking run. 
28 | 29 | Results 30 | ------- 31 | This section contains the results of each benchmark. All the measurements are in seconds. 32 | 33 | 34 | ####CW results 35 | Below we list the results of the CW for graphs with 1,000, 5,000, 10,0000, 20,000, 30,000, 40,000, 50,000 nodes. 36 | 37 | | Graph-Cache | Titan | OrientDB | Neo4j | 38 | | ----------- | ----- | -------- | ----- | 39 | |Graph1k-5% |2.39 |**0.92** |2.46 | 40 | |Graph1k-10% |1.45 |**0.59** |2.07 | 41 | |Graph1k-15% |1.30 |**0.58** |1.88 | 42 | |Graph1k-20% |1.25 |**0.55** |1.72 | 43 | |Graph1k-25% |1.19 |**0.49** |1.67 | 44 | |Graph1k-30% |1.15 |**0.48** |1.55 | 45 | | | 46 | |Graph5k-5% |16.01 |**5.88** |12.80 | 47 | |Graph5k-10% |15.10 |**5.67** |12.13 | 48 | |Graph5k-15% |14.63 |**4.81** |11.91 | 49 | |Graph5k-20% |14.16 |**4.62** |11.68 | 50 | |Graph5k-25% |13.76 |**4.51** |11.31 | 51 | |Graph5k-30% |13.38 |**4.45** |10.94 | 52 | | | 53 | |Graph10k-5% |46.06 |**18.20** |34.05 | 54 | |Graph10k-10% |44.59 |**17.92** |32.88 | 55 | |Graph10k-15% |43.68 |**17.31** |31.91 | 56 | |Graph10k-20% |42.48 |**16.88** |31.01 | 57 | |Graph10k-25% |41.32 |**16.58** |30.74 | 58 | |Graph10k-30% |39.98 |**16.34** |30.13 | 59 | | | 60 | |Graph20k-5% |140.46 |**54.01** |87.04 | 61 | |Graph20k-10% |138.10 |**52.51** |85.49 | 62 | |Graph20k-15% |137.25 |**52.12** |82.88 | 63 | |Graph20k-20% |133.11 |**51.68** |82.16 | 64 | |Graph20k-25% |122.48 |**50.79** |79.87 | 65 | |Graph20k-30% |120.94 |**50.49** |78.81 | 66 | | | 67 | |Graph30k-5% |310.25 |**96.38** |154.60 | 68 | |Graph30k-10% |301.80 |**94.98** |151.81 | 69 | |Graph30k-15% |299.27 |**94.85** |151.12 | 70 | |Graph30k-20% |296.43 |**94.67** |146.25 | 71 | |Graph30k-25% |294.33 |**92.62** |144.08 | 72 | |Graph30k-30% |288.50 |**90.13** |142.33 | 73 | | | 74 | |Graph40k-5% |533.29 |**201.19**|250.79 | 75 | |Graph40k-10% |505.91 |**199.18**|244.79 | 76 | |Graph40k-15% |490.39 |**194.34**|242.55 | 77 | |Graph40k-20% |478.31 |**183.14**|241.47 | 78 | |Graph40k-25% |467.18 
|**177.55**|237.29 | 79 | |Graph40k-30% |418.07 |**174.65**|229.65 | 80 | | | 81 | |Graph50k-5% |642.42 |**240.58**|348.33 | 82 | |Graph50k-10% |624.36 |**238.35**|344.06 | 83 | |Graph50k-15% |611.70 |**237.65**|340.20 | 84 | |Graph50k-20% |610.40 |**230.76**|337.36 | 85 | |Graph50k-25% |596.29 |**230.03**|332.01 | 86 | |Graph50k-30% |580.44 |**226.31**|325.88 | 87 | 88 | 89 | ####MIW & QW results 90 | Below we list the results of MIW and QW for each dataset. 91 | 92 | | Dataset | Workload | Titan | OrientDB | Neo4j | 93 | | ------- | -------- | ----- | -------- | ----- | 94 | | EN | MIW |9.36 |62.77 |**6.77** | 95 | | AM | MIW |34.00 |97.00 |**10.61** | 96 | | YT | MIW |104.27 |252.15 |**24.69** | 97 | | LJ | MIW |663.03 |9416.74 |**349.55**| 98 | | | 99 | | EN | QW-FN |1.87 |**0.56** |0.95 | 100 | | AM | QW-FN |6.47 |3.50 |**1.85** | 101 | | YT | QW-FN |20.71 |9.34 |**4.51** | 102 | | LJ | QW-FN |213.41 |303.09 |**47.07** | 103 | | | 104 | | EN | QW-FA |3.78 |0.71 |**0.16** | 105 | | AM | QW-FA |13.77 |2.30 |**0.36** | 106 | | YT | QW-FA |42.82 |6.15 |**1.46** | 107 | | LJ | QW-FA |460.25 |518.12 |**16.53** | 108 | | | 109 | | EN | QW-FS |1.63 |3.09 |**0.16** | 110 | | AM | QW-FS |0.12 |83.29 |**0.302** | 111 | | YT | QW-FS |24.87 |23.47 |**0.08** | 112 | | LJ | QW-FS |123.50 |86.87 |**18.13** | 113 | 114 | 115 | ####SIW results 116 | Below we list the results of SIW for each dataset. 117 | 118 | ![siw_benchmark_updated](https://cloud.githubusercontent.com/assets/8163869/12272282/62b1c9f4-b914-11e5-85be-efd3f58e1e05.png) 119 | 125 | 126 | Contact 127 | ------- 128 | For more information or support, please contact: sotbeis@iti.gr, sot.beis@gmail.com, papadop@iti.gr or amcp@me.com. 
129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/utils/Utils.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.utils; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | import java.util.ArrayList; 8 | import java.util.Arrays; 9 | import java.util.LinkedList; 10 | import java.util.List; 11 | import java.util.Map; 12 | import java.util.Map.Entry; 13 | import java.util.SortedMap; 14 | import java.util.TreeMap; 15 | 16 | import org.apache.commons.io.FileUtils; 17 | import org.apache.commons.io.LineIterator; 18 | import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; 19 | import org.apache.commons.math3.util.MathArrays; 20 | import org.apache.logging.log4j.LogManager; 21 | import org.apache.logging.log4j.Logger; 22 | 23 | import eu.socialsensor.graphdatabases.GraphDatabase; 24 | import eu.socialsensor.graphdatabases.Neo4jGraphDatabase; 25 | import eu.socialsensor.graphdatabases.OrientGraphDatabase; 26 
import eu.socialsensor.graphdatabases.SparkseeGraphDatabase;
import eu.socialsensor.graphdatabases.TitanGraphDatabase;
import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.BenchmarkingException;
import eu.socialsensor.main.GraphDatabaseType;

/**
 * This class contains all the required utility functions for the benchmark:
 * result-file IO, dataset parsing helpers, statistics aggregation, and factory
 * methods for the benchmarked graph database implementations.
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 *
 */
public class Utils
{
    public static final Logger logger = LogManager.getLogger();

    /**
     * Reads the intermediate result documents named docPath.1 .. docPath.scenarios,
     * one per benchmark scenario, each containing one double value per line.
     *
     * @param docPath base path of the intermediate documents
     * @param scenarios number of scenario files to read
     * @return one list of values per scenario, in scenario order
     * @throws IllegalStateException if any intermediate file is missing
     */
    public static List<List<Double>> getDocumentsAs2dList(String docPath, int scenarios)
    {
        List<List<Double>> data = new ArrayList<List<Double>>(scenarios);
        for (int i = 0; i < scenarios; i++)
        {
            File intermediateFile = new File(docPath + "." + (i + 1));
            if (!intermediateFile.exists())
            {
                throw new IllegalStateException("file " + intermediateFile.getAbsolutePath() + " does not exist");
            }
            data.add(getListFromTextDoc(intermediateFile));
        }
        return data;
    }

    /**
     * Reads all lines of a UTF-8 text file.
     *
     * @param file the file to read; must exist and be a readable regular file
     * @return the lines of the file, in order
     * @throws IllegalArgumentException if the file is null, missing, or unreadable
     * @throws BenchmarkingException if an IO error occurs while opening the file
     */
    public static final List<String> readlines(File file)
    {
        if (file == null || !file.exists())
        {
            // file may legitimately be null here, so do not dereference it in the message
            throw new IllegalArgumentException("file object must not be null and must exist: "
                + (file == null ? "null" : file.getAbsolutePath()));
        }
        if (!file.isFile() || !file.canRead())
        {
            throw new IllegalArgumentException("file object must be a readable file: " + file.getAbsolutePath());
        }
        LineIterator it;
        try
        {
            it = FileUtils.lineIterator(file, "UTF-8");
        }
        catch (IOException e)
        {
            throw new BenchmarkingException("Unable to read lines from file: " + file.getAbsolutePath(), e);
        }
        List<String> result = new LinkedList<String>();
        try
        {
            while (it.hasNext())
            {
                result.add(it.nextLine());
            }
        }
        finally
        {
            LineIterator.closeQuietly(it);
        }

        return result;
    }

    /**
     * Splits each line on tab characters, skipping the first numberOfLinesToSkip
     * lines (e.g. dataset comment headers).
     *
     * @throws IllegalArgumentException if numberOfLinesToSkip is out of range
     */
    public static final List<List<String>> parseTabulatedLines(List<String> lines, int numberOfLinesToSkip)
    {
        if (numberOfLinesToSkip < 0 || numberOfLinesToSkip > lines.size())
        {
            throw new IllegalArgumentException("can skip at least zero and at most lines.size lines");
        }
        // plain sequential loop: the previous parallelStream().forEachOrdered()
        // serialized the appends into the LinkedList anyway, so the parallel
        // pipeline added overhead without any concurrency benefit
        List<List<String>> result = new LinkedList<List<String>>();
        for (String line : lines.subList(numberOfLinesToSkip, lines.size()))
        {
            result.add(Arrays.asList(line.split("\t")));
        }
        return result;
    }

    /** Convenience composition of {@link #readlines(File)} and {@link #parseTabulatedLines(List, int)}. */
    public static final List<List<String>> readTabulatedLines(File file, int numberOfLinesToSkip)
    {
        return parseTabulatedLines(readlines(file), numberOfLinesToSkip);
    }

    /**
     * Deletes a file, or a directory and everything beneath it. A no-op when
     * the path does not exist.
     *
     * @throws RuntimeException if any path in the tree could not be deleted
     */
    public static void deleteRecursively(File file)
    {
        if (!file.exists())
        {
            return;
        }
        if (file.isDirectory())
        {
            for (File child : file.listFiles())
            {
                deleteRecursively(child);
            }
        }
        if (!file.delete())
        {
            throw new RuntimeException("Couldn't empty database.");
        }
    }

    /** Deletes the numbered intermediate files filePath.1 .. filePath.numberOfFiles. */
    public static void deleteMultipleFiles(String filePath, int numberOfFiles)
    {
        for (int i = 0; i < numberOfFiles; i++)
        {
            deleteRecursively(new File(filePath + "." + (i + 1)));
        }
    }

    /**
     * Writes one insertion time per line to outputPath.
     *
     * @throws BenchmarkingException if the file cannot be written
     */
    public static void writeTimes(List<Double> insertionTimes, File outputPath)
    {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(outputPath)))
        {
            for (Double insertionTime : insertionTimes)
            {
                out.write(insertionTime.toString());
                out.write("\n");
            }
        }
        catch (IOException e)
        {
            throw new BenchmarkingException("unable to write times to: " + outputPath.getAbsolutePath(), e);
        }
    }

    /** Parses a text file containing one double per line; surrounding whitespace is ignored. */
    public static List<Double> getListFromTextDoc(File file)
    {
        List<String> lines = readlines(file);
        List<Double> values = new ArrayList<Double>(lines.size());
        for (String line : lines)
        {
            values.add(Double.valueOf(line.trim()));
        }
        return values;
    }

    /**
     * Returns the first key mapped to the given value, or null when no entry
     * matches. Linear scan — intended for small maps only.
     */
    public static <T, E> T getKeyByValue(Map<T, E> map, E value)
    {
        for (Entry<T, E> entry : map.entrySet())
        {
            if (value.equals(entry.getValue()))
            {
                return entry.getKey();
            }
        }
        return null;
    }

    /** Storage directory for a database type: storageBaseDir/&lt;type shortname&gt;. */
    public static final File generateStorageDirectory(GraphDatabaseType type, File storageBaseDir)
    {
        return new File(storageBaseDir, type.getShortname());
    }

    /**
     * Factory for the {@link GraphDatabase} implementation matching the given type.
     *
     * @param config benchmark configuration handed to the database implementation
     * @param type the kind of database to instantiate
     * @throws IllegalArgumentException for an unsupported type
     */
    public static final GraphDatabase createDatabaseInstance(BenchmarkConfiguration config, GraphDatabaseType type)
    {
        final GraphDatabase graphDatabase;
        final File dbStorageDirectory = generateStorageDirectory(type, config.getDbStorageDirectory());
        if (GraphDatabaseType.TITAN_FLAVORS.contains(type))
        {
            graphDatabase = new TitanGraphDatabase(type, config, dbStorageDirectory);
        }
        else if (GraphDatabaseType.NEO4J == type)
        {
            graphDatabase = new Neo4jGraphDatabase(dbStorageDirectory);
        }
        else if (GraphDatabaseType.ORIENT_DB == type)
        {
            graphDatabase = new OrientGraphDatabase(config, dbStorageDirectory);
        }
        else if (GraphDatabaseType.SPARKSEE == type)
        {
            graphDatabase = new SparkseeGraphDatabase(config, dbStorageDirectory);
        }
        else
        {
            // BUG FIX: the ternary must be parenthesized. Without the parentheses
            // the string concatenation bound tighter than ==, so the null check
            // compared ("Unknown type: " + type) to null (always false) and a null
            // type produced an NPE from type.toString() instead of a clear message.
            throw new IllegalArgumentException("Unknown type: " + (type == null ? "null" : type.toString()));
        }

        return graphDatabase;
    }

    /** Creates the database, bulk-loads the configured dataset, and shuts it down. */
    public static void createMassiveLoadDatabase(GraphDatabaseType type, BenchmarkConfiguration config)
    {
        final GraphDatabase graphDatabase = createDatabaseInstance(config, type);
        graphDatabase.createGraphForMassiveLoad();
        graphDatabase.massiveModeLoading(config.getDataset());
        graphDatabase.shutdownMassiveGraph();
    }

    /**
     * Returns a graph database loaded with data in individual transactions.
     * Does not shut down the database after the data load.
     *
     * @param type
     *            database type
     * @param config
     *            for individual databases.
     * @return the open, loaded database
     */
    public static GraphDatabase createSingleLoadDatabase(GraphDatabaseType type, BenchmarkConfiguration config)
    {
        final GraphDatabase graphDatabase = createDatabaseInstance(config, type);
        graphDatabase.createGraphForSingleLoad();
        graphDatabase.singleModeLoading(config.getDataset(), null /* resultsPath */, 0);
        return graphDatabase;
    }

    /** Deletes the on-disk storage of the given database type. */
    public static void deleteDatabase(GraphDatabaseType type, BenchmarkConfiguration config)
    {
        logger.info(String.format("Deleting graph database %s . . . .", type.getShortname()));

        final GraphDatabase graphDatabase = createDatabaseInstance(config, type);
        graphDatabase.delete();
    }

    /** Unboxes a list of Doubles into a primitive array; null or empty yields an empty array. */
    public static double[] convert(List<Double> list)
    {
        if (list == null || list.isEmpty())
        {
            return new double[0];
        }
        double[] result = new double[list.size()];
        for (int i = 0; i < list.size(); i++)
        {
            result[i] = list.get(i).doubleValue();
        }

        return result;
    }

    /**
     * Writes per-database mean and standard deviation of the measured times as CSV.
     *
     * @param output
     *            CSV output file (must not be a directory)
     * @param times
     *            in milliseconds
     * @param benchmarkTitle
     *            label used in the CSV header
     * @throws BenchmarkingException if writing the output fails
     */
    public static void writeResults(File output, Map<GraphDatabaseType, List<Double>> times, String benchmarkTitle)
    {
        logger.info("Write results to " + output);
        if (output.isDirectory())
        {
            throw new IllegalArgumentException("output was a directory: " + output.getAbsolutePath());
        }

        SortedMap<GraphDatabaseType, Double> means = new TreeMap<GraphDatabaseType, Double>();
        SortedMap<GraphDatabaseType, Double> standardDeviations = new TreeMap<GraphDatabaseType, Double>();
        for (GraphDatabaseType type : times.keySet())
        {
            // scale milliseconds to seconds before aggregating
            final double[] scaledTimesArray = MathArrays.scale(0.001, convert(times.get(type)));
            DescriptiveStatistics stats = new DescriptiveStatistics();
            for (double val : scaledTimesArray)
            {
                stats.addValue(val);
            }
            means.put(type, stats.getMean());
            standardDeviations.put(type, stats.getStandardDeviation());
        }

        // use two passes so the compute is not interleaved with disk IO
        try (BufferedWriter out = new BufferedWriter(new FileWriter(output)))
        {
            // TODO(amcp) add other sample means p50 p90 p99 p99.9 etc
            out.write(String.format("DB,%s p100 Mean (s),Sample Size,Standard Deviation\n", benchmarkTitle));
            for (GraphDatabaseType type : times.keySet())
            {
                out.write(String.format("%s,%f,%d,%f\n", type.getShortname(), means.get(type), times.get(type).size(),
                    standardDeviations.get(type)));
            }
        }
        catch (IOException e)
        {
            // keep the cause attached so the underlying IO failure is not lost
            throw new BenchmarkingException(String.format("Exception thrown when writing output to %s: %s", output,
                e.getMessage()), e);
        }
    }

    /** Mean of each inner list; a null or empty outer list yields an empty result. */
    public static List<Double> calculateMeanList(List<List<Double>> lists)
    {
        if (lists == null || lists.isEmpty())
        {
            return new ArrayList<Double>(0);
        }
        List<Double> result = new ArrayList<Double>();
        for (List<Double> list : lists)
        {
            result.add(new DescriptiveStatistics(convert(list)).getMean());
        }
        return result;
    }
}
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java:
package eu.socialsensor.graphdatabases;

import com.google.common.collect.Iterables;
import com.orientechnologies.common.collection.OMultiCollectionIterator;
import com.orientechnologies.common.util.OCallable;
import com.orientechnologies.orient.core.command.OBasicCommandContext;
import com.orientechnologies.orient.core.config.OGlobalConfiguration;
import com.orientechnologies.orient.core.id.ORID;
import com.orientechnologies.orient.core.metadata.schema.OType;
import com.orientechnologies.orient.graph.sql.functions.OSQLFunctionShortestPath;
import com.tinkerpop.blueprints.Direction;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Parameter;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.blueprints.impls.orient.OrientBaseGraph;
import com.tinkerpop.blueprints.impls.orient.OrientEdgeType;
import com.tinkerpop.blueprints.impls.orient.OrientGraph;
import com.tinkerpop.blueprints.impls.orient.OrientGraphFactory;
import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx;
import com.tinkerpop.blueprints.impls.orient.OrientVertex;
import com.tinkerpop.blueprints.impls.orient.OrientVertexType;

import eu.socialsensor.insert.Insertion;
import eu.socialsensor.insert.OrientMassiveInsertion;
import eu.socialsensor.insert.OrientSingleInsertion;
import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * OrientDB graph database implementation.
 *
 * NOTE(review): all generic type parameters in this file were stripped by the
 * extraction (the raw dump shows e.g. "Set getNeighborsIds", "HashMap>");
 * they are restored below from how the values are used.
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 */
public class OrientGraphDatabase extends GraphDatabaseBase<Iterator<Vertex>, Iterator<Edge>, Vertex, Edge>
{

    private OrientGraph graph = null;
    // when true, edges without properties are stored inline in the vertex
    private boolean useLightWeightEdges;

    /**
     * Creates the database wrapper. Storage compression is disabled globally
     * so that on-disk size comparisons between engines are fair.
     */
    public OrientGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn)
    {
        super(GraphDatabaseType.ORIENT_DB, dbStorageDirectoryIn);
        OGlobalConfiguration.STORAGE_COMPRESSION_METHOD.setValue("nothing");
        // default to lightweight edges when the config leaves it unset
        this.useLightWeightEdges = config.orientLightweightEdges() == null ? true : config.orientLightweightEdges()
            .booleanValue();
    }

    @Override
    public void open()
    {
        graph = getGraph(dbStorageDirectory);
    }

    @SuppressWarnings("deprecation")
    @Override
    public void createGraphForSingleLoad()
    {
        OGlobalConfiguration.STORAGE_KEEP_OPEN.setValue(false);
        graph = getGraph(dbStorageDirectory);
        createSchema();
    }

    @SuppressWarnings("deprecation")
    @Override
    public void createGraphForMassiveLoad()
    {
        OGlobalConfiguration.STORAGE_KEEP_OPEN.setValue(false);
        graph = getGraph(dbStorageDirectory);
        createSchema();
    }

    @Override
    public void massiveModeLoading(File dataPath)
    {
        OrientMassiveInsertion orientMassiveInsertion = new OrientMassiveInsertion(this.graph.getRawGraph().getURL());
        orientMassiveInsertion.createGraph(dataPath, 0 /* scenarioNumber */);
    }

    @Override
    public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumber)
    {
        Insertion orientSingleInsertion = new OrientSingleInsertion(this.graph, resultsPath);
        orientSingleInsertion.createGraph(dataPath, scenarioNumber);
    }

    @Override
    public void shutdown()
    {
        if (graph == null)
        {
            return;
        }
        graph.shutdown();
        graph = null;
    }

    @Override
    public void delete()
    {
        OrientGraphNoTx g = new OrientGraphNoTx("plocal:" + dbStorageDirectory.getAbsolutePath());
        g.drop();

        Utils.deleteRecursively(dbStorageDirectory);
    }

    @Override
    public void shutdownMassiveGraph()
    {
        shutdown();
    }

    /**
     * Runs OrientDB's shortestPath() SQL function between v1 and the vertex
     * with node id i, following OUT edges, capped at 5 hops. The result is
     * only materialized (size()), not returned — this method exists for
     * timing.
     */
    @Override
    public void shortestPath(final Vertex v1, Integer i)
    {
        final OrientVertex v2 = (OrientVertex) getVertex(i);

        List<ORID> result = new OSQLFunctionShortestPath().execute(graph,
            null, null, new Object[] { ((OrientVertex) v1).getRecord(), v2.getRecord(), Direction.OUT, 5 },
            new OBasicCommandContext());

        result.size();
    }

    @Override
    public int getNodeCount()
    {
        return (int) graph.countVertices();
    }

    /**
     * Returns the node ids of all vertices with a SIMILAR edge pointing at
     * the given node (incoming neighbors).
     */
    @Override
    public Set<Integer> getNeighborsIds(int nodeId)
    {
        Set<Integer> neighbours = new HashSet<Integer>();
        Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
        for (Vertex v : vertex.getVertices(Direction.IN, SIMILAR))
        {
            Integer neighborId = v.getProperty(NODE_ID);
            neighbours.add(neighborId);
        }
        return neighbours;
    }

    /** Node weight is defined as its out-degree over SIMILAR edges. */
    @Override
    public double getNodeWeight(int nodeId)
    {
        Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
        double weight = getNodeOutDegree(vertex);
        return weight;
    }

    public double getNodeInDegree(Vertex vertex)
    {
        @SuppressWarnings("rawtypes")
        OMultiCollectionIterator result = (OMultiCollectionIterator) vertex.getVertices(Direction.IN, SIMILAR);
        return (double) result.size();
    }

    public double getNodeOutDegree(Vertex vertex)
    {
        @SuppressWarnings("rawtypes")
        OMultiCollectionIterator result = (OMultiCollectionIterator) vertex.getVertices(Direction.OUT, SIMILAR);
        return (double) result.size();
    }

    /** Assigns every vertex its own singleton community and node-community. */
    @Override
    public void initCommunityProperty()
    {
        int communityCounter = 0;
        for (Vertex v : graph.getVertices())
        {
            ((OrientVertex) v).setProperties(NODE_COMMUNITY, communityCounter, COMMUNITY, communityCounter);
            ((OrientVertex) v).save();
            communityCounter++;
        }
    }

    @Override
    public Set<Integer> getCommunitiesConnectedToNodeCommunities(int nodeCommunities)
    {
        Set<Integer> communities = new HashSet<Integer>();
        Iterable<Vertex> vertices = graph.getVertices(NODE_COMMUNITY, nodeCommunities);
        for (Vertex vertex : vertices)
        {
            for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR))
            {
                int community = v.getProperty(COMMUNITY);
                if (!communities.contains(community))
                {
                    communities.add(community);
                }
            }
        }
        return communities;
    }

    @Override
    public Set<Integer> getNodesFromCommunity(int community)
    {
        Set<Integer> nodes = new HashSet<Integer>();
        Iterable<Vertex> iter = graph.getVertices(COMMUNITY, community);
        for (Vertex v : iter)
        {
            Integer nodeId = v.getProperty(NODE_ID);
            nodes.add(nodeId);
        }
        return nodes;
    }

    @Override
    public Set<Integer> getNodesFromNodeCommunity(int nodeCommunity)
    {
        Set<Integer> nodes = new HashSet<Integer>();
        // use the shared NODE_COMMUNITY constant instead of a string literal,
        // consistent with every other query against this key in the class
        Iterable<Vertex> iter = graph.getVertices(NODE_COMMUNITY, nodeCommunity);
        for (Vertex v : iter)
        {
            Integer nodeId = v.getProperty(NODE_ID);
            nodes.add(nodeId);
        }
        return nodes;
    }

    /**
     * Counts SIMILAR edges from vertices in node-community vertexCommunity to
     * vertices in community communityVertices.
     * NOTE(review): Iterables.contains re-iterates comVertices for every
     * candidate edge (O(n*m)); acceptable for benchmark-sized communities.
     */
    @Override
    public double getEdgesInsideCommunity(int vertexCommunity, int communityVertices)
    {
        double edges = 0;
        Iterable<Vertex> vertices = graph.getVertices(NODE_COMMUNITY, vertexCommunity);
        Iterable<Vertex> comVertices = graph.getVertices(COMMUNITY, communityVertices);
        for (Vertex vertex : vertices)
        {
            for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR))
            {
                if (Iterables.contains(comVertices, v))
                {
                    edges++;
                }
            }
        }
        return edges;
    }

    /**
     * Sum of out-degrees of the community's members; zero by definition when
     * the community has at most one member.
     */
    @Override
    public double getCommunityWeight(int community)
    {
        double communityWeight = 0;
        Iterable<Vertex> iter = graph.getVertices(COMMUNITY, community);
        if (Iterables.size(iter) > 1)
        {
            for (Vertex vertex : iter)
            {
                communityWeight += getNodeOutDegree(vertex);
            }
        }
        return communityWeight;
    }

    @Override
    public double getNodeCommunityWeight(int nodeCommunity)
    {
        double nodeCommunityWeight = 0;
        Iterable<Vertex> iter = graph.getVertices(NODE_COMMUNITY, nodeCommunity);
        for (Vertex vertex : iter)
        {
            nodeCommunityWeight += getNodeOutDegree(vertex);
        }
        return nodeCommunityWeight;
    }

    /** Moves every vertex of the given node-community into toCommunity. */
    @Override
    public void moveNode(int nodeCommunity, int toCommunity)
    {
        Iterable<Vertex> fromIter = graph.getVertices(NODE_COMMUNITY, nodeCommunity);
        for (Vertex vertex : fromIter)
        {
            vertex.setProperty(COMMUNITY, toCommunity);
        }
    }

    @Override
    public double getGraphWeightSum()
    {
        long edges = 0;
        for (Vertex o : graph.getVertices())
        {
            edges += ((OrientVertex) o).countEdges(Direction.OUT, SIMILAR);
        }
        return (double) edges;
    }

    /**
     * Renumbers communities densely starting at 0 and resets each vertex's
     * node-community to its community.
     *
     * @return the number of distinct communities after renumbering
     */
    @Override
    public int reInitializeCommunities()
    {
        Map<Integer, Integer> initCommunities = new HashMap<Integer, Integer>();
        int communityCounter = 0;
        for (Vertex v : graph.getVertices())
        {
            int communityId = v.getProperty(COMMUNITY);
            if (!initCommunities.containsKey(communityId))
            {
                initCommunities.put(communityId, communityCounter);
                communityCounter++;
            }
            int newCommunityId = initCommunities.get(communityId);
            ((OrientVertex) v).setProperties(COMMUNITY, newCommunityId, NODE_COMMUNITY, newCommunityId);
            ((OrientVertex) v).save();
        }
        return communityCounter;
    }

    @Override
    public int getCommunity(int nodeCommunity)
    {
        final Iterator<Vertex> result = graph.getVertices(NODE_COMMUNITY, nodeCommunity).iterator();
        if (!result.hasNext())
            throw new IllegalArgumentException("node community not found: " + nodeCommunity);

        Vertex vertex = result.next();
        int community = vertex.getProperty(COMMUNITY);
        return community;
    }

    @Override
    public int getCommunityFromNode(int nodeId)
    {
        Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
        return vertex.getProperty(COMMUNITY);
    }

    /** Community "size" = number of distinct node-communities inside it. */
    @Override
    public int getCommunitySize(int community)
    {
        Iterable<Vertex> vertices = graph.getVertices(COMMUNITY, community);
        Set<Integer> nodeCommunities = new HashSet<Integer>();
        for (Vertex v : vertices)
        {
            int nodeCommunity = v.getProperty(NODE_COMMUNITY);
            if (!nodeCommunities.contains(nodeCommunity))
            {
                nodeCommunities.add(nodeCommunity);
            }
        }
        return nodeCommunities.size();
    }

    /** Maps community id -> list of member node ids, for ids [0, numberOfCommunities). */
    @Override
    public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities)
    {
        Map<Integer, List<Integer>> communities = new HashMap<Integer, List<Integer>>();
        for (int i = 0; i < numberOfCommunities; i++)
        {
            Iterator<Vertex> verticesIter = graph.getVertices(COMMUNITY, i).iterator();
            List<Integer> vertices = new ArrayList<Integer>();
            while (verticesIter.hasNext())
            {
                Integer nodeId = verticesIter.next().getProperty(NODE_ID);
                vertices.add(nodeId);
            }
            communities.put(i, vertices);
        }
        return communities;
    }

    /**
     * Creates the vertex/edge schema and key indexes outside of a
     * transaction. Idempotent: skipped when NODE_ID already exists.
     */
    protected void createSchema()
    {
        graph.executeOutsideTx(new OCallable<Object, OrientBaseGraph>() {
            @SuppressWarnings({ "unchecked", "rawtypes" })
            @Override
            public Object call(final OrientBaseGraph g)
            {
                OrientVertexType v = g.getVertexBaseType();
                if (!v.existsProperty(NODE_ID)) { // TODO fix schema detection hack later
                    v.createProperty(NODE_ID, OType.INTEGER);
                    g.createKeyIndex(NODE_ID, Vertex.class, new Parameter("type", "UNIQUE_HASH_INDEX"), new Parameter(
                        "keytype", "INTEGER"));

                    v.createEdgeProperty(Direction.OUT, SIMILAR, OType.LINKBAG);
                    v.createEdgeProperty(Direction.IN, SIMILAR, OType.LINKBAG);
                    OrientEdgeType similar = g.createEdgeType(SIMILAR);
                    similar.createProperty("out", OType.LINK, v);
                    similar.createProperty("in", OType.LINK, v);
                    g.createKeyIndex(COMMUNITY, Vertex.class, new Parameter("type", "NOTUNIQUE_HASH_INDEX"),
                        new Parameter("keytype", "INTEGER"));
                    g.createKeyIndex(NODE_COMMUNITY, Vertex.class, new Parameter("type", "NOTUNIQUE_HASH_INDEX"),
                        new Parameter("keytype", "INTEGER"));
                }

                return null;
            }
        });
    }

    private OrientGraph getGraph(final File dbPath)
    {
        OrientGraph g;
        OrientGraphFactory graphFactory = new OrientGraphFactory("plocal:" + dbPath.getAbsolutePath());
        g = graphFactory.getTx();
        g.setUseLightweightEdges(this.useLightWeightEdges);
        return g;
    }

    @Override
    public boolean nodeExists(int nodeId)
    {
        Iterable<Vertex> iter = graph.getVertices(NODE_ID, nodeId);
        return iter.iterator().hasNext();
    }

    @Override
    public Iterator<Vertex> getVertexIterator()
    {
        return graph.getVertices().iterator();
    }

    @Override
    public Iterator<Edge> getNeighborsOfVertex(Vertex v)
    {
        return v.getEdges(Direction.BOTH, SIMILAR).iterator();
    }

    @Override
    public void cleanupVertexIterator(Iterator<Vertex> it)
    {
        // NOOP for timing
    }

    @Override
    public Vertex getOtherVertexFromEdge(Edge edge, Vertex oneVertex)
    {
        return edge.getVertex(Direction.IN).equals(oneVertex) ? edge.getVertex(Direction.OUT) : edge
            .getVertex(Direction.IN);
    }

    @Override
    public Iterator<Edge> getAllEdges()
    {
        return graph.getEdges().iterator();
    }

    // NOTE(review): "src" maps to Direction.IN and "dest" to Direction.OUT
    // here, which is the reverse of the usual Blueprints convention
    // (OUT = tail/source). Preserved as-is since callers may rely on it —
    // TODO confirm against GraphDatabaseBase's contract.
    @Override
    public Vertex getSrcVertexFromEdge(Edge edge)
    {
        return edge.getVertex(Direction.IN);
    }

    @Override
    public Vertex getDestVertexFromEdge(Edge edge)
    {
        return edge.getVertex(Direction.OUT);
    }

    @Override
    public boolean edgeIteratorHasNext(Iterator<Edge> it)
    {
        return it.hasNext();
    }

    @Override
    public Edge nextEdge(Iterator<Edge> it)
    {
        return it.next();
    }

    @Override
    public void cleanupEdgeIterator(Iterator<Edge> it)
    {
        // NOOP
    }

    @Override
    public boolean vertexIteratorHasNext(Iterator<Vertex> it)
    {
        return it.hasNext();
    }

    @Override
    public Vertex nextVertex(Iterator<Vertex> it)
    {
        return it.next();
    }

    @Override
    public Vertex getVertex(Integer i)
    {
        return graph.getVertices(NODE_ID, i).iterator().next();
    }
}
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/main/BenchmarkConfiguration.java:
--------------------------------------------------------------------------------
package eu.socialsensor.main;

import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.commons.configuration.Configuration;
import org.apache.commons.math3.util.CombinatoricsUtils;

import com.amazon.titan.diskstorage.dynamodb.BackendDataModel;
import com.amazon.titan.diskstorage.dynamodb.Constants;
import com.google.common.primitives.Ints;
import
com.thinkaurelius.titan.graphdb.configuration.GraphDatabaseConfiguration; 18 | 19 | import eu.socialsensor.dataset.DatasetFactory; 20 | 21 | /** 22 | * 23 | * @author Alexander Patrikalakis 24 | * 25 | */ 26 | public class BenchmarkConfiguration 27 | { 28 | // orientdb Configuration 29 | private static final String LIGHTWEIGHT_EDGES = "lightweight-edges"; 30 | 31 | // Sparksee / DEX configuration 32 | private static final String LICENSE_KEY = "license-key"; 33 | 34 | // Titan specific configuration 35 | private static final String TITAN = "titan"; 36 | private static final String BUFFER_SIZE = GraphDatabaseConfiguration.BUFFER_SIZE.getName(); 37 | private static final String IDS_BLOCKSIZE = GraphDatabaseConfiguration.IDS_BLOCK_SIZE.getName(); 38 | private static final String PAGE_SIZE = GraphDatabaseConfiguration.PAGE_SIZE.getName(); 39 | public static final String CSV_INTERVAL = GraphDatabaseConfiguration.METRICS_CSV_INTERVAL.getName(); 40 | public static final String CSV = GraphDatabaseConfiguration.METRICS_CSV_NS.getName(); 41 | private static final String CSV_DIR = GraphDatabaseConfiguration.METRICS_CSV_DIR.getName(); 42 | public static final String GRAPHITE = GraphDatabaseConfiguration.METRICS_GRAPHITE_NS.getName(); 43 | private static final String GRAPHITE_HOSTNAME = GraphDatabaseConfiguration.GRAPHITE_HOST.getName(); 44 | 45 | // DynamoDB Storage Backend for Titan specific configuration 46 | private static final String CONSTRUCTOR_ARGS = Constants.DYNAMODB_CREDENTIALS_CONSTRUCTOR_ARGS.getName(); 47 | private static final String CLASS_NAME = Constants.DYNAMODB_CREDENTIALS_CLASS_NAME.getName(); 48 | private static final String CONSISTENT_READ = Constants.DYNAMODB_FORCE_CONSISTENT_READ.getName(); 49 | private static final String TPS = "tps"; 50 | private static final String CREDENTIALS = Constants.DYNAMODB_CLIENT_CREDENTIALS_NAMESPACE.getName(); 51 | private static final String ENDPOINT = Constants.DYNAMODB_CLIENT_ENDPOINT.getName(); 52 | private static final 
String TABLE_PREFIX = Constants.DYNAMODB_TABLE_PREFIX.getName(); 53 | 54 | // benchmark configuration 55 | private static final String DATASET = "dataset"; 56 | private static final String DATABASE_STORAGE_DIRECTORY = "database-storage-directory"; 57 | private static final String ACTUAL_COMMUNITIES = "actual-communities"; 58 | private static final String NODES_COUNT = "nodes-count"; 59 | private static final String RANDOMIZE_CLUSTERING = "randomize-clustering"; 60 | private static final String CACHE_VALUES = "cache-values"; 61 | private static final String CACHE_INCREMENT_FACTOR = "cache-increment-factor"; 62 | private static final String CACHE_VALUES_COUNT = "cache-values-count"; 63 | private static final String PERMUTE_BENCHMARKS = "permute-benchmarks"; 64 | private static final String RANDOM_NODES = "shortest-path-random-nodes"; 65 | 66 | private static final Set metricsReporters = new HashSet(); 67 | static { 68 | metricsReporters.add(CSV); 69 | metricsReporters.add(GRAPHITE); 70 | } 71 | 72 | private final File dataset; 73 | private final List benchmarkTypes; 74 | private final SortedSet selectedDatabases; 75 | private final File resultsPath; 76 | 77 | // storage directory 78 | private final File dbStorageDirectory; 79 | 80 | // metrics (optional) 81 | private final long csvReportingInterval; 82 | private final File csvDir; 83 | private final String graphiteHostname; 84 | private final long graphiteReportingInterval; 85 | 86 | // storage backend specific settings 87 | private final long dynamodbTps; 88 | private final BackendDataModel dynamodbDataModel; 89 | private final boolean dynamodbConsistentRead; 90 | private final Boolean orientLightweightEdges; 91 | private final String sparkseeLicenseKey; 92 | 93 | // shortest path 94 | private final int randomNodes; 95 | 96 | // clustering 97 | private final Boolean randomizedClustering; 98 | private final Integer nodesCount; 99 | private final Integer cacheValuesCount; 100 | private final Double 
cacheIncrementFactor; 101 | private final List cacheValues; 102 | private final File actualCommunities; 103 | private final boolean permuteBenchmarks; 104 | private final int scenarios; 105 | private final String dynamodbCredentialsFqClassName; 106 | private final String dynamodbCredentialsCtorArguments; 107 | private final String dynamodbEndpoint; 108 | private final int bufferSize; 109 | private final int blocksize; 110 | private final int pageSize; 111 | private final int dynamodbWorkerThreads; 112 | private final boolean dynamodbPrecreateTables; 113 | private final String dynamodbTablePrefix; 114 | 115 | public String getDynamodbCredentialsFqClassName() 116 | { 117 | return dynamodbCredentialsFqClassName; 118 | } 119 | 120 | public String getDynamodbCredentialsCtorArguments() 121 | { 122 | return dynamodbCredentialsCtorArguments; 123 | } 124 | 125 | public String getDynamodbEndpoint() 126 | { 127 | return dynamodbEndpoint; 128 | } 129 | 130 | public BenchmarkConfiguration(Configuration appconfig) 131 | { 132 | if (appconfig == null) 133 | { 134 | throw new IllegalArgumentException("appconfig may not be null"); 135 | } 136 | 137 | Configuration eu = appconfig.subset("eu"); 138 | Configuration socialsensor = eu.subset("socialsensor"); 139 | 140 | //metrics 141 | final Configuration metrics = socialsensor.subset(GraphDatabaseConfiguration.METRICS_NS.getName()); 142 | 143 | final Configuration graphite = metrics.subset(GRAPHITE); 144 | this.graphiteHostname = graphite.getString(GRAPHITE_HOSTNAME, null); 145 | this.graphiteReportingInterval = graphite.getLong(GraphDatabaseConfiguration.GRAPHITE_INTERVAL.getName(), 1000 /*default 1sec*/); 146 | 147 | final Configuration csv = metrics.subset(CSV); 148 | this.csvReportingInterval = metrics.getLong(CSV_INTERVAL, 1000 /*ms*/); 149 | this.csvDir = csv.containsKey(CSV_DIR) ? 
new File(csv.getString(CSV_DIR, System.getProperty("user.dir") /*default*/)) : null; 150 | 151 | Configuration dynamodb = socialsensor.subset("dynamodb"); 152 | this.dynamodbWorkerThreads = dynamodb.getInt("workers", 25); 153 | Configuration credentials = dynamodb.subset(CREDENTIALS); 154 | this.dynamodbPrecreateTables = dynamodb.getBoolean("precreate-tables", Boolean.FALSE); 155 | this.dynamodbTps = Math.max(1, dynamodb.getLong(TPS, 750 /*default*/)); 156 | this.dynamodbConsistentRead = dynamodb.containsKey(CONSISTENT_READ) ? dynamodb.getBoolean(CONSISTENT_READ) 157 | : false; 158 | this.dynamodbDataModel = dynamodb.containsKey("data-model") ? BackendDataModel.valueOf(dynamodb 159 | .getString("data-model")) : null; 160 | this.dynamodbCredentialsFqClassName = credentials.containsKey(CLASS_NAME) ? credentials.getString(CLASS_NAME) 161 | : null; 162 | this.dynamodbCredentialsCtorArguments = credentials.containsKey(CONSTRUCTOR_ARGS) ? credentials 163 | .getString(CONSTRUCTOR_ARGS) : null; 164 | this.dynamodbEndpoint = dynamodb.containsKey(ENDPOINT) ? dynamodb.getString(ENDPOINT) : null; 165 | this.dynamodbTablePrefix = dynamodb.containsKey(TABLE_PREFIX) ? dynamodb.getString(TABLE_PREFIX) : Constants.DYNAMODB_TABLE_PREFIX.getDefaultValue(); 166 | 167 | Configuration orient = socialsensor.subset("orient"); 168 | orientLightweightEdges = orient.containsKey(LIGHTWEIGHT_EDGES) ? orient.getBoolean(LIGHTWEIGHT_EDGES) : null; 169 | 170 | Configuration sparksee = socialsensor.subset("sparksee"); 171 | sparkseeLicenseKey = sparksee.containsKey(LICENSE_KEY) ? 
sparksee.getString(LICENSE_KEY) : null; 172 | 173 | Configuration titan = socialsensor.subset(TITAN); //TODO(amcp) move dynamodb ns into titan 174 | bufferSize = titan.getInt(BUFFER_SIZE, GraphDatabaseConfiguration.BUFFER_SIZE.getDefaultValue()); 175 | blocksize = titan.getInt(IDS_BLOCKSIZE, GraphDatabaseConfiguration.IDS_BLOCK_SIZE.getDefaultValue()); 176 | pageSize = titan.getInt(PAGE_SIZE, GraphDatabaseConfiguration.PAGE_SIZE.getDefaultValue()); 177 | 178 | // database storage directory 179 | if (!socialsensor.containsKey(DATABASE_STORAGE_DIRECTORY)) 180 | { 181 | throw new IllegalArgumentException("configuration must specify database-storage-directory"); 182 | } 183 | dbStorageDirectory = new File(socialsensor.getString(DATABASE_STORAGE_DIRECTORY)); 184 | dataset = validateReadableFile(socialsensor.getString(DATASET), DATASET); 185 | 186 | // load the dataset 187 | DatasetFactory.getInstance().getDataset(dataset); 188 | 189 | if (!socialsensor.containsKey(PERMUTE_BENCHMARKS)) 190 | { 191 | throw new IllegalArgumentException("configuration must set permute-benchmarks to true or false"); 192 | } 193 | permuteBenchmarks = socialsensor.getBoolean(PERMUTE_BENCHMARKS); 194 | 195 | List benchmarkList = socialsensor.getList("benchmarks"); 196 | benchmarkTypes = new ArrayList(); 197 | for (Object str : benchmarkList) 198 | { 199 | benchmarkTypes.add(BenchmarkType.valueOf(str.toString())); 200 | } 201 | 202 | selectedDatabases = new TreeSet(); 203 | for (Object database : socialsensor.getList("databases")) 204 | { 205 | if (!GraphDatabaseType.STRING_REP_MAP.keySet().contains(database.toString())) 206 | { 207 | throw new IllegalArgumentException(String.format("selected database %s not supported", 208 | database.toString())); 209 | } 210 | selectedDatabases.add(GraphDatabaseType.STRING_REP_MAP.get(database)); 211 | } 212 | scenarios = permuteBenchmarks ? 
Ints.checkedCast(CombinatoricsUtils.factorial(selectedDatabases.size())) : 1; 213 | 214 | resultsPath = new File(System.getProperty("user.dir"), socialsensor.getString("results-path")); 215 | if (!resultsPath.exists() && !resultsPath.mkdirs()) 216 | { 217 | throw new IllegalArgumentException("unable to create results directory"); 218 | } 219 | if (!resultsPath.canWrite()) 220 | { 221 | throw new IllegalArgumentException("unable to write to results directory"); 222 | } 223 | 224 | randomNodes = socialsensor.getInteger(RANDOM_NODES, new Integer(100)); 225 | 226 | if (this.benchmarkTypes.contains(BenchmarkType.CLUSTERING)) 227 | { 228 | if (!socialsensor.containsKey(NODES_COUNT)) 229 | { 230 | throw new IllegalArgumentException("the CW benchmark requires nodes-count integer in config"); 231 | } 232 | nodesCount = socialsensor.getInt(NODES_COUNT); 233 | 234 | if (!socialsensor.containsKey(RANDOMIZE_CLUSTERING)) 235 | { 236 | throw new IllegalArgumentException("the CW benchmark requires randomize-clustering bool in config"); 237 | } 238 | randomizedClustering = socialsensor.getBoolean(RANDOMIZE_CLUSTERING); 239 | 240 | if (!socialsensor.containsKey(ACTUAL_COMMUNITIES)) 241 | { 242 | throw new IllegalArgumentException("the CW benchmark requires a file with actual communities"); 243 | } 244 | actualCommunities = validateReadableFile(socialsensor.getString(ACTUAL_COMMUNITIES), ACTUAL_COMMUNITIES); 245 | 246 | final boolean notGenerating = socialsensor.containsKey(CACHE_VALUES); 247 | if (notGenerating) 248 | { 249 | List objects = socialsensor.getList(CACHE_VALUES); 250 | cacheValues = new ArrayList(objects.size()); 251 | cacheValuesCount = null; 252 | cacheIncrementFactor = null; 253 | for (Object o : objects) 254 | { 255 | cacheValues.add(Integer.valueOf(o.toString())); 256 | } 257 | } 258 | else if (socialsensor.containsKey(CACHE_VALUES_COUNT) && socialsensor.containsKey(CACHE_INCREMENT_FACTOR)) 259 | { 260 | cacheValues = null; 261 | // generate the cache values with 
parameters 262 | if (!socialsensor.containsKey(CACHE_VALUES_COUNT)) 263 | { 264 | throw new IllegalArgumentException( 265 | "the CW benchmark requires cache-values-count int in config when cache-values not specified"); 266 | } 267 | cacheValuesCount = socialsensor.getInt(CACHE_VALUES_COUNT); 268 | 269 | if (!socialsensor.containsKey(CACHE_INCREMENT_FACTOR)) 270 | { 271 | throw new IllegalArgumentException( 272 | "the CW benchmark requires cache-increment-factor int in config when cache-values not specified"); 273 | } 274 | cacheIncrementFactor = socialsensor.getDouble(CACHE_INCREMENT_FACTOR); 275 | } 276 | else 277 | { 278 | throw new IllegalArgumentException( 279 | "when doing CW benchmark, must provide cache-values or parameters to generate them"); 280 | } 281 | } 282 | else 283 | { 284 | randomizedClustering = null; 285 | nodesCount = null; 286 | cacheValuesCount = null; 287 | cacheIncrementFactor = null; 288 | cacheValues = null; 289 | actualCommunities = null; 290 | } 291 | } 292 | 293 | public File getDataset() 294 | { 295 | return dataset; 296 | } 297 | 298 | public SortedSet getSelectedDatabases() 299 | { 300 | return selectedDatabases; 301 | } 302 | 303 | public File getDbStorageDirectory() 304 | { 305 | return dbStorageDirectory; 306 | } 307 | 308 | public File getResultsPath() 309 | { 310 | return resultsPath; 311 | } 312 | 313 | public long getDynamodbTps() 314 | { 315 | return dynamodbTps; 316 | } 317 | 318 | public boolean dynamodbConsistentRead() 319 | { 320 | return dynamodbConsistentRead; 321 | } 322 | 323 | public BackendDataModel getDynamodbDataModel() 324 | { 325 | return dynamodbDataModel; 326 | } 327 | 328 | public List getBenchmarkTypes() 329 | { 330 | return benchmarkTypes; 331 | } 332 | 333 | public Boolean randomizedClustering() 334 | { 335 | return randomizedClustering; 336 | } 337 | 338 | public Integer getNodesCount() 339 | { 340 | return nodesCount; 341 | } 342 | 343 | public Integer getCacheValuesCount() 344 | { 345 | return 
cacheValuesCount; 346 | } 347 | 348 | public Double getCacheIncrementFactor() 349 | { 350 | return cacheIncrementFactor; 351 | } 352 | 353 | public List getCacheValues() 354 | { 355 | return cacheValues; 356 | } 357 | 358 | public File getActualCommunitiesFile() 359 | { 360 | return actualCommunities; 361 | } 362 | 363 | public Boolean orientLightweightEdges() 364 | { 365 | return orientLightweightEdges; 366 | } 367 | 368 | public String getSparkseeLicenseKey() 369 | { 370 | return sparkseeLicenseKey; 371 | } 372 | 373 | public boolean permuteBenchmarks() 374 | { 375 | return permuteBenchmarks; 376 | } 377 | 378 | public int getScenarios() 379 | { 380 | return scenarios; 381 | } 382 | 383 | private static final File validateReadableFile(String fileName, String fileType) 384 | { 385 | File file = new File(fileName); 386 | if (!file.exists()) 387 | { 388 | throw new IllegalArgumentException(String.format("the %s does not exist", fileType)); 389 | } 390 | 391 | if (!(file.isFile() && file.canRead())) 392 | { 393 | throw new IllegalArgumentException(String.format("the %s must be a file that this user can read", fileType)); 394 | } 395 | return file; 396 | } 397 | 398 | public int getRandomNodes() 399 | { 400 | return randomNodes; 401 | } 402 | 403 | public long getCsvReportingInterval() 404 | { 405 | return csvReportingInterval; 406 | } 407 | 408 | public long getGraphiteReportingInterval() 409 | { 410 | return graphiteReportingInterval; 411 | } 412 | 413 | public File getCsvDir() 414 | { 415 | return csvDir; 416 | } 417 | 418 | public String getGraphiteHostname() 419 | { 420 | return graphiteHostname; 421 | } 422 | 423 | public int getTitanBufferSize() 424 | { 425 | return bufferSize; 426 | } 427 | 428 | public int getTitanIdsBlocksize() 429 | { 430 | return blocksize; 431 | } 432 | 433 | public int getTitanPageSize() 434 | { 435 | return pageSize; 436 | } 437 | 438 | public int getDynamodbWorkerThreads() 439 | { 440 | return dynamodbWorkerThreads; 441 | } 442 | 443 | 
public boolean getDynamodbPrecreateTables() 444 | { 445 | return dynamodbPrecreateTables; 446 | } 447 | 448 | public String getDynamodbTablePrefix() 449 | { 450 | return dynamodbTablePrefix; 451 | } 452 | 453 | public boolean publishCsvMetrics() 454 | { 455 | return csvDir != null; 456 | } 457 | 458 | public boolean publishGraphiteMetrics() 459 | { 460 | return graphiteHostname != null && !graphiteHostname.isEmpty(); 461 | } 462 | } 463 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | eu.socialsensor 6 | graphdb-benchmarks 7 | 1.0 8 | 9 | graphdb-benchmarks 10 | https://github.com/socialsensor/graphdb-benchmarks 11 | Performance benchmark between popular graph databases. 12 | 13 | 14 | org.sonatype.oss 15 | oss-parent 16 | 7 17 | 18 | 19 | 20 | SocialSensor 21 | http://www.socialsensor.eu/ 22 | 23 | 24 | 25 | 26 | sarovios 27 | Sotiris Beis 28 | sotbeis@iti.gr 29 | 30 | 31 | amcp 32 | Alexander Patrikalakis 33 | amcp@me.com 34 | 35 | 36 | 37 | 38 | 39 | The Apache Software License, Version 2.0 40 | http://www.apache.org/licenses/LICENSE-2.0.txt 41 | repo 42 | 43 | 44 | 45 | 46 | scm:git:git@github.com:socialsensor/graphdb-benchmarks.git 47 | scm:git:git@github.com:socialsensor/graphdb-benchmarks.git 48 | git@github.com:socialsensor/graphdb-benchmarks.git 49 | graphdb-benchmarks-1.0 50 | 51 | 52 | 53 | 2.6.0 54 | 2.2.5 55 | 0.5.4 56 | 0.98.8-hadoop2 57 | 2.0.1 58 | 1.0.0 59 | 2.1 60 | 2.18.1 61 | 1.8 62 | 3.0.0-BETA3 63 | 64 | 65 | 66 | 67 | org.antlr 68 | antlr-runtime 69 | 3.2 70 | 71 | 72 | com.google.guava 73 | guava 74 | 14.0.1 75 | 76 | 77 | colt 78 | colt 79 | 1.2.0 80 | 81 | 82 | commons-codec 83 | commons-codec 84 | 1.7 85 | 86 | 87 | org.apache.commons 88 | commons-collections4 89 | 4.0 90 | 91 | 92 | org.apache.commons 93 | commons-math3 94 | 3.4.1 95 | 96 | 97 | commons-configuration 98 | commons-configuration 
99 | 1.6 100 | 101 | 102 | org.apache.commons 103 | commons-lang3 104 | 3.3.2 105 | 106 | 107 | org.apache.logging.log4j 108 | log4j-api 109 | ${log4j2.version} 110 | 111 | 112 | org.apache.logging.log4j 113 | log4j-core 114 | ${log4j2.version} 115 | 116 | 117 | org.apache.geronimo.specs 118 | geronimo-jta_1.1_spec 119 | 1.1.1 120 | 121 | 122 | com.tinkerpop.gremlin 123 | gremlin-groovy 124 | ${blueprints.version} 125 | 126 | 127 | com.tinkerpop.gremlin 128 | gremlin-java 129 | ${blueprints.version} 130 | 131 | 132 | com.github.stephenc.high-scale-lib 133 | high-scale-lib 134 | 1.1.2 135 | 136 | 137 | com.carrotsearch 138 | hppc 139 | 0.4.2 140 | 141 | 142 | com.sleepycat 143 | je 144 | 5.0.73 145 | 146 | 147 | net.java.dev.jna 148 | jna 149 | 4.0.0 150 | 151 | 152 | org.apache.lucene 153 | lucene-core 154 | 3.6.2 155 | 156 | 157 | org.neo4j 158 | neo4j-cypher 159 | ${neo4j.version} 160 | 161 | 162 | org.neo4j 163 | neo4j 164 | ${neo4j.version} 165 | 166 | 167 | com.tinkerpop.blueprints 168 | blueprints-neo4j2-graph 169 | ${blueprints.version} 170 | 171 | 172 | ch.qos.logback 173 | logback-classic 174 | 175 | 176 | 177 | 178 | com.orientechnologies 179 | orientdb-graphdb 180 | ${orientdb.version} 181 | 182 | 183 | com.tinkerpop 184 | pipes 185 | ${blueprints.version} 186 | 187 | 188 | org.slf4j 189 | slf4j-api 190 | 1.7.5 191 | 192 | 193 | org.slf4j 194 | slf4j-log4j12 195 | 1.7.5 196 | 197 | 198 | org.iq80.snappy 199 | snappy 200 | 0.3 201 | 202 | 203 | com.spatial4j 204 | spatial4j 205 | 0.3 206 | 207 | 208 | com.thinkaurelius.titan 209 | titan-berkeleyje 210 | ${titan.version} 211 | 212 | 213 | com.thinkaurelius.titan 214 | titan-cassandra 215 | ${titan.version} 216 | 224 | 225 | 226 | com.thinkaurelius.titan 227 | titan-hbase 228 | ${titan.version} 229 | 230 | 231 | com.thinkaurelius.titan 232 | titan-core 233 | ${titan.version} 234 | 235 | 236 | org.apache.hbase 237 | hbase-client 238 | ${hbase.version} 239 | 240 | 241 | com.amazonaws 242 | 
dynamodb-titan054-storage-backend 243 | ${dynamodb.titan.version} 244 | 245 | 246 | com.sparsity 247 | sparkseejava 248 | 5.0.0 249 | 250 | 251 | com.tinkerpop.blueprints 252 | blueprints-sparksee-graph 253 | ${blueprints.version} 254 | 255 | 256 | junit 257 | junit 258 | 4.11 259 | test 260 | 261 | 262 | com.codahale.metrics 263 | metrics-core 264 | ${metrics.version} 265 | 266 | 267 | 268 | 269 | 270 | 271 | org.apache.maven.plugins 272 | maven-release-plugin 273 | 2.5 274 | 275 | false 276 | release 277 | deploy 278 | 279 | 280 | 281 | maven-clean-plugin 282 | 2.6.1 283 | 284 | 285 | 286 | ${basedir} 287 | 288 | **/storage 289 | **/results 290 | 291 | false 292 | 293 | 294 | 295 | 296 | 297 | org.sonatype.plugins 298 | nexus-staging-maven-plugin 299 | 1.6.6 300 | true 301 | 302 | sonatype-nexus-staging 303 | https://oss.sonatype.org/ 304 | true 305 | 306 | 307 | 308 | org.apache.maven.plugins 309 | maven-compiler-plugin 310 | 3.2 311 | 312 | ${jdk.version} 313 | ${jdk.version} 314 | 315 | 316 | 317 | org.apache.maven.plugins 318 | maven-source-plugin 319 | 2.4 320 | 321 | 322 | attach-sources 323 | 324 | jar-no-fork 325 | 326 | 327 | 328 | 329 | 330 | org.apache.maven.plugins 331 | maven-javadoc-plugin 332 | 2.10.1 333 | 334 | 335 | attach-javadocs 336 | 337 | jar 338 | 339 | 340 | 341 | 342 | 343 | org.apache.maven.plugins 344 | maven-dependency-plugin 345 | 2.2 346 | 347 | 348 | copy-dependencies 349 | package 350 | 351 | copy-dependencies 352 | 353 | 354 | ${project.build.directory}/dependency 355 | false 356 | false 357 | true 358 | 359 | 360 | 361 | 362 | 363 | org.apache.maven.plugins 364 | maven-surefire-plugin 365 | ${maven.surefire.version} 366 | 367 | true 368 | 369 | 370 | 371 | org.apache.maven.plugins 372 | maven-gpg-plugin 373 | 1.5 374 | 375 | 376 | sign-artifacts 377 | verify 378 | 379 | sign 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | share 390 | 391 | 392 | src/assembly/component.xml 393 | 394 | 395 | 396 | 397 | 398 | 
maven-assembly-plugin 399 | 2.5.3 400 | 401 | 402 | src/assembly/component.xml 403 | 404 | 405 | 406 | 407 | make-assembly 408 | package 409 | 410 | single 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | bench 420 | 421 | 422 | 423 | maven-surefire-plugin 424 | ${maven.surefire.version} 425 | 426 | 427 | test 428 | 429 | test 430 | 431 | 432 | 433 | **/GraphDatabaseBenchmarkTest.java 434 | 435 | false 436 | 437 | ${basedir}/src/test/resources/META-INF/log4j2.xml 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | org.apache.httpcomponents 451 | httpclient 452 | 4.3.6 453 | 454 | 455 | org.apache.httpcomponents 456 | httpcore 457 | 4.3.3 458 | 459 | 460 | joda-time 461 | joda-time 462 | 2.8.1 463 | 464 | 465 | 466 | 467 | -------------------------------------------------------------------------------- /src/main/java/eu/socialsensor/graphdatabases/SparkseeGraphDatabase.java: -------------------------------------------------------------------------------- 1 | package eu.socialsensor.graphdatabases; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.util.ArrayList; 6 | import java.util.HashMap; 7 | import java.util.HashSet; 8 | import java.util.List; 9 | import java.util.Map; 10 | import java.util.Set; 11 | 12 | import com.sparsity.sparksee.algorithms.SinglePairShortestPathBFS; 13 | import com.sparsity.sparksee.gdb.AttributeKind; 14 | import com.sparsity.sparksee.gdb.Condition; 15 | import com.sparsity.sparksee.gdb.DataType; 16 | import com.sparsity.sparksee.gdb.Database; 17 | import com.sparsity.sparksee.gdb.EdgeData; 18 | import com.sparsity.sparksee.gdb.EdgesDirection; 19 | import com.sparsity.sparksee.gdb.Graph; 20 | import com.sparsity.sparksee.gdb.Objects; 21 | import com.sparsity.sparksee.gdb.ObjectsIterator; 22 | import com.sparsity.sparksee.gdb.Session; 23 | import com.sparsity.sparksee.gdb.Sparksee; 24 | import com.sparsity.sparksee.gdb.SparkseeConfig; 25 | import 
com.sparsity.sparksee.gdb.Value;

import eu.socialsensor.insert.Insertion;
import eu.socialsensor.insert.SparkseeMassiveInsertion;
import eu.socialsensor.insert.SparkseeSingleInsertion;
import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.BenchmarkingException;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

/**
 * Sparksee graph database implementation.
 *
 * <p>Vertex and edge iterators are Sparksee-native {@link ObjectsIterator}s, and
 * vertices/edges are addressed by their Sparksee OID ({@code Long}), hence the
 * type parameters on {@link GraphDatabaseBase}.
 *
 * @author sotbeis, sotbeis@iti.gr
 * @author Alexander Patrikalakis
 */
public class SparkseeGraphDatabase extends GraphDatabaseBase<ObjectsIterator, ObjectsIterator, Long, Long>
{
    public static final String NODE = "node";

    public static final String INSERTION_TIMES_OUTPUT_PATH = "data/sparksee.insertion.times";

    private final String sparkseeLicenseKey;

    // open() always opens read-write today; kept as a field so a read-only mode
    // would be a one-line change.
    private boolean readOnly = false;

    double totalWeight;

    private SparkseeConfig sparkseeConfig;
    private Sparksee sparksee;
    private Database database;
    private Session session;
    private Graph sparkseeGraph;

    // Sparksee schema handles, populated by createSchema(). NOTE(review): public
    // static presumably so the insertion helpers can reference them — confirm usage.
    public static int NODE_ATTRIBUTE;
    public static int COMMUNITY_ATTRIBUTE;
    public static int NODE_COMMUNITY_ATTRIBUTE;

    public static int NODE_TYPE;

    public static int EDGE_TYPE;

    // Scratch Value reused by every lookup to avoid per-call allocation; makes
    // this class non-thread-safe.
    Value value = new Value();

    public SparkseeGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn)
    {
        super(GraphDatabaseType.SPARKSEE, dbStorageDirectoryIn);
        this.sparkseeLicenseKey = config.getSparkseeLicenseKey();
    }

    /**
     * Opens an existing SparkseeDB in the storage directory and initializes the
     * session, graph handle and schema attribute ids.
     *
     * @throws BenchmarkingException if the database file cannot be opened
     */
    @Override
    public void open()
    {
        sparkseeConfig = new SparkseeConfig();
        sparkseeConfig.setLicense(sparkseeLicenseKey);
        sparksee = new Sparksee(sparkseeConfig);
        try
        {
            this.database = sparksee.open(getDbFile(dbStorageDirectory), readOnly);
        }
        catch (FileNotFoundException e)
        {
            throw new BenchmarkingException("unable to open the db storage directory for sparksee", e);
        }
        this.session = database.newSession();
        this.sparkseeGraph = session.getGraph();
        createSchema();
    }

    private String getDbFile(File dbPath)
    {
        return new File(dbPath, "SparkseeDB.gdb").getAbsolutePath();
    }

    @Override
    public void createGraphForSingleLoad()
    {
        createDatabase();
    }

    @Override
    public void createGraphForMassiveLoad()
    {
        // maybe some more configuration?
        createDatabase();
    }

    /**
     * Creates a fresh SparkseeDB under the storage directory and initializes the
     * session, graph and schema. Shared by single- and massive-load creation,
     * which previously duplicated this logic.
     *
     * @throws BenchmarkingException if the database cannot be created. The
     *         original code only printed the stack trace, which left
     *         session/database null and deferred the failure to a later NPE.
     */
    private void createDatabase()
    {
        try
        {
            dbStorageDirectory.mkdirs();
            sparkseeConfig = new SparkseeConfig();
            sparkseeConfig.setLicense(sparkseeLicenseKey);
            sparksee = new Sparksee(sparkseeConfig);
            database = sparksee.create(getDbFile(dbStorageDirectory), "SparkseeDB");
            session = database.newSession();
            sparkseeGraph = session.getGraph();
            createSchema();
        }
        catch (FileNotFoundException e)
        {
            throw new BenchmarkingException("unable to create the db storage directory for sparksee", e);
        }
    }

    /**
     * Declares the node/edge types and the id/community attributes. The node id
     * attribute is a unique String; community attributes are indexed Integers.
     */
    private void createSchema()
    {
        NODE_TYPE = sparkseeGraph.newNodeType(NODE);
        NODE_ATTRIBUTE = sparkseeGraph.newAttribute(NODE_TYPE, NODE_ID, DataType.String, AttributeKind.Unique);
        EDGE_TYPE = sparkseeGraph.newEdgeType(SIMILAR, true, false);
        COMMUNITY_ATTRIBUTE = sparkseeGraph.newAttribute(NODE_TYPE, COMMUNITY, DataType.Integer,
            AttributeKind.Indexed);
        NODE_COMMUNITY_ATTRIBUTE = sparkseeGraph.newAttribute(NODE_TYPE, NODE_COMMUNITY, DataType.Integer,
            AttributeKind.Indexed);
    }

    @Override
    public void massiveModeLoading(File dataPath)
    {
        Insertion sparkseeMassiveInsertion = new SparkseeMassiveInsertion(session);
        sparkseeMassiveInsertion.createGraph(dataPath, 0 /* scenarioNumber */);
    }

    @Override
    public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumber)
    {
        Insertion sparkseeSingleInsertion = new SparkseeSingleInsertion(this.session, resultsPath);
        sparkseeSingleInsertion.createGraph(dataPath, scenarioNumber);
    }

    /** Closes session, database and Sparksee handles in that order; idempotent. */
    @Override
    public void shutdown()
    {
        if (session != null)
        {
            session.close();
            session = null;
            database.close();
            database = null;
            sparksee.close();
            sparksee = null;
        }

    }

    @Override
    public void shutdownMassiveGraph()
    {
        shutdown();
    }

    @Override
    public void delete()
    {
        Utils.deleteRecursively(dbStorageDirectory);
    }

    /**
     * Runs a single-pair BFS shortest path (max 4 hops, outgoing edges only)
     * from the given source OID to the i-th benchmark vertex. The result is
     * discarded; only the traversal cost is being benchmarked.
     */
    @Override
    public void shortestPath(final Long srcNodeID, Integer i)
    {
        int nodeType = sparkseeGraph.findType(NODE);
        int edgeType = sparkseeGraph.findType(SIMILAR);

        long dstNodeID = getVertex(i);
        SinglePairShortestPathBFS shortestPathBFS = new SinglePairShortestPathBFS(session, srcNodeID, dstNodeID);
        shortestPathBFS.addNodeType(nodeType);
        shortestPathBFS.addEdgeType(edgeType, EdgesDirection.Outgoing);
        shortestPathBFS.setMaximumHops(4);
        shortestPathBFS.run();
        shortestPathBFS.close();
    }

    @Override
    public int getNodeCount()
    {
        return (int) sparkseeGraph.countNodes();
    }

    /** @return benchmark ids of the outgoing neighbors of the node with the given id */
    @Override
    public Set<Integer> getNeighborsIds(int nodeId)
    {
        Set<Integer> neighbors = new HashSet<Integer>();
        long nodeID = sparkseeGraph.findObject(NODE_ATTRIBUTE, value.setString(String.valueOf(nodeId)));
        Objects neighborsObjects = sparkseeGraph.neighbors(nodeID, EDGE_TYPE, EdgesDirection.Outgoing);
        ObjectsIterator neighborsIter = neighborsObjects.iterator();
        while (neighborsIter.hasNext())
        {
            long neighborID = neighborsIter.next();
            Value neighborNodeID = sparkseeGraph.getAttribute(neighborID, NODE_ATTRIBUTE);
            neighbors.add(Integer.valueOf(neighborNodeID.getString()));
        }
        neighborsIter.close();
        neighborsObjects.close();
        return neighbors;
    }

    /** Node weight is defined as its out-degree over SIMILAR edges. */
    @Override
    public double getNodeWeight(int nodeId)
    {
        long nodeID = sparkseeGraph.findObject(NODE_ATTRIBUTE, value.setString(String.valueOf(nodeId)));
        return getNodeOutDegree(nodeID);
    }

    public double getNodeInDegree(long node)
    {
        long inDegree = sparkseeGraph.degree(node, EDGE_TYPE, EdgesDirection.Ingoing);
        return (double) inDegree;
    }

    public double getNodeOutDegree(long node)
    {
        long outDegree = sparkseeGraph.degree(node, EDGE_TYPE, EdgesDirection.Outgoing);
        return (double) outDegree;
    }

    /** Assigns every node its own community and node-community id (0..n-1). */
    @Override
    public void initCommunityProperty()
    {
        int communityCounter = 0;
        // basic or indexed attribute?
        Objects nodes = sparkseeGraph.select(NODE_TYPE);
        ObjectsIterator nodesIter = nodes.iterator();
        while (nodesIter.hasNext())
        {
            long nodeID = nodesIter.next();
            sparkseeGraph.setAttribute(nodeID, COMMUNITY_ATTRIBUTE, value.setInteger(communityCounter));
            sparkseeGraph.setAttribute(nodeID, NODE_COMMUNITY_ATTRIBUTE, value.setInteger(communityCounter));
            communityCounter++;
        }
        nodesIter.close();
        nodes.close();
    }

    /**
     * @return the community ids reachable over outgoing edges from any node in
     *         the given node-community
     */
    @Override
    public Set<Integer> getCommunitiesConnectedToNodeCommunities(int nodeCommunities)
    {
        Set<Integer> communities = new HashSet<Integer>();
        Objects nodes = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(nodeCommunities));
        ObjectsIterator nodesIter = nodes.iterator();
        while (nodesIter.hasNext())
        {
            long nodeID = nodesIter.next();
            Objects neighbors = sparkseeGraph.neighbors(nodeID, EDGE_TYPE, EdgesDirection.Outgoing);
            ObjectsIterator neighborsIter = neighbors.iterator();
            while (neighborsIter.hasNext())
            {
                long neighborID = neighborsIter.next();
                Value community = sparkseeGraph.getAttribute(neighborID, COMMUNITY_ATTRIBUTE);
                communities.add(community.getInteger());
            }
            neighborsIter.close();
            neighbors.close();
        }
        nodesIter.close();
        nodes.close();
        return communities;
    }

    @Override
    public Set<Integer> getNodesFromCommunity(int community)
    {
        Set<Integer> nodesFromCommunity = new HashSet<Integer>();
        Objects nodes = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal, value.setInteger(community));
        ObjectsIterator nodesIter = nodes.iterator();
        while (nodesIter.hasNext())
        {
            Value nodeId = sparkseeGraph.getAttribute(nodesIter.next(), NODE_ATTRIBUTE);
            nodesFromCommunity.add(Integer.valueOf(nodeId.getString()));
        }
        nodesIter.close();
        nodes.close();
        return nodesFromCommunity;
    }

    @Override
    public Set<Integer> getNodesFromNodeCommunity(int nodeCommunity)
    {
        Set<Integer> nodesFromNodeCommunity = new HashSet<Integer>();
        Objects nodes = sparkseeGraph
            .select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal, value.setInteger(nodeCommunity));
        ObjectsIterator nodesIter = nodes.iterator();
        while (nodesIter.hasNext())
        {
            Value nodeId = sparkseeGraph.getAttribute(nodesIter.next(), NODE_ATTRIBUTE);
            nodesFromNodeCommunity.add(Integer.valueOf(nodeId.getString()));
        }
        nodesIter.close();
        nodes.close();
        return nodesFromNodeCommunity;
    }

    /**
     * Counts outgoing edges that start in the given node-community and end in
     * the given community.
     */
    @Override
    public double getEdgesInsideCommunity(int nodeCommunity, int communityNode)
    {
        double edges = 0;
        Objects nodesFromNodeCommunitiy = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(nodeCommunity));
        Objects nodesFromCommunity = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(communityNode));
        ObjectsIterator nodesFromNodeCommunityIter = nodesFromNodeCommunitiy.iterator();
        while (nodesFromNodeCommunityIter.hasNext())
        {
            long nodeID = nodesFromNodeCommunityIter.next();
            Objects neighbors = sparkseeGraph.neighbors(nodeID, EDGE_TYPE, EdgesDirection.Outgoing);
            ObjectsIterator neighborsIter = neighbors.iterator();
            while (neighborsIter.hasNext())
            {
                if (nodesFromCommunity.contains(neighborsIter.next()))
                {
                    edges++;
                }
            }
            neighborsIter.close();
            neighbors.close();
        }
        nodesFromNodeCommunityIter.close();
        nodesFromCommunity.close();
        nodesFromNodeCommunitiy.close();
        return edges;
    }

    /**
     * Sum of out-degrees over all nodes in the community.
     * NOTE(review): a community of size 1 contributes weight 0 by design of the
     * size() > 1 guard — confirm this matches the other database implementations.
     */
    @Override
    public double getCommunityWeight(int community)
    {
        double communityWeight = 0;
        Objects nodesFromCommunity = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(community));
        ObjectsIterator nodesFromCommunityIter = nodesFromCommunity.iterator();
        if (nodesFromCommunity.size() > 1)
        {
            while (nodesFromCommunityIter.hasNext())
            {
                communityWeight += getNodeOutDegree(nodesFromCommunityIter.next());
            }
        }
        nodesFromCommunityIter.close();
        nodesFromCommunity.close();
        return communityWeight;
    }

    /** Same as {@link #getCommunityWeight(int)} but over node-community membership. */
    @Override
    public double getNodeCommunityWeight(int nodeCommunity)
    {
        double nodeCommunityWeight = 0;
        Objects nodesFromNodeCommunity = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(nodeCommunity));
        ObjectsIterator nodesFromNodeCommunityIter = nodesFromNodeCommunity.iterator();
        if (nodesFromNodeCommunity.size() > 1)
        {
            while (nodesFromNodeCommunityIter.hasNext())
            {
                nodeCommunityWeight += getNodeOutDegree(nodesFromNodeCommunityIter.next());
            }
        }
        nodesFromNodeCommunityIter.close();
        nodesFromNodeCommunity.close();
        return nodeCommunityWeight;
    }

    /** Reassigns every node of the given node-community to the target community. */
    @Override
    public void moveNode(int nodeCommunity, int toCommunity)
    {
        Objects fromNodes = sparkseeGraph.select(NODE_COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(nodeCommunity));
        ObjectsIterator fromNodesIter = fromNodes.iterator();
        while (fromNodesIter.hasNext())
        {
            sparkseeGraph.setAttribute(fromNodesIter.next(), COMMUNITY_ATTRIBUTE, value.setInteger(toCommunity));
        }
        fromNodesIter.close();
        fromNodes.close();
    }

    /** Total weight of the graph, defined here as the edge count. */
    @Override
    public double getGraphWeightSum()
    {
        return (double) sparkseeGraph.countEdges();
    }

    /**
     * Renumbers community ids to a dense range starting at 0, copying the new id
     * into both community attributes.
     *
     * @return the number of distinct communities after renumbering
     */
    @Override
    public int reInitializeCommunities()
    {
        Map<Integer, Integer> initCommunities = new HashMap<Integer, Integer>();
        int communityCounter = 0;
        Objects nodes = sparkseeGraph.select(NODE_TYPE);
        ObjectsIterator nodesIter = nodes.iterator();
        while (nodesIter.hasNext())
        {
            long nodeID = nodesIter.next();
            Value communityId = sparkseeGraph.getAttribute(nodeID, COMMUNITY_ATTRIBUTE);
            if (!initCommunities.containsKey(communityId.getInteger()))
            {
                initCommunities.put(communityId.getInteger(), communityCounter);
                communityCounter++;
            }
            int newCommunityId = initCommunities.get(communityId.getInteger());
            sparkseeGraph.setAttribute(nodeID, COMMUNITY_ATTRIBUTE, value.setInteger(newCommunityId));
            sparkseeGraph.setAttribute(nodeID, NODE_COMMUNITY_ATTRIBUTE, value.setInteger(newCommunityId));
        }
        nodesIter.close();
        nodes.close();
        return communityCounter;
    }

    /** @return the community id of (an arbitrary node of) the given node-community */
    @Override
    public int getCommunity(int nodeCommunity)
    {
        long nodeID = sparkseeGraph.findObject(NODE_COMMUNITY_ATTRIBUTE, value.setInteger(nodeCommunity));
        Value communityId = sparkseeGraph.getAttribute(nodeID, COMMUNITY_ATTRIBUTE);
        return communityId.getInteger();
    }

    @Override
    public int getCommunityFromNode(int nodeId)
    {
        long nodeID = sparkseeGraph.findObject(NODE_ATTRIBUTE, value.setString(String.valueOf(nodeId)));
        Value communityId = sparkseeGraph.getAttribute(nodeID, COMMUNITY_ATTRIBUTE);
        return communityId.getInteger();
    }

    /** Community "size" is the number of distinct node-communities it contains. */
    @Override
    public int getCommunitySize(int community)
    {
        Objects nodesFromCommunities = sparkseeGraph.select(COMMUNITY_ATTRIBUTE, Condition.Equal,
            value.setInteger(community));
        ObjectsIterator nodesFromCommunitiesIter = nodesFromCommunities.iterator();
        Set<Integer> nodeCommunities = new HashSet<Integer>();
        while (nodesFromCommunitiesIter.hasNext())
        {
            Value nodeCommunityId = sparkseeGraph.getAttribute(nodesFromCommunitiesIter.next(),
                NODE_COMMUNITY_ATTRIBUTE);
            nodeCommunities.add(nodeCommunityId.getInteger());
        }
        nodesFromCommunitiesIter.close();
        nodesFromCommunities.close();
        return nodeCommunities.size();
    }

    /** @return map of community id (0..numberOfCommunities-1) to its member node ids */
    @Override
    public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities)
    {
        Map<Integer, List<Integer>> communities = new HashMap<Integer, List<Integer>>();
        for (int i = 0; i < numberOfCommunities; i++)
        {
            Objects nodesFromCommunity = sparkseeGraph
                .select(COMMUNITY_ATTRIBUTE, Condition.Equal, value.setInteger(i));
            ObjectsIterator nodesFromCommunityIter = nodesFromCommunity.iterator();
            List<Integer> nodes = new ArrayList<Integer>();
            while (nodesFromCommunityIter.hasNext())
            {
                Value nodeId = sparkseeGraph.getAttribute(nodesFromCommunityIter.next(), NODE_ATTRIBUTE);
                nodes.add(Integer.valueOf(nodeId.getString()));
            }
            communities.put(i, nodes);
            nodesFromCommunityIter.close();
            nodesFromCommunity.close();
        }
        return communities;
    }

    @Override
    public boolean nodeExists(int nodeId)
    {
        // NODE_ATTRIBUTE is declared DataType.String in createSchema(); the lookup
        // previously used setInteger(nodeId), which mismatches the attribute type
        // (every other id lookup in this class uses setString).
        Objects nodes = sparkseeGraph.select(NODE_ATTRIBUTE, Condition.Equal,
            value.setString(String.valueOf(nodeId)));
        ObjectsIterator nodesIter = nodes.iterator();
        boolean exists = nodesIter.hasNext();
        nodesIter.close();
        nodes.close();
        return exists;
    }

    @Override
    public ObjectsIterator getVertexIterator()
    {
        final int nodeType = sparkseeGraph.findType(NODE);
        final Objects objects = sparkseeGraph.select(nodeType);
        return objects.iterator();
    }

    @Override
    public ObjectsIterator getNeighborsOfVertex(Long v)
    {
        final int edgeType = sparkseeGraph.findType(SIMILAR);
        final Objects neighbors = sparkseeGraph.neighbors(v, edgeType, EdgesDirection.Any);
        return neighbors.iterator();
    }

    @Override
    public void cleanupVertexIterator(ObjectsIterator it)
    {
        it.close();
    }

    @Override
    public Long getOtherVertexFromEdge(Long r, Long oneVertex)
    {
        return r; //pass through
    }

    @Override
    public ObjectsIterator getAllEdges()
    {
        int edgeType = sparkseeGraph.findType(SIMILAR);
        Objects objects = sparkseeGraph.select(edgeType);
        return objects.iterator();
    }

    @Override
    public Long getSrcVertexFromEdge(Long edge)
    {
        EdgeData edgeData = sparkseeGraph.getEdgeData(edge);
        return edgeData.getTail();
    }

    @Override
    public Long getDestVertexFromEdge(Long edge)
    {
        EdgeData edgeData = sparkseeGraph.getEdgeData(edge);
        return edgeData.getHead();
    }

    @Override
    public boolean edgeIteratorHasNext(ObjectsIterator it)
    {
        return it.hasNext();
    }

    @Override
    public Long nextEdge(ObjectsIterator it)
    {
        return it.next();
    }

    @Override
    public void cleanupEdgeIterator(ObjectsIterator it)
    {
        it.close();
    }

    @Override
    public boolean vertexIteratorHasNext(ObjectsIterator it)
    {
        return it.hasNext();
    }

    @Override
    public Long nextVertex(ObjectsIterator it)
    {
        return it.next();
    }

    @Override
    public Long getVertex(Integer i)
    {
        int nodeType = sparkseeGraph.findType(NODE);
        int nodeAttribute = sparkseeGraph.findAttribute(nodeType, NODE_ID);
        // The NODE_ID attribute is stored as a String (see createSchema); the
        // previous setInteger(i) queried a String attribute with an Integer value.
        return sparkseeGraph.findObject(nodeAttribute, value.setString(String.valueOf(i)));
    }
}
--------------------------------------------------------------------------------