├── src
│   ├── main
│   │   └── java
│   │       └── eu
│   │           └── socialsensor
│   │               ├── benchmarks
│   │               │   ├── InsertsGraphData.java
│   │               │   ├── RequiresGraphData.java
│   │               │   ├── Benchmark.java
│   │               │   ├── DeleteGraphBenchmark.java
│   │               │   ├── FindNodesOfAllEdgesBenchmark.java
│   │               │   ├── FindNeighboursOfAllNodesBenchmark.java
│   │               │   ├── FindShortestPathBenchmark.java
│   │               │   ├── MassiveInsertionBenchmark.java
│   │               │   ├── BenchmarkBase.java
│   │               │   ├── SingleInsertionBenchmark.java
│   │               │   ├── PermutingBenchmarkBase.java
│   │               │   └── ClusteringBenchmark.java
│   │               ├── insert
│   │               │   ├── Insertion.java
│   │               │   ├── SparkseeSingleInsertion.java
│   │               │   ├── SparkseeMassiveInsertion.java
│   │               │   ├── Neo4jMassiveInsertion.java
│   │               │   ├── TitanMassiveInsertion.java
│   │               │   ├── TitanSingleInsertion.java
│   │               │   ├── OrientMassiveInsertion.java
│   │               │   ├── OrientSingleInsertion.java
│   │               │   ├── OrientAbstractInsertion.java
│   │               │   ├── Neo4jSingleInsertion.java
│   │               │   └── InsertionBase.java
│   │               ├── main
│   │               │   ├── BenchmarkingException.java
│   │               │   ├── BenchmarkType.java
│   │               │   ├── GraphDatabaseType.java
│   │               │   ├── GraphDatabaseBenchmark.java
│   │               │   └── BenchmarkConfiguration.java
│   │               ├── dataset
│   │               │   ├── DatasetFactory.java
│   │               │   └── Dataset.java
│   │               ├── utils
│   │               │   ├── PermuteMethod.java
│   │               │   ├── Metrics.java
│   │               │   └── Utils.java
│   │               ├── clustering
│   │               │   ├── LouvainMethod.java
│   │               │   └── Cache.java
│   │               └── graphdatabases
│   │                   ├── GraphDatabase.java
│   │                   ├── GraphDatabaseBase.java
│   │                   ├── OrientGraphDatabase.java
│   │                   └── SparkseeGraphDatabase.java
│   └── test
│       ├── resources
│       │   └── META-INF
│       │       ├── log4j2.xml
│       │       ├── log4j.properties
│       │       └── input.properties
│       └── java
│           └── eu
│               └── socialsensor
│                   └── main
│                       └── GraphDatabaseBenchmarkTest.java
├── .gitignore
├── NOTICE
├── README.md
├── LICENSE
└── pom.xml
/src/main/java/eu/socialsensor/benchmarks/InsertsGraphData.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | public interface InsertsGraphData extends Benchmark
4 | {
5 |
6 | }
7 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/benchmarks/RequiresGraphData.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | public interface RequiresGraphData extends Benchmark
4 | {
5 |
6 | }
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target/*
2 | *.class
3 | /target
4 | /data
5 | /.project
6 | /.gitignore
7 | /.settings
8 | /.classpath
9 | .idea/
10 | graphdb-benchmarks.iml
11 | metrics/
12 | results/
13 | storage/
14 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/benchmarks/Benchmark.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | /**
4 | * Represents the benchmarks
5 | *
6 | * @author sotbeis
7 | * @email sotbeis@iti.gr
8 | */
9 | public interface Benchmark
10 | {
11 |
12 | /**
13 | * Start the selected benchmark
14 | */
15 | public void startBenchmark();
16 |
17 | }
18 |
--------------------------------------------------------------------------------
/src/test/resources/META-INF/log4j2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | This product includes software developed by Information Technologies Institute
2 | (CERTH, 57001, Thermi, Greece), and the following individuals:
3 | * Sotiris Beis
4 | * Alexander Patrikalakis
5 |
6 | It also includes software from other open source projects including,
7 | but not limited to (check pom.xml for complete listing):
8 | cassandra.yaml file came from:
9 | https://github.com/thinkaurelius/titan/blob/titan05/titan-cassandra/config/cassandra/cassandra.yaml
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/insert/Insertion.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.insert;
2 |
3 | import java.io.File;
4 |
5 | /**
6 | * Represents the insertion of data in each graph database
7 | *
8 | * @author sotbeis, sotbeis@iti.gr
9 | */
10 | public interface Insertion
11 | {
12 |
13 | /**
14 | * Loads the data in each graph database
15 | *
16 |      * @param dataset the dataset file to load
17 | */
18 | public void createGraph(File dataset, int scenarioNumber);
19 |
20 | }
21 |
--------------------------------------------------------------------------------
/src/test/resources/META-INF/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=info, stdout
2 | #log4j.logger.com.amazon.titan=trace
3 | #log4j.logger.com.thinkaurelius=debug
4 | log4j.logger.com.amazonaws=off
5 | log4j.appender.stdout=org.apache.logging.log4j.core.appender.ConsoleAppender
6 | log4j.appender.stdout.layout=org.apache.logging.log4j.core.layout.PatternLayout
7 |
8 | # Pattern to output the caller's file name and line number.
9 | log4j.appender.stdout.layout.ConversionPattern=%d (%t) [%5p] (%F:%L) - %m%n
10 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/main/BenchmarkingException.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.main;
2 |
3 | public class BenchmarkingException extends RuntimeException
4 | {
5 |
6 | /**
7 | *
8 | */
9 | private static final long serialVersionUID = -4165548376731455231L;
10 |
11 | public BenchmarkingException(String message)
12 | {
13 | super(message);
14 | }
15 |
16 | public BenchmarkingException(String message, Throwable cause)
17 | {
18 | super(message, cause);
19 | }
20 |
21 | }
22 |
--------------------------------------------------------------------------------
/src/test/java/eu/socialsensor/main/GraphDatabaseBenchmarkTest.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.main;
2 |
3 | import static org.junit.Assert.fail;
4 | import org.junit.Test;
5 |
6 | public class GraphDatabaseBenchmarkTest
7 | {
8 | @Test
9 | public void testGraphDatabaseBenchmark()
10 | {
11 | GraphDatabaseBenchmark bench = new GraphDatabaseBenchmark(null /* inputPath */);
12 | try
13 | {
14 | bench.run();
15 | }
16 | catch (Exception e)
17 | {
18 | e.printStackTrace();
19 | fail("Got unexpected exception: " + e.getMessage());
20 | }
21 |
22 | //bench.cleanup();
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/dataset/DatasetFactory.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.dataset;
2 |
3 | import java.io.File;
4 | import java.util.HashMap;
5 | import java.util.Map;
6 |
7 | /**
8 | *
9 | * @author Alexander Patrikalakis
10 | *
11 | */
12 | public class DatasetFactory
13 | {
14 | private static DatasetFactory theInstance = null;
15 |     private final Map<File, Dataset> datasetMap;
16 |
17 | private DatasetFactory()
18 | {
19 |         datasetMap = new HashMap<File, Dataset>();
20 | }
21 |
22 | public static DatasetFactory getInstance()
23 | {
24 | if (theInstance == null)
25 | {
26 | theInstance = new DatasetFactory();
27 | }
28 | return theInstance;
29 | }
30 |
31 | public Dataset getDataset(File datasetFile)
32 | {
33 | if (!datasetMap.containsKey(datasetFile))
34 | {
35 | datasetMap.put(datasetFile, new Dataset(datasetFile));
36 | }
37 |
38 | return datasetMap.get(datasetFile);
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
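
A quick usage sketch (hypothetical call site, not a class in this repository) of the factory above: because DatasetFactory memoizes by File, asking for the same dataset twice returns the same cached Dataset instance, so the file is only parsed once per run.

    import java.io.File;

    import eu.socialsensor.dataset.Dataset;
    import eu.socialsensor.dataset.DatasetFactory;

    public class DatasetFactoryExample
    {
        public static void main(String[] args)
        {
            // same file referenced by input.properties (eu.socialsensor.dataset)
            File datasetFile = new File("data/network1000.dat");
            Dataset first = DatasetFactory.getInstance().getDataset(datasetFile);
            Dataset second = DatasetFactory.getInstance().getDataset(datasetFile);
            // the factory caches per File, so both references point to the same parsed dataset
            System.out.println(first == second); // true
        }
    }
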
/src/main/java/eu/socialsensor/benchmarks/DeleteGraphBenchmark.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | import java.util.concurrent.TimeUnit;
4 |
5 | import com.google.common.base.Stopwatch;
6 |
7 | import eu.socialsensor.main.BenchmarkConfiguration;
8 | import eu.socialsensor.main.BenchmarkType;
9 | import eu.socialsensor.main.GraphDatabaseType;
10 | import eu.socialsensor.utils.Utils;
11 |
12 | /**
13 |  * Benchmark that measures the time required to delete a graph
14 | * @author Alexander Patrikalakis
15 | *
16 | */
17 | public class DeleteGraphBenchmark extends PermutingBenchmarkBase implements RequiresGraphData
18 | {
19 | public DeleteGraphBenchmark(BenchmarkConfiguration bench)
20 | {
21 | super(bench, BenchmarkType.DELETION);
22 | }
23 |
24 | @Override
25 | public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
26 | {
27 | Stopwatch watch = new Stopwatch();
28 | watch.start();
29 | Utils.deleteDatabase(type, bench);
30 | times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS));
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/benchmarks/FindNodesOfAllEdgesBenchmark.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | import java.util.concurrent.TimeUnit;
4 |
5 | import com.google.common.base.Stopwatch;
6 |
7 | import eu.socialsensor.graphdatabases.GraphDatabase;
8 | import eu.socialsensor.main.BenchmarkConfiguration;
9 | import eu.socialsensor.main.BenchmarkType;
10 | import eu.socialsensor.main.GraphDatabaseType;
11 | import eu.socialsensor.utils.Utils;
12 |
13 | /**
14 | * FindNodesOfAllEdgesBenchmark implementation
15 | *
16 | * @author sotbeis, sotbeis@iti.gr
17 | * @author Alexander Patrikalakis
18 | */
19 | public class FindNodesOfAllEdgesBenchmark extends PermutingBenchmarkBase implements RequiresGraphData
20 | {
21 | public FindNodesOfAllEdgesBenchmark(BenchmarkConfiguration config)
22 | {
23 | super(config, BenchmarkType.FIND_ADJACENT_NODES);
24 | }
25 |
26 | @Override
27 | public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
28 | {
29 |         GraphDatabase<?, ?, ?, ?> graphDatabase = Utils.createDatabaseInstance(bench, type);
30 | graphDatabase.open();
31 | Stopwatch watch = new Stopwatch();
32 | watch.start();
33 | graphDatabase.findNodesOfAllEdges();
34 | graphDatabase.shutdown();
35 | times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS));
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/insert/SparkseeSingleInsertion.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.insert;
2 |
3 | import java.io.File;
4 |
5 | import com.sparsity.sparksee.gdb.Graph;
6 | import com.sparsity.sparksee.gdb.Session;
7 | import com.sparsity.sparksee.gdb.Value;
8 |
9 | import eu.socialsensor.graphdatabases.SparkseeGraphDatabase;
10 | import eu.socialsensor.main.GraphDatabaseType;
11 |
12 | public class SparkseeSingleInsertion extends InsertionBase<Long>
13 | {
14 | private final Session session;
15 | private final Graph sparkseeGraph;
16 |
17 | Value value = new Value();
18 |
19 | public SparkseeSingleInsertion(Session session, File resultsPath)
20 | {
21 |         // resultsPath is used to write intermediate times for the single-insertion workload
22 | super(GraphDatabaseType.SPARKSEE, resultsPath);
23 | this.session = session;
24 | this.sparkseeGraph = session.getGraph();
25 | }
26 |
27 | @Override
28 | public Long getOrCreate(String value)
29 | {
30 | Value sparkseeValue = new Value();
31 | return sparkseeGraph.findOrCreateObject(SparkseeGraphDatabase.NODE_ATTRIBUTE, sparkseeValue.setString(value));
32 | }
33 |
34 | @Override
35 | public void relateNodes(Long src, Long dest)
36 | {
37 | session.begin();
38 | sparkseeGraph.newEdge(SparkseeGraphDatabase.EDGE_TYPE, src, dest);
39 | session.commit();
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/benchmarks/FindNeighboursOfAllNodesBenchmark.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | import java.util.concurrent.TimeUnit;
4 |
5 | import com.google.common.base.Stopwatch;
6 |
7 | import eu.socialsensor.graphdatabases.GraphDatabase;
8 | import eu.socialsensor.main.BenchmarkConfiguration;
9 | import eu.socialsensor.main.BenchmarkType;
10 | import eu.socialsensor.main.GraphDatabaseType;
11 | import eu.socialsensor.utils.Utils;
12 |
13 | /**
14 | * FindNeighboursOfAllNodesBenchmark implementation
15 | *
16 | * @author sotbeis, sotbeis@iti.gr
17 | * @author Alexander Patrikalakis
18 | */
19 | public class FindNeighboursOfAllNodesBenchmark extends PermutingBenchmarkBase implements RequiresGraphData
20 | {
21 | public FindNeighboursOfAllNodesBenchmark(BenchmarkConfiguration config)
22 | {
23 | super(config, BenchmarkType.FIND_NEIGHBOURS);
24 | }
25 |
26 | @Override
27 | public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
28 | {
29 |         GraphDatabase<?, ?, ?, ?> graphDatabase = Utils.createDatabaseInstance(bench, type);
30 | graphDatabase.open();
31 | Stopwatch watch = new Stopwatch();
32 | watch.start();
33 | graphDatabase.findAllNodeNeighbours();
34 | graphDatabase.shutdown();
35 | times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS));
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/main/BenchmarkType.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.main;
2 |
3 | import java.util.HashSet;
4 | import java.util.Set;
5 |
6 | /**
7 | *
8 | * @author Alexander Patrikalakis
9 | *
10 | */
11 | public enum BenchmarkType
12 | {
13 | MASSIVE_INSERTION("Massive Insertion", "MassiveInsertion"), SINGLE_INSERTION("Single Insertion", "SingleInsertion"), DELETION(
14 | "Delete Graph", "DeleteGraph"), FIND_NEIGHBOURS("Find Neighbours of All Nodes", "FindNeighbours"), FIND_ADJACENT_NODES(
15 | "Find Adjacent Nodes of All Edges", "FindAdjacent"), FIND_SHORTEST_PATH("Find Shortest Path", "FindShortest"), CLUSTERING(
16 | "Clustering", "Clustering");
17 |
18 |     public static final Set<BenchmarkType> INSERTING_BENCHMARK_SET = new HashSet<BenchmarkType>();
19 | static
20 | {
21 | INSERTING_BENCHMARK_SET.add(MASSIVE_INSERTION);
22 | INSERTING_BENCHMARK_SET.add(SINGLE_INSERTION);
23 | }
24 |
25 | private final String longname;
26 | private final String filenamePrefix;
27 |
28 | private BenchmarkType(String longName, String filenamePrefix)
29 | {
30 | this.longname = longName;
31 | this.filenamePrefix = filenamePrefix;
32 | }
33 |
34 | public String longname()
35 | {
36 | return longname;
37 | }
38 |
39 | public String getResultsFileName()
40 | {
41 | return filenamePrefix + ".csv";
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/insert/SparkseeMassiveInsertion.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.insert;
2 |
3 | import com.sparsity.sparksee.gdb.Graph;
4 | import com.sparsity.sparksee.gdb.Session;
5 | import com.sparsity.sparksee.gdb.Value;
6 |
7 | import eu.socialsensor.graphdatabases.SparkseeGraphDatabase;
8 | import eu.socialsensor.main.GraphDatabaseType;
9 |
10 | public class SparkseeMassiveInsertion extends InsertionBase<Long> implements Insertion
11 | {
12 | private final Session session;
13 | private final Graph sparkseeGraph;
14 | private int operations;
15 |
16 | public SparkseeMassiveInsertion(Session session)
17 | {
18 | super(GraphDatabaseType.SPARKSEE, null /* resultsPath */);
19 | this.session = session;
20 | this.sparkseeGraph = session.getGraph();
21 | this.operations = 0;
22 | }
23 |
24 | @Override
25 | public Long getOrCreate(String value)
26 | {
27 | Value sparkseeValue = new Value();
28 | return sparkseeGraph.findOrCreateObject(SparkseeGraphDatabase.NODE_ATTRIBUTE, sparkseeValue.setString(value));
29 | }
30 |
31 | @Override
32 | public void relateNodes(Long src, Long dest)
33 | {
34 | sparkseeGraph.newEdge(SparkseeGraphDatabase.EDGE_TYPE, src, dest);
35 | operations++;
36 | if (operations == 10000)
37 | {
38 | session.commit();
39 | session.begin();
40 | operations = 0;
41 | }
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/insert/Neo4jMassiveInsertion.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.insert;
2 |
3 | import java.util.HashMap;
4 | import java.util.Map;
5 | import org.neo4j.helpers.collection.MapUtil;
6 | import org.neo4j.unsafe.batchinsert.BatchInserter;
7 |
8 | import eu.socialsensor.graphdatabases.Neo4jGraphDatabase;
9 | import eu.socialsensor.main.GraphDatabaseType;
10 |
11 | /**
12 | * Implementation of massive Insertion in Neo4j graph database
13 | *
14 | * @author sotbeis, sotbeis@iti.gr
15 | * @author Alexander Patrikalakis
16 | *
17 | */
18 | public final class Neo4jMassiveInsertion extends InsertionBase<Long>
19 | {
20 | private final BatchInserter inserter;
21 |     Map<Long, Long> cache = new HashMap<Long, Long>();
22 |
23 | public Neo4jMassiveInsertion(BatchInserter inserter)
24 | {
25 | super(GraphDatabaseType.NEO4J, null /* resultsPath */);
26 | this.inserter = inserter;
27 | }
28 |
29 | @Override
30 | protected Long getOrCreate(String value)
31 | {
32 | Long id = cache.get(Long.valueOf(value));
33 | if (id == null)
34 | {
35 |             Map<String, Object> properties = MapUtil.map("nodeId", value);
36 | id = inserter.createNode(properties, Neo4jGraphDatabase.NODE_LABEL);
37 | cache.put(Long.valueOf(value), id);
38 | }
39 | return id;
40 | }
41 |
42 | @Override
43 | protected void relateNodes(Long src, Long dest)
44 | {
45 | inserter.createRelationship(src, dest, Neo4jGraphDatabase.RelTypes.SIMILAR, null);
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/insert/TitanMassiveInsertion.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.insert;
2 |
3 | import com.thinkaurelius.titan.core.TitanGraph;
4 | import com.thinkaurelius.titan.core.util.TitanId;
5 | import com.tinkerpop.blueprints.Vertex;
6 | import com.tinkerpop.blueprints.util.wrappers.batch.BatchGraph;
7 |
8 | import eu.socialsensor.main.GraphDatabaseType;
9 |
10 | /**
11 | * Implementation of massive Insertion in Titan graph database
12 | *
13 | * @author sotbeis, sotbeis@iti.gr
14 | * @author Alexander Patrikalakis
15 | *
16 | */
17 | public class TitanMassiveInsertion extends InsertionBase<Vertex>
18 | {
19 |     private final BatchGraph<TitanGraph> batchGraph;
20 |
21 |     public TitanMassiveInsertion(BatchGraph<TitanGraph> batchGraph, GraphDatabaseType type)
22 | {
23 | super(type, null /* resultsPath */); // no temp files for massive load
24 | // insert
25 | this.batchGraph = batchGraph;
26 | }
27 |
28 | @Override
29 | public Vertex getOrCreate(String value)
30 | {
31 | Integer intVal = Integer.valueOf(value);
32 | final long titanVertexId = TitanId.toVertexId(intVal);
33 | Vertex vertex = batchGraph.getVertex(titanVertexId);
34 | if (vertex == null)
35 | {
36 | vertex = batchGraph.addVertex(titanVertexId);
37 | vertex.setProperty("nodeId", intVal);
38 | }
39 | return vertex;
40 | }
41 |
42 | @Override
43 | public void relateNodes(Vertex src, Vertex dest)
44 | {
45 | src.addEdge("similar", dest);
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/benchmarks/FindShortestPathBenchmark.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | import eu.socialsensor.dataset.DatasetFactory;
4 | import eu.socialsensor.graphdatabases.GraphDatabase;
5 | import eu.socialsensor.main.BenchmarkConfiguration;
6 | import eu.socialsensor.main.BenchmarkType;
7 | import eu.socialsensor.main.GraphDatabaseType;
8 | import eu.socialsensor.utils.Utils;
9 |
10 | import java.util.Set;
11 | import java.util.concurrent.TimeUnit;
12 |
13 | import com.google.common.base.Stopwatch;
14 |
15 | /**
16 | * FindShortestPathBenchmark implementation
17 | *
18 | * @author sotbeis, sotbeis@iti.gr
19 | * @author Alexander Patrikalakis
20 | */
21 | public class FindShortestPathBenchmark extends PermutingBenchmarkBase implements RequiresGraphData
22 | {
23 |
24 |     private final Set<Integer> generatedNodes;
25 |
26 | public FindShortestPathBenchmark(BenchmarkConfiguration config)
27 | {
28 | super(config, BenchmarkType.FIND_SHORTEST_PATH);
29 | generatedNodes = DatasetFactory.getInstance().getDataset(config.getDataset())
30 | .generateRandomNodes(config.getRandomNodes());
31 | }
32 |
33 | @Override
34 | public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
35 | {
36 |         GraphDatabase<?, ?, ?, ?> graphDatabase = Utils.createDatabaseInstance(bench, type);
37 | graphDatabase.open();
38 | Stopwatch watch = new Stopwatch();
39 | watch.start();
40 | graphDatabase.shortestPaths(generatedNodes);
41 | graphDatabase.shutdown();
42 | times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS));
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/dataset/Dataset.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.dataset;
2 |
3 | import java.io.File;
4 | import java.util.ArrayList;
5 | import java.util.HashSet;
6 | import java.util.Iterator;
7 | import java.util.List;
8 | import java.util.Set;
9 |
10 | import org.apache.commons.math3.util.MathArrays;
11 |
12 | import eu.socialsensor.utils.Utils;
13 |
14 | /**
15 | *
16 | * @author Alexander Patrikalakis
17 | *
18 | */
19 | public class Dataset implements Iterable<List<String>>
20 | {
21 |     private final List<List<String>> data;
22 |
23 | public Dataset(File datasetFile)
24 | {
25 | data = Utils.readTabulatedLines(datasetFile, 4 /* numberOfLinesToSkip */);
26 | }
27 |
28 |     public Set<Integer> generateRandomNodes(int numRandomNodes)
29 |     {
30 |         Set<String> nodes = new HashSet<String>();
31 |         for (List<String> line : data.subList(4, data.size()))
32 | {
33 | for (String nodeId : line)
34 | {
35 | nodes.add(nodeId.trim());
36 | }
37 | }
38 |
39 |         List<String> nodeList = new ArrayList<String>(nodes);
40 | int[] nodeIndexList = new int[nodeList.size()];
41 | for (int i = 0; i < nodeList.size(); i++)
42 | {
43 | nodeIndexList[i] = i;
44 | }
45 | MathArrays.shuffle(nodeIndexList);
46 |
47 |         Set<Integer> generatedNodes = new HashSet<Integer>();
48 | for (int i = 0; i < numRandomNodes; i++)
49 | {
50 | generatedNodes.add(Integer.valueOf(nodeList.get(nodeIndexList[i])));
51 | }
52 | return generatedNodes;
53 | }
54 |
55 | @Override
56 |     public Iterator<List<String>> iterator()
57 | {
58 | return data.iterator();
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/insert/TitanSingleInsertion.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.insert;
2 |
3 | import java.io.File;
4 |
5 | import com.thinkaurelius.titan.core.TitanGraph;
6 | import com.thinkaurelius.titan.core.util.TitanId;
7 | import com.tinkerpop.blueprints.Compare;
8 | import com.tinkerpop.blueprints.Vertex;
9 |
10 | import eu.socialsensor.main.GraphDatabaseType;
11 |
12 | /**
13 | * Implementation of single Insertion in Titan graph database
14 | *
15 | * @author sotbeis, sotbeis@iti.gr
16 | * @author Alexander Patrikalakis
17 | *
18 | */
19 | public class TitanSingleInsertion extends InsertionBase<Vertex>
20 | {
21 | private final TitanGraph titanGraph;
22 |
23 | public TitanSingleInsertion(TitanGraph titanGraph, GraphDatabaseType type, File resultsPath)
24 | {
25 | super(type, resultsPath);
26 | this.titanGraph = titanGraph;
27 | }
28 |
29 | @Override
30 | public Vertex getOrCreate(String value)
31 | {
32 | Integer intValue = Integer.valueOf(value);
33 | final Vertex v;
34 | if (titanGraph.query().has("nodeId", Compare.EQUAL, intValue).vertices().iterator().hasNext())
35 | {
36 | v = (Vertex) titanGraph.query().has("nodeId", Compare.EQUAL, intValue).vertices().iterator().next();
37 | }
38 | else
39 | {
40 | final long titanVertexId = TitanId.toVertexId(intValue);
41 | v = titanGraph.addVertex(titanVertexId);
42 | v.setProperty("nodeId", intValue);
43 | titanGraph.commit();
44 | }
45 | return v;
46 | }
47 |
48 | @Override
49 | public void relateNodes(Vertex src, Vertex dest)
50 | {
51 | try
52 | {
53 | titanGraph.addEdge(null, src, dest, "similar");
54 | titanGraph.commit();
55 | }
56 | catch (Exception e)
57 | {
58 | titanGraph.rollback(); //TODO(amcp) why can this happen? doesn't this indicate illegal state?
59 | }
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/benchmarks/MassiveInsertionBenchmark.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | import java.util.concurrent.TimeUnit;
4 |
5 | import org.apache.logging.log4j.LogManager;
6 | import org.apache.logging.log4j.Logger;
7 |
8 | import com.google.common.base.Stopwatch;
9 |
10 | import eu.socialsensor.graphdatabases.GraphDatabase;
11 | import eu.socialsensor.main.BenchmarkConfiguration;
12 | import eu.socialsensor.main.BenchmarkType;
13 | import eu.socialsensor.main.GraphDatabaseType;
14 | import eu.socialsensor.utils.Utils;
15 |
16 | /**
17 | * MassiveInsertionBenchmark implementation
18 | *
19 | * @author sotbeis, sotbeis@iti.gr
20 | * @author Alexander Patrikalakis
21 | */
22 |
23 | public class MassiveInsertionBenchmark extends PermutingBenchmarkBase implements InsertsGraphData
24 | {
25 | private static final Logger logger = LogManager.getLogger();
26 |
27 | public MassiveInsertionBenchmark(BenchmarkConfiguration config)
28 | {
29 | super(config, BenchmarkType.MASSIVE_INSERTION);
30 | }
31 |
32 | @Override
33 | public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
34 | {
35 | logger.debug("Creating database instance for type " + type.getShortname());
36 |         GraphDatabase<?, ?, ?, ?> graphDatabase = Utils.createDatabaseInstance(bench, type);
37 | logger.debug("Prepare database instance for type {} for massive loading", type.getShortname());
38 | // the following step includes provisioning in managed database
39 | // services. do not measure this time as
40 | // it is not related to the action of inserting.
41 | graphDatabase.createGraphForMassiveLoad();
42 | logger.debug("Massive load graph in database type {}", type.getShortname());
43 | Stopwatch watch = new Stopwatch();
44 | watch.start();
45 | graphDatabase.massiveModeLoading(bench.getDataset());
46 | logger.debug("Shutdown massive graph in database type {}", type.getShortname());
47 | graphDatabase.shutdownMassiveGraph();
48 | times.get(type).add((double) watch.elapsed(TimeUnit.MILLISECONDS));
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/utils/PermuteMethod.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.utils;
2 | import java.lang.reflect.Array;
3 | import java.lang.reflect.Method;
4 | import java.util.Iterator;
5 | import java.util.NoSuchElementException;
6 |
7 | public class PermuteMethod implements Iterator<Method[]> {
8 | private final int size;
9 | private final Method[] elements; // copy of original 0 .. size-1
10 | private final Method[] ar; // array for output, 0 .. size-1
11 | private final int[] permutation; // perm of nums 1..size, perm[0]=0
12 |
13 | private boolean next = true;
14 |
15 | public PermuteMethod(Method[] e) {
16 | size = e.length;
17 | elements = new Method[size];
18 | System.arraycopy(e, 0, elements, 0, size);
19 | ar = new Method[size];
20 | System.arraycopy(e, 0, ar, 0, size);
21 | permutation = new int[size + 1];
22 | for (int i = 0; i < size + 1; i++) {
23 | permutation[i] = i;
24 | }
25 | }
26 |
27 | private void formNextPermutation() {
28 | for (int i = 0; i < size; i++) {
29 | Array.set(ar, i, elements[permutation[i + 1] - 1]);
30 | }
31 | }
32 |
33 | public boolean hasNext() {
34 | return next;
35 | }
36 |
37 | public void remove() throws UnsupportedOperationException {
38 | throw new UnsupportedOperationException();
39 | }
40 |
41 | private void swap(final int i, final int j) {
42 | final int x = permutation[i];
43 | permutation[i] = permutation[j];
44 | permutation[j] = x;
45 | }
46 |
47 | public Method[] next() throws NoSuchElementException {
48 | formNextPermutation(); // copy original elements
49 | int i = size - 1;
50 | while (permutation[i] > permutation[i + 1])
51 | i--;
52 | if (i == 0) {
53 | next = false;
54 | for (int j = 0; j < size + 1; j++) {
55 | permutation[j] = j;
56 | }
57 | return ar;
58 | }
59 | int j = size;
60 | while (permutation[i] > permutation[j])
61 | j--;
62 | swap(i, j);
63 | int r = size;
64 | int s = i + 1;
65 | while (r > s) {
66 | swap(r, s);
67 | r--;
68 | s++;
69 | }
70 | return ar;
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/main/GraphDatabaseType.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.main;
2 |
3 | import java.util.HashMap;
4 | import java.util.HashSet;
5 | import java.util.Map;
6 | import java.util.Set;
7 |
8 | /**
9 | * Enum containing constants that correspond to each database.
10 | *
11 | * @author Alexander Patrikalakis
12 | */
13 | public enum GraphDatabaseType
14 | {
15 | TITAN_BERKELEYDB("Titan", "berkeleyje", "tbdb"),
16 | TITAN_DYNAMODB("Titan", "com.amazon.titan.diskstorage.dynamodb.DynamoDBStoreManager", "tddb"),
17 | TITAN_CASSANDRA("Titan", "cassandra", "tc"),
18 | TITAN_CASSANDRA_EMBEDDED("TitanEmbedded", "embeddedcassandra", "tce"),
19 | TITAN_HBASE("Titan", "hbase", "thb"),
20 | TITAN_PERSISTIT("TitanEmbedded", "persistit", "tp"),
21 | ORIENT_DB("OrientDB", null, "orient"),
22 | NEO4J("Neo4j", null, "neo4j"),
23 | SPARKSEE("Sparksee", null, "sparksee");
24 |
25 | private final String backend;
26 | private final String api;
27 | private final String shortname;
28 |
29 |     public static final Map<String, GraphDatabaseType> STRING_REP_MAP = new HashMap<String, GraphDatabaseType>();
30 |     public static final Set<GraphDatabaseType> TITAN_FLAVORS = new HashSet<GraphDatabaseType>();
31 | static
32 | {
33 | for (GraphDatabaseType db : values())
34 | {
35 | STRING_REP_MAP.put(db.getShortname(), db);
36 | }
37 | TITAN_FLAVORS.add(TITAN_BERKELEYDB);
38 | TITAN_FLAVORS.add(TITAN_DYNAMODB);
39 | TITAN_FLAVORS.add(TITAN_CASSANDRA);
40 | TITAN_FLAVORS.add(TITAN_CASSANDRA_EMBEDDED);
41 | TITAN_FLAVORS.add(TITAN_HBASE);
42 | TITAN_FLAVORS.add(TITAN_PERSISTIT);
43 | }
44 |
45 | private GraphDatabaseType(String api, String backend, String shortname)
46 | {
47 | this.api = api;
48 | this.backend = backend;
49 | this.shortname = shortname;
50 | }
51 |
52 | public String getBackend()
53 | {
54 | return backend;
55 | }
56 |
57 | public String getApi()
58 | {
59 | return api;
60 | }
61 |
62 | public String getShortname()
63 | {
64 | return shortname;
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
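
The shortnames declared above are the same tokens accepted by eu.socialsensor.databases in input.properties. A minimal sketch (hypothetical, not part of the repository) of resolving such a token back to its enum constant through STRING_REP_MAP:

    import eu.socialsensor.main.GraphDatabaseType;

    public class GraphDatabaseTypeExample
    {
        public static void main(String[] args)
        {
            // "tddb" is the shortname declared for TITAN_DYNAMODB above
            GraphDatabaseType type = GraphDatabaseType.STRING_REP_MAP.get("tddb");
            System.out.println(type);                                           // TITAN_DYNAMODB
            System.out.println(GraphDatabaseType.TITAN_FLAVORS.contains(type)); // true
        }
    }
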
/src/main/java/eu/socialsensor/insert/OrientMassiveInsertion.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.insert;
2 |
3 | import com.orientechnologies.orient.core.config.OGlobalConfiguration;
4 | import com.orientechnologies.orient.graph.batch.OGraphBatchInsertBasic;
5 | import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx;
6 |
7 | import eu.socialsensor.main.GraphDatabaseType;
8 |
9 | /**
10 | * Implementation of massive Insertion in OrientDB graph database
11 | *
12 | * @author sotbeis, sotbeis@iti.gr
13 | * @author Alexander Patrikalakis
14 | *
15 | */
16 | public class OrientMassiveInsertion extends InsertionBase<Long> implements Insertion
17 | {
18 | private static final int ESTIMATED_ENTRIES = 1000000;
19 | private static final int AVERAGE_NUMBER_OF_EDGES_PER_NODE = 40;
20 | private static final int NUMBER_OF_ORIENT_CLUSTERS = 16;
21 | private final OGraphBatchInsertBasic graph;
22 |
23 | public OrientMassiveInsertion(final String url)
24 | {
25 | super(GraphDatabaseType.ORIENT_DB, null /* resultsPath */);
26 | OGlobalConfiguration.ENVIRONMENT_CONCURRENT.setValue(false);
27 | OrientGraphNoTx transactionlessGraph = new OrientGraphNoTx(url);
28 | for (int i = 0; i < NUMBER_OF_ORIENT_CLUSTERS; ++i)
29 | {
30 | transactionlessGraph.getVertexBaseType().addCluster("v_" + i);
31 | transactionlessGraph.getEdgeBaseType().addCluster("e_" + i);
32 | }
33 | transactionlessGraph.shutdown();
34 |
35 | graph = new OGraphBatchInsertBasic(url);
36 | graph.setAverageEdgeNumberPerNode(AVERAGE_NUMBER_OF_EDGES_PER_NODE);
37 | graph.setEstimatedEntries(ESTIMATED_ENTRIES);
38 | graph.setIdPropertyName("nodeId");
39 | graph.begin();
40 | }
41 |
42 | @Override
43 | protected void post() {
44 | graph.end();
45 | }
46 |
47 | @Override
48 | protected Long getOrCreate(String value)
49 | {
50 | final long v = Long.parseLong(value);
51 | graph.createVertex(v);
52 | return v;
53 | }
54 |
55 | @Override
56 | protected void relateNodes(Long src, Long dest)
57 | {
58 | graph.createEdge(src, dest);
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/benchmarks/BenchmarkBase.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | import java.io.File;
4 |
5 | import org.apache.logging.log4j.LogManager;
6 | import org.apache.logging.log4j.Logger;
7 |
8 | import eu.socialsensor.main.BenchmarkConfiguration;
9 | import eu.socialsensor.main.BenchmarkType;
10 | import eu.socialsensor.main.GraphDatabaseType;
11 | import eu.socialsensor.utils.Utils;
12 |
13 | /**
14 | * Base class for benchmarks.
15 | *
16 | * @author Alexander Patrikalakis
17 | */
18 | public abstract class BenchmarkBase implements Benchmark
19 | {
20 | private static final Logger logger = LogManager.getLogger();
21 | protected final BenchmarkConfiguration bench;
22 | protected final File outputFile;
23 | protected final BenchmarkType type;
24 |
25 | protected BenchmarkBase(BenchmarkConfiguration bench, BenchmarkType type)
26 | {
27 | this.bench = bench;
28 | this.outputFile = new File(bench.getResultsPath(), type.getResultsFileName());
29 | this.type = type;
30 | }
31 |
32 | @Override
33 | public final void startBenchmark()
34 | {
35 | startBenchmarkInternal();
36 | }
37 |
38 | public abstract void startBenchmarkInternal();
39 |
40 | protected final void createDatabases()
41 | {
42 | for (GraphDatabaseType type : bench.getSelectedDatabases())
43 | {
44 | logger.info(String.format("creating %s database from %s dataset", type.getShortname(), bench.getDataset()
45 | .getName()));
46 | File dbpath = Utils.generateStorageDirectory(type, bench.getDbStorageDirectory());
47 | if (dbpath.exists())
48 | {
49 | throw new IllegalStateException(String.format(
50 |                     "Database from a previous run exists: %s; clean up and try again.", dbpath.getAbsolutePath()));
51 | }
52 | Utils.createMassiveLoadDatabase(type, bench);
53 | }
54 | }
55 |
56 | protected final void deleteDatabases()
57 | {
58 | for (GraphDatabaseType type : bench.getSelectedDatabases())
59 | {
60 | Utils.deleteDatabase(type, bench);
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/benchmarks/SingleInsertionBenchmark.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | import eu.socialsensor.graphdatabases.GraphDatabase;
4 | import eu.socialsensor.main.BenchmarkConfiguration;
5 | import eu.socialsensor.main.BenchmarkType;
6 | import eu.socialsensor.main.GraphDatabaseType;
7 | import eu.socialsensor.utils.Utils;
8 |
9 | import java.io.File;
10 | import java.util.List;
11 |
12 | import org.apache.logging.log4j.LogManager;
13 | import org.apache.logging.log4j.Logger;
14 |
15 | /**
16 |  * SingleInsertionBenchmark implementation
17 | *
18 | * @author sotbeis, sotbeis@iti.gr
19 | * @author Alexander Patrikalakis
20 | */
21 | public class SingleInsertionBenchmark extends PermutingBenchmarkBase implements InsertsGraphData
22 | {
23 | public static final String INSERTION_TIMES_OUTPUT_FILE_NAME_BASE = "SINGLE_INSERTIONResults";
24 | private static final Logger LOG = LogManager.getLogger();
25 |
26 | public SingleInsertionBenchmark(BenchmarkConfiguration bench)
27 | {
28 | super(bench, BenchmarkType.SINGLE_INSERTION);
29 | }
30 |
31 | @Override
32 | public void post()
33 | {
34 | LOG.info("Write results to " + outputFile.getAbsolutePath());
35 | for (GraphDatabaseType type : bench.getSelectedDatabases())
36 | {
37 | String prefix = outputFile.getParentFile().getAbsolutePath() + File.separator
38 | + INSERTION_TIMES_OUTPUT_FILE_NAME_BASE + "." + type.getShortname();
39 |             List<List<Double>> insertionTimesOfEachScenario = Utils.getDocumentsAs2dList(prefix, bench.getScenarios());
40 | times.put(type, Utils.calculateMeanList(insertionTimesOfEachScenario));
41 | Utils.deleteMultipleFiles(prefix, bench.getScenarios());
42 | }
43 | // use the logic of the superclass method after populating the times map
44 | super.post();
45 | }
46 |
47 | @Override
48 | public void benchmarkOne(GraphDatabaseType type, int scenarioNumber)
49 | {
50 | GraphDatabase,?,?,?> graphDatabase = Utils.createDatabaseInstance(bench, type);
51 | graphDatabase.createGraphForSingleLoad();
52 | graphDatabase.singleModeLoading(bench.getDataset(), bench.getResultsPath(), scenarioNumber);
53 | graphDatabase.shutdown();
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/insert/OrientSingleInsertion.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.insert;
2 |
3 | import java.io.File;
4 |
5 | import com.orientechnologies.orient.core.db.record.OIdentifiable;
6 | import com.orientechnologies.orient.core.index.OIndex;
7 | import com.tinkerpop.blueprints.TransactionalGraph;
8 | import com.tinkerpop.blueprints.Vertex;
9 | import com.tinkerpop.blueprints.impls.orient.OrientGraph;
10 |
11 | import eu.socialsensor.main.GraphDatabaseType;
12 |
13 | /**
14 | * Implementation of single Insertion in OrientDB graph database
15 | *
16 | * @author sotbeis, sotbeis@iti.gr
17 | * @author Alexander Patrikalakis
18 | *
19 | */
20 | public final class OrientSingleInsertion extends InsertionBase<Vertex>
21 | {
22 |     protected final OrientGraph orientGraph;
23 |     protected final OIndex<?> index;
24 |
25 | public OrientSingleInsertion(OrientGraph orientGraph, File resultsPath)
26 | {
27 | super(GraphDatabaseType.ORIENT_DB, resultsPath);
28 | this.orientGraph = orientGraph;
29 | this.index = this.orientGraph.getRawGraph().getMetadata().getIndexManager().getIndex("V.nodeId");
30 | }
31 |
32 | @Override
33 | protected void relateNodes(Vertex src, Vertex dest)
34 | {
35 | orientGraph.addEdge(null, src, dest, "similar");
36 |
37 | // TODO why commit twice? is this a nested transaction?
38 | if (orientGraph instanceof TransactionalGraph)
39 | {
40 | orientGraph.commit();
41 | orientGraph.commit();
42 | }
43 | }
44 |
45 | @Override
46 | protected Vertex getOrCreate(final String value)
47 | {
48 | final int key = Integer.parseInt(value);
49 |
50 | Vertex v;
51 | final OIdentifiable rec = (OIdentifiable) index.get(key);
52 | if (rec != null)
53 | {
54 | return orientGraph.getVertex(rec);
55 | }
56 |
57 | v = orientGraph.addVertex(key, "nodeId", key);
58 |
59 | if (orientGraph instanceof TransactionalGraph)
60 | {
61 | orientGraph.commit();
62 | }
63 |
64 | return v;
65 | }
66 |
67 | @Override
68 | protected void post()
69 | {
70 | super.post();
71 | if (orientGraph instanceof TransactionalGraph)
72 | {
73 | orientGraph.commit();
74 | }
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/insert/OrientAbstractInsertion.java:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * * Copyright 2014 Orient Technologies LTD (info(at)orientechnologies.com)
4 | * *
5 | * * Licensed under the Apache License, Version 2.0 (the "License");
6 | * * you may not use this file except in compliance with the License.
7 | * * You may obtain a copy of the License at
8 | * *
9 | * * http://www.apache.org/licenses/LICENSE-2.0
10 | * *
11 | * * Unless required by applicable law or agreed to in writing, software
12 | * * distributed under the License is distributed on an "AS IS" BASIS,
13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * * See the License for the specific language governing permissions and
15 | * * limitations under the License.
16 | * *
17 | * * For more information: http://www.orientechnologies.com
18 | *
19 | */
20 |
21 | package eu.socialsensor.insert;
22 |
23 | import org.apache.log4j.Logger;
24 |
25 | import com.orientechnologies.orient.core.db.record.OIdentifiable;
26 | import com.orientechnologies.orient.core.index.OIndex;
27 | import com.tinkerpop.blueprints.TransactionalGraph;
28 | import com.tinkerpop.blueprints.Vertex;
29 | import com.tinkerpop.blueprints.impls.orient.OrientExtendedGraph;
30 |
31 | /**
32 | * Implementation of single Insertion in OrientDB graph database
33 | *
34 | * @author sotbeis
35 | * @email sotbeis@iti.gr
36 | *
37 | */
38 | public abstract class OrientAbstractInsertion implements Insertion {
39 |
40 | public static String INSERTION_TIMES_OUTPUT_PATH = null;
41 |
42 | protected OrientExtendedGraph orientGraph = null;
43 | protected Logger logger = Logger.getLogger(OrientAbstractInsertion.class);
44 |
45 |     protected OIndex<?> index;
46 |
47 | public OrientAbstractInsertion(OrientExtendedGraph orientGraph) {
48 | this.orientGraph = orientGraph;
49 | }
50 |
51 | protected Vertex getOrCreate(final String value) {
52 | final int key = Integer.parseInt(value);
53 |
54 | Vertex v;
55 | if (index == null) {
56 | index = orientGraph.getRawGraph().getMetadata().getIndexManager().getIndex("V.nodeId");
57 | }
58 |
59 | final OIdentifiable rec = (OIdentifiable) index.get(key);
60 | if (rec != null) {
61 | return orientGraph.getVertex(rec);
62 | }
63 |
64 | v = orientGraph.addVertex(key, "nodeId", key);
65 |
66 | if (orientGraph instanceof TransactionalGraph) {
67 | ((TransactionalGraph) orientGraph).commit();
68 | }
69 |
70 | return v;
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/benchmarks/PermutingBenchmarkBase.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collection;
5 | import java.util.HashMap;
6 | import java.util.List;
7 | import java.util.Map;
8 |
9 | import org.apache.commons.collections4.iterators.PermutationIterator;
10 | import org.apache.logging.log4j.Logger;
11 | import org.apache.logging.log4j.LogManager;
12 |
13 | import eu.socialsensor.main.BenchmarkConfiguration;
14 | import eu.socialsensor.main.BenchmarkType;
15 | import eu.socialsensor.main.GraphDatabaseType;
16 | import eu.socialsensor.utils.Utils;
17 |
18 | /**
19 | * Base class abstracting the logic of permutations
20 | *
21 | * @author Alexander Patrikalakis
22 | */
23 | public abstract class PermutingBenchmarkBase extends BenchmarkBase
24 | {
25 |     protected final Map<GraphDatabaseType, List<Double>> times;
26 | private static final Logger LOG = LogManager.getLogger();
27 |
28 | protected PermutingBenchmarkBase(BenchmarkConfiguration bench, BenchmarkType typeIn)
29 | {
30 | super(bench, typeIn);
31 |         times = new HashMap<GraphDatabaseType, List<Double>>();
32 | for (GraphDatabaseType type : bench.getSelectedDatabases())
33 | {
34 |             times.put(type, new ArrayList<Double>(bench.getScenarios()));
35 | }
36 | }
37 |
38 | @Override
39 | public void startBenchmarkInternal()
40 | {
41 | LOG.info(String.format("Executing %s Benchmark . . . .", type.longname()));
42 |
43 | if (bench.permuteBenchmarks())
44 | {
45 |             PermutationIterator<GraphDatabaseType> iter = new PermutationIterator<GraphDatabaseType>(
46 |                 bench.getSelectedDatabases());
47 | int cntPermutations = 1;
48 | while (iter.hasNext())
49 | {
50 | LOG.info("Scenario " + cntPermutations);
51 | startBenchmarkInternalOnePermutation(iter.next(), cntPermutations);
52 | cntPermutations++;
53 | }
54 | }
55 | else
56 | {
57 | startBenchmarkInternalOnePermutation(bench.getSelectedDatabases(), 1);
58 | }
59 |
60 | LOG.info(String.format("%s Benchmark finished", type.longname()));
61 | post();
62 | }
63 |
64 |     private void startBenchmarkInternalOnePermutation(Collection<GraphDatabaseType> types, int cntPermutations)
65 | {
66 | for (GraphDatabaseType type : types)
67 | {
68 | benchmarkOne(type, cntPermutations);
69 | }
70 | }
71 |
72 | public abstract void benchmarkOne(GraphDatabaseType type, int scenarioNumber);
73 |
74 | public void post()
75 | {
76 | Utils.writeResults(outputFile, times, type.longname());
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
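
As a standalone illustration (not part of the suite) of what the permutation loop in startBenchmarkInternal does: PermutationIterator from commons-collections4 yields every ordering of the selected databases, and each ordering is run as one scenario when eu.socialsensor.permute-benchmarks=true.

    import java.util.Arrays;
    import java.util.List;

    import org.apache.commons.collections4.iterators.PermutationIterator;

    public class PermutationDemo
    {
        public static void main(String[] args)
        {
            List<String> databases = Arrays.asList("neo4j", "orient", "sparksee");
            PermutationIterator<String> iter = new PermutationIterator<String>(databases);
            int scenario = 1;
            while (iter.hasNext())
            {
                // 3 databases -> 3! = 6 scenarios, each executing the benchmark in a different order
                System.out.println("Scenario " + (scenario++) + ": " + iter.next());
            }
        }
    }
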
/src/main/java/eu/socialsensor/insert/Neo4jSingleInsertion.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.insert;
2 |
3 | import java.io.File;
4 | import java.util.HashMap;
5 | import java.util.Map;
6 |
7 | import org.neo4j.cypher.javacompat.ExecutionEngine;
8 | import org.neo4j.graphdb.GraphDatabaseService;
9 | import org.neo4j.graphdb.Node;
10 | import org.neo4j.graphdb.ResourceIterator;
11 | import org.neo4j.graphdb.Transaction;
12 | import org.neo4j.kernel.GraphDatabaseAPI;
13 |
14 | import eu.socialsensor.graphdatabases.Neo4jGraphDatabase;
15 | import eu.socialsensor.main.BenchmarkingException;
16 | import eu.socialsensor.main.GraphDatabaseType;
17 |
18 | /**
19 | * Implementation of single Insertion in Neo4j graph database
20 | *
21 | * @author sotbeis, sotbeis@iti.gr
22 | * @author Alexander Patrikalakis
23 | *
24 | */
25 | @SuppressWarnings("deprecation")
26 | public class Neo4jSingleInsertion extends InsertionBase<Node>
27 | {
28 | private final GraphDatabaseService neo4jGraph;
29 | private final ExecutionEngine engine;
30 |
31 | public Neo4jSingleInsertion(GraphDatabaseService neo4jGraph, File resultsPath)
32 | {
33 | super(GraphDatabaseType.NEO4J, resultsPath);
34 | this.neo4jGraph = neo4jGraph;
35 | engine = new ExecutionEngine(this.neo4jGraph);
36 | }
37 |
38 | public Node getOrCreate(String nodeId)
39 | {
40 | Node result = null;
41 |
42 | try(final Transaction tx = ((GraphDatabaseAPI) neo4jGraph).tx().unforced().begin())
43 | {
44 | try
45 | {
46 | String queryString = "MERGE (n:Node {nodeId: {nodeId}}) RETURN n";
47 |                 Map<String, Object> parameters = new HashMap<String, Object>();
48 | parameters.put("nodeId", nodeId);
49 |                 ResourceIterator<Node> resultIterator = engine.execute(queryString, parameters).columnAs("n");
50 | result = resultIterator.next();
51 | tx.success();
52 | }
53 | catch (Exception e)
54 | {
55 | tx.failure();
56 | throw new BenchmarkingException("unable to get or create node " + nodeId, e);
57 | }
58 | }
59 |
60 | return result;
61 | }
62 |
63 | @Override
64 | public void relateNodes(Node src, Node dest)
65 | {
66 | try (final Transaction tx = ((GraphDatabaseAPI) neo4jGraph).tx().unforced().begin())
67 | {
68 | try
69 | {
70 | src.createRelationshipTo(dest, Neo4jGraphDatabase.RelTypes.SIMILAR);
71 | tx.success();
72 | }
73 | catch (Exception e)
74 | {
75 | tx.failure();
76 | throw new BenchmarkingException("unable to relate nodes", e);
77 | }
78 | }
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/utils/Metrics.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.utils;
2 |
3 | import java.util.List;
4 | import java.util.Map;
5 |
6 | /**
7 | * This class implements the metrics we use for the evaluation of the predicted
8 | * clustering. For now we use only the NMI
9 | *
10 | * @author sbeis
11 | * @email sotbeis@gmail.com
12 | *
13 | */
14 | public class Metrics
15 | {
16 |     public double normalizedMutualInformation(int numberOfNodes, Map<Integer, List<Integer>> actualPartitions,
17 |         Map<Integer, List<Integer>> predictedPartitions)
18 | {
19 | double nmi;
20 | double numOfNodes = (double) numberOfNodes;
21 | int[][] confusionMatrix = confusionMatrix(actualPartitions, predictedPartitions);
22 | int[] confusionMatrixActual = new int[actualPartitions.size()];
23 | int[] confusionMatrixPredicted = new int[predictedPartitions.size()];
24 | for (int i = 0; i < confusionMatrixActual.length; i++)
25 | {
26 | int sum = 0;
27 | for (int j = 0; j < confusionMatrixPredicted.length; j++)
28 | {
29 | sum = sum + confusionMatrix[i][j];
30 | }
31 | confusionMatrixActual[i] = sum;
32 | }
33 | for (int j = 0; j < confusionMatrixPredicted.length; j++)
34 | {
35 | int sum = 0;
36 | for (int i = 0; i < confusionMatrixActual.length; i++)
37 | {
38 | sum = sum + confusionMatrix[i][j];
39 | }
40 | confusionMatrixPredicted[j] = sum;
41 | }
42 |
43 | double term1 = 0;
44 | for (int i = 0; i < confusionMatrixActual.length; i++)
45 | {
46 | for (int j = 0; j < confusionMatrixPredicted.length; j++)
47 | {
48 | if (confusionMatrix[i][j] > 0)
49 | {
50 | term1 += -2.0
51 | * confusionMatrix[i][j]
52 | * Math.log((confusionMatrix[i][j] * numOfNodes)
53 | / (confusionMatrixActual[i] * confusionMatrixPredicted[j]));
54 | }
55 | }
56 | }
57 | double term2 = 0;
58 | for (int i = 0; i < confusionMatrixActual.length; i++)
59 | {
60 | term2 += confusionMatrixActual[i] * Math.log(confusionMatrixActual[i] / numOfNodes);
61 | }
62 | double term3 = 0;
63 | for (int j = 0; j < confusionMatrixPredicted.length; j++)
64 | {
65 | term3 += confusionMatrixPredicted[j] * Math.log(confusionMatrixPredicted[j] / numOfNodes);
66 | }
67 | nmi = term1 / (term2 + term3);
68 | return nmi;
69 | }
70 |
71 |     private int[][] confusionMatrix(Map<Integer, List<Integer>> actualPartitions,
72 |         Map<Integer, List<Integer>> predictedPartitions)
73 | {
74 | int actualPartitionsSize = actualPartitions.size();
75 | int predictedPartitionsSize = predictedPartitions.size();
76 | int[][] confusionMatrix = new int[actualPartitionsSize][];
77 | int actualPartitionsKeys[] = new int[actualPartitionsSize];
78 | int predictedPartitionsKeys[] = new int[predictedPartitionsSize];
79 |
80 | int actualPartitionsIndex = 0;
81 | for (int key : actualPartitions.keySet())
82 | {
83 | actualPartitionsKeys[actualPartitionsIndex] = key;
84 | actualPartitionsIndex++;
85 | }
86 | int predictedPartitionsIndex = 0;
87 | for (int key : predictedPartitions.keySet())
88 | {
89 | predictedPartitionsKeys[predictedPartitionsIndex] = key;
90 | predictedPartitionsIndex++;
91 | }
92 |
93 | for (int i = 0; i < actualPartitionsSize; i++)
94 | {
95 | confusionMatrix[i] = new int[predictedPartitionsSize];
96 | for (int j = 0; j < predictedPartitionsSize; j++)
97 | {
98 | int commonNodes = 0;
99 | for (int node : predictedPartitions.get(predictedPartitionsKeys[j]))
100 | {
101 | if (actualPartitions.get(actualPartitionsKeys[i]).contains(node))
102 | {
103 | commonNodes++;
104 | }
105 | }
106 | confusionMatrix[i][j] = commonNodes;
107 | }
108 | }
109 | return confusionMatrix;
110 | }
111 | }
112 |
--------------------------------------------------------------------------------
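
For reference, the quantity computed by normalizedMutualInformation above (reading term1, term2 and term3 directly off the code) is the confusion-matrix form of normalized mutual information, with $n_{ij}$ the number of nodes shared by actual community $i$ and predicted community $j$, $n_{i\cdot}$ and $n_{\cdot j}$ the row and column sums of the confusion matrix, and $N$ the total number of nodes:

$$
\mathrm{NMI} \;=\; \frac{-2 \displaystyle\sum_{i}\sum_{j} n_{ij}\,\log\!\frac{n_{ij}\,N}{n_{i\cdot}\,n_{\cdot j}}}
{\displaystyle\sum_{i} n_{i\cdot}\,\log\!\frac{n_{i\cdot}}{N} \;+\; \sum_{j} n_{\cdot j}\,\log\!\frac{n_{\cdot j}}{N}}
$$
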
/src/main/java/eu/socialsensor/insert/InsertionBase.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.insert;
2 |
3 | import java.io.File;
4 | import java.util.ArrayList;
5 | import java.util.List;
6 | import java.util.concurrent.TimeUnit;
7 |
8 | import org.apache.logging.log4j.LogManager;
9 | import org.apache.logging.log4j.Logger;
10 |
11 | import com.codahale.metrics.Timer;
12 | import com.google.common.base.Stopwatch;
13 |
14 | import eu.socialsensor.benchmarks.SingleInsertionBenchmark;
15 | import eu.socialsensor.dataset.Dataset;
16 | import eu.socialsensor.dataset.DatasetFactory;
17 | import eu.socialsensor.main.GraphDatabaseBenchmark;
18 | import eu.socialsensor.main.GraphDatabaseType;
19 | import eu.socialsensor.utils.Utils;
20 |
21 | /**
22 | * Base class for business logic of insertion workloads
23 | *
24 | * @author Alexander Patrikalakis
25 | *
26 |  * @param <T>
27 |  *            the type of vertexes (graph database vendor specific)
28 |  */
29 | public abstract class InsertionBase<T> implements Insertion
30 | {
31 | private static final Logger logger = LogManager.getLogger();
32 | public static final String INSERTION_CONTEXT = ".eu.socialsensor.insertion.";
33 | private final Timer getOrCreateTimes;
34 | private final Timer relateNodesTimes;
35 |
36 | protected final GraphDatabaseType type;
37 |     protected final List<Double> insertionTimes;
38 | private final boolean single;
39 |
40 | // to write intermediate times for SingleInsertion subclasses
41 | protected final File resultsPath;
42 |
43 | protected InsertionBase(GraphDatabaseType type, File resultsPath)
44 | {
45 | this.type = type;
46 |         this.insertionTimes = new ArrayList<Double>();
47 | this.resultsPath = resultsPath;
48 | this.single = resultsPath != null;
49 | final String insertionTypeCtxt = type.getShortname() + INSERTION_CONTEXT + (single ? "adhoc." : "batch.");
50 | this.getOrCreateTimes = GraphDatabaseBenchmark.metrics.timer(insertionTypeCtxt + "getOrCreate");
51 | this.relateNodesTimes = GraphDatabaseBenchmark.metrics.timer(insertionTypeCtxt + "relateNodes");
52 | }
53 |
54 | /**
55 | * Gets or creates a vertex
56 | *
57 | * @param value
58 | * the identifier of the vertex
59 | * @return the id of the created vertex
60 | */
61 | protected abstract T getOrCreate(final String value);
62 |
63 | /**
64 | *
65 | * @param src
66 | * @param dest
67 | */
68 | protected abstract void relateNodes(final T src, final T dest);
69 |
70 | /**
71 | * sometimes a transaction needs to be committed at the end of a batch run.
72 | * this is the hook.
73 | */
74 | protected void post()
75 | {
76 | // NOOP
77 | }
78 |
79 | public final void createGraph(File datasetFile, int scenarioNumber)
80 | {
81 | logger.info("Loading data in {} mode in {} database . . . .", single ? "single" : "massive",
82 | type.name());
83 | Dataset dataset = DatasetFactory.getInstance().getDataset(datasetFile);
84 |
85 | T srcNode, dstNode;
86 | Stopwatch thousandWatch = new Stopwatch(), watch = new Stopwatch();
87 | thousandWatch.start();
88 | watch.start();
89 | int i = 4;
90 |         for (List<String> line : dataset)
91 | {
92 | final Timer.Context contextSrc = getOrCreateTimes.time();
93 | try {
94 | srcNode = getOrCreate(line.get(0));
95 | } finally {
96 | contextSrc.stop();
97 | }
98 |
99 | final Timer.Context contextDest = getOrCreateTimes.time();
100 | try {
101 | dstNode = getOrCreate(line.get(1));
102 | } finally {
103 | contextDest.stop();
104 | }
105 |
106 | final Timer.Context contextRelate = relateNodesTimes.time();
107 | try {
108 | relateNodes(srcNode, dstNode);
109 | } finally {
110 | contextRelate.stop();
111 | }
112 |
113 | if (i % 1000 == 0)
114 | {
115 | insertionTimes.add((double) thousandWatch.elapsed(TimeUnit.MILLISECONDS));
116 | thousandWatch.stop();
117 | thousandWatch = new Stopwatch();
118 | thousandWatch.start();
119 | }
120 | i++;
121 | }
122 | post();
123 | insertionTimes.add((double) watch.elapsed(TimeUnit.MILLISECONDS));
124 |
125 | if (single)
126 | {
127 | Utils.writeTimes(insertionTimes, new File(resultsPath,
128 | SingleInsertionBenchmark.INSERTION_TIMES_OUTPUT_FILE_NAME_BASE + "." + type.getShortname() + "."
129 | + Integer.toString(scenarioNumber)));
130 | }
131 | }
132 | }
133 |
--------------------------------------------------------------------------------
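
To make the contract of InsertionBase concrete, here is a minimal in-memory sketch (purely hypothetical, not a class in this repository): getOrCreate maps a textual node id to a vendor-specific vertex handle, relateNodes records one edge, and post() is the hook for flushing a final batch or committing a last transaction.

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    import eu.socialsensor.insert.InsertionBase;
    import eu.socialsensor.main.GraphDatabaseType;

    public class InMemoryInsertion extends InsertionBase<Long>
    {
        private final Map<String, Long> ids = new HashMap<String, Long>();
        private final List<long[]> edges = new ArrayList<long[]>();

        public InMemoryInsertion()
        {
            // any existing type works for the metrics context; a null resultsPath selects "massive" mode
            super(GraphDatabaseType.NEO4J, null /* resultsPath */);
        }

        @Override
        protected Long getOrCreate(String value)
        {
            Long id = ids.get(value);
            if (id == null)
            {
                id = Long.valueOf(ids.size());
                ids.put(value, id);
            }
            return id;
        }

        @Override
        protected void relateNodes(Long src, Long dest)
        {
            edges.add(new long[] { src.longValue(), dest.longValue() });
        }

        @Override
        protected void post()
        {
            // a real backend would commit its last partial transaction/batch here
        }
    }
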
/src/test/resources/META-INF/input.properties:
--------------------------------------------------------------------------------
1 | # Choose which data sets you want to include in the benchmark by removing the comments.
2 | #eu.socialsensor.dataset=data/Email-Enron.txt
3 | #eu.socialsensor.dataset=data/com-youtube.ungraph.txt
4 | #eu.socialsensor.dataset=data/Amazon0601.txt
5 | #eu.socialsensor.dataset=data/com-lj.ungraph.txt
6 | #can change the number in the filename of the synthetic datasets to 1000, 5000, 10000, 20000, 30000, 40000, 50000
7 | eu.socialsensor.dataset=data/network1000.dat
8 | eu.socialsensor.actual-communities=data/community1000.dat
9 |
10 | eu.socialsensor.database-storage-directory=storage
11 | # Sample meters this frequently (milliseconds)
12 | eu.socialsensor.metrics.csv.interval=1000
13 | # for the csv reporter
14 | eu.socialsensor.metrics.csv.directory=metrics
15 | # for the graphite reporter
16 | #eu.socialsensor.metrics.graphite.hostname=192.168.59.103
17 |
18 | # Choose which databases you want to include in the benchmark by removing the comments.
19 | # Available dbs are:
20 | eu.socialsensor.databases=tbdb
21 | eu.socialsensor.databases=tddb
22 | #eu.socialsensor.databases=tc
23 | #eu.socialsensor.databases=thb
24 | #eu.socialsensor.databases=tce
25 | #eu.socialsensor.databases=tp
26 | #eu.socialsensor.databases=orient
27 | #eu.socialsensor.databases=neo4j
28 | #eu.socialsensor.databases=sparksee
29 |
30 | # Database specific options
31 | # Titan options
32 | # page-size - Number of results to pull when iterating over a storage backend (default 100)
33 | eu.socialsensor.titan.page-size=100
34 | # to disable buffering on mutations, set to zero. Default 1024. This will set the queue size as well
35 | eu.socialsensor.titan.buffer-size=10000
36 | # id block size default 10000
37 | eu.socialsensor.titan.ids.block-size=10000
38 | # Titan DynamoDB options
39 | # when precreate-tables is set to true, the benchmark will create tables of the specified data model in parallel
40 | eu.socialsensor.dynamodb.precreate-tables=true
41 | # DynamoDBDelegate worker thread pool size. should not be larger than the number of HTTP connections
42 | # assuming a round trip time of 10ms for writes, one thread can do 50tps. When using the MULTI data
43 | # model, items are usually small, so the round trip time is close to that.
44 | eu.socialsensor.dynamodb.workers=15
45 | # TPS (both read and write to set per table). 750 R and W TPS = 1 DynamoDB table partition
46 | # http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/GuidelinesForTables.html#GuidelinesForTables.Partitions
47 | eu.socialsensor.dynamodb.tps=750
48 | # Data model for the Titan DynamoDB plugin. Can be SINGLE or MULTI
49 | eu.socialsensor.dynamodb.data-model=MULTI
50 | # Whether to force strongly consistent reads
51 | # (setting this to false allows eventually consistent reads, which can make queries faster)
52 | eu.socialsensor.dynamodb.force-consistent-read=true
53 | # Credentials. You can set credentials to any value when running against DynamoDBLocal
54 | # Needs to be the fully qualified class name of a class implementing
55 | # com.amazonaws.auth.AWSCredentials or com.amazonaws.auth.AWSCredentialsProvider.
56 | eu.socialsensor.dynamodb.credentials.class-name=com.amazonaws.auth.DefaultAWSCredentialsProviderChain
57 | # Comma separated list of strings to pass to the constructor of the class specified above.
58 | eu.socialsensor.dynamodb.credentials.constructor-args=
59 | # Endpoint. The titan-dynamodb database option above requires you to specify an endpoint.
60 | # This endpoint could be DynamoDBLocal running in a separate process (eg, http://127.0.0.1:4567),
61 | # or the https endpoint of a production region of the DynamoDB service.
62 | eu.socialsensor.dynamodb.endpoint=http://127.0.0.1:4567
63 | #eu.socialsensor.dynamodb.endpoint=https://dynamodb.us-east-1.amazonaws.com
64 |
65 | # OrientDB options
66 | eu.socialsensor.orient.lightweight-edges=true
67 |
68 | # Sparksee options
69 | eu.socialsensor.sparksee.license-key=DEADBEEF
70 |
71 | # The following five benchmarks are permutable (that is, the suite can run them
72 | # many times in different database order). To turn on permutations, set
73 | # eu.socialsensor.permute-benchmarks=true
74 | eu.socialsensor.permute-benchmarks=false
75 |
76 | # Choose which benchmark you want to run by removing the comments. Choose one Insertion
77 | # workload and then query/clustering workloads afterward.
78 | eu.socialsensor.benchmarks=MASSIVE_INSERTION
79 | #eu.socialsensor.benchmarks=SINGLE_INSERTION
80 | eu.socialsensor.benchmarks=FIND_NEIGHBOURS
81 | eu.socialsensor.benchmarks=FIND_ADJACENT_NODES
82 | eu.socialsensor.benchmarks=FIND_SHORTEST_PATH
83 | eu.socialsensor.shortest-path-random-nodes=100
84 |
85 | # The clustering benchmark is not permutable even if eu.socialsensor.permute-benchmarks=true
86 | #eu.socialsensor.benchmarks=CLUSTERING
87 | eu.socialsensor.randomize-clustering=false
88 | eu.socialsensor.nodes-count=1000
89 |
90 | # Choose the cache values you want to run the CW benchmark with, or have them generated. To choose:
91 | eu.socialsensor.cache-values=25
92 | eu.socialsensor.cache-values=50
93 | eu.socialsensor.cache-values=75
94 | eu.socialsensor.cache-values=100
95 | eu.socialsensor.cache-values=125
96 | eu.socialsensor.cache-values=150
97 |
98 | # To have the cache values generated for the CW benchmark.
99 | #eu.socialsensor.cache-increment-factor=1
100 | #eu.socialsensor.cache-values-count=6
101 |
102 | # This benchmark measures the time it takes to delete the database
103 | #eu.socialsensor.benchmarks=DELETION
104 |
105 | # Results folder path
106 | eu.socialsensor.results-path=results
107 |
--------------------------------------------------------------------------------
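One detail of the properties file above that is easy to misread: the repeated eu.socialsensor.databases and eu.socialsensor.benchmarks keys do not override each other. The benchmark loads this file with Apache commons-configuration (see GraphDatabaseBenchmark further down), which accumulates repeated keys into a multi-valued property, so every uncommented line adds another database or workload to the run. A small standalone sketch of that behaviour; the RepeatedKeysDemo class is illustrative and not part of the code base.

```java
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;

// Shows why every uncommented eu.socialsensor.databases / eu.socialsensor.benchmarks
// line in input.properties takes effect: repeated keys become a multi-valued property.
public class RepeatedKeysDemo
{
    public static void main(String[] args) throws ConfigurationException
    {
        PropertiesConfiguration config =
                new PropertiesConfiguration("src/test/resources/META-INF/input.properties");

        // With the file as shown above this prints "tbdb" and "tddb":
        // both uncommented database lines are kept, neither overrides the other.
        for (String db : config.getStringArray("eu.socialsensor.databases"))
        {
            System.out.println(db);
        }

        // The same applies to the benchmark workloads selected further down in the file.
        for (String benchmark : config.getStringArray("eu.socialsensor.benchmarks"))
        {
            System.out.println(benchmark);
        }
    }
}
```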
/src/main/java/eu/socialsensor/clustering/LouvainMethod.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.clustering;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import java.util.Random;
6 | import java.util.Set;
7 | import java.util.concurrent.ExecutionException;
8 |
9 | import eu.socialsensor.graphdatabases.GraphDatabase;
10 |
11 | /**
12 | * Implementation of Louvain Method on top of graph databases. Gephi Toolkit
13 | * (https://gephi.org/toolkit/) java implementation was used as guide.
14 | *
15 | * @author sotbeis
16 | * @email sotbeis@iti.gr
17 | */
18 | public class LouvainMethod
19 | {
20 | boolean isRandomized;
21 | private double resolution = 1.0;
22 | private double graphWeightSum;
23 | private int N;
24 | private List<Double> communityWeights;
25 | private boolean communityUpdate = false;
26 |
27 | GraphDatabase<?, ?, ?, ?> graphDatabase;
28 | Cache cache;
29 |
30 | public LouvainMethod(GraphDatabase<?, ?, ?, ?> graphDatabase, int cacheSize, boolean isRandomized) throws ExecutionException
31 | {
32 | this.graphDatabase = graphDatabase;
33 | this.isRandomized = isRandomized;
34 | initialize();
35 | cache = new Cache(graphDatabase, cacheSize);
36 | }
37 |
38 | private void initialize()
39 | {
40 | this.N = this.graphDatabase.getNodeCount();// this step takes a long
41 | // time on dynamodb.
42 | this.graphWeightSum = this.graphDatabase.getGraphWeightSum() / 2;
43 |
44 | this.communityWeights = new ArrayList<Double>(this.N);
45 | for (int i = 0; i < this.N; i++)
46 | {
47 | this.communityWeights.add(0.0);
48 | }
49 |
50 | this.graphDatabase.initCommunityProperty();
51 | }
52 |
53 | public void computeModularity() throws ExecutionException
54 | {
55 | Random rand = new Random();
56 | boolean someChange = true;
57 | while (someChange)
58 | {
59 | someChange = false;
60 | boolean localChange = true;
61 | while (localChange)
62 | {
63 | localChange = false;
64 | int start = 0;
65 | if (this.isRandomized)
66 | {
67 | start = Math.abs(rand.nextInt()) % this.N;
68 | }
69 | int step = 0;
70 | for (int i = start; step < this.N; i = (i + 1) % this.N)
71 | {
72 | step++;
73 | int bestCommunity = updateBestCommunity(i);
74 | if ((this.cache.getCommunity(i) != bestCommunity) && (this.communityUpdate))
75 | {
76 |
77 | this.cache.moveNodeCommunity(i, bestCommunity);
78 | this.graphDatabase.moveNode(i, bestCommunity);
79 |
80 | double bestCommunityWeight = this.communityWeights.get(bestCommunity);
81 |
82 | bestCommunityWeight += cache.getNodeCommunityWeight(i);
83 | this.communityWeights.set(bestCommunity, bestCommunityWeight);
84 | localChange = true;
85 | }
86 |
87 | this.communityUpdate = false;
88 | }
89 | someChange = localChange || someChange;
90 | }
91 | if (someChange)
92 | {
93 | zoomOut();
94 | }
95 | }
96 | }
97 |
98 | private int updateBestCommunity(int node) throws ExecutionException
99 | {
100 | int bestCommunity = 0;
101 | double best = 0;
102 | Set<Integer> communities = this.cache.getCommunitiesConnectedToNodeCommunities(node);
103 | for (int community : communities)
104 | {
105 | double qValue = q(node, community);
106 | if (qValue > best)
107 | {
108 | best = qValue;
109 | bestCommunity = community;
110 | this.communityUpdate = true;
111 | }
112 | }
113 | return bestCommunity;
114 | }
115 |
116 | private double q(int nodeCommunity, int community) throws ExecutionException
117 | {
118 | double edgesInCommunity = this.cache.getEdgesInsideCommunity(nodeCommunity, community);
119 | double communityWeight = this.communityWeights.get(community);
120 | double nodeWeight = this.cache.getNodeCommunityWeight(nodeCommunity);
121 | double qValue = this.resolution * edgesInCommunity - (nodeWeight * communityWeight)
122 | / (2.0 * this.graphWeightSum);
123 | int actualNodeCom = this.cache.getCommunity(nodeCommunity);
124 | int communitySize = this.cache.getCommunitySize(community);
125 |
126 | if ((actualNodeCom == community) && (communitySize > 1))
127 | {
128 | qValue = this.resolution * edgesInCommunity - (nodeWeight * (communityWeight - nodeWeight))
129 | / (2.0 * this.graphWeightSum);
130 | }
131 | if ((actualNodeCom == community) && (communitySize == 1))
132 | {
133 | qValue = 0.;
134 | }
135 | return qValue;
136 | }
137 |
138 | public void zoomOut()
139 | {
140 | this.N = this.graphDatabase.reInitializeCommunities();
141 | this.cache.reInitializeCommunities();
142 | this.communityWeights = new ArrayList<Double>(this.N);
143 | for (int i = 0; i < this.N; i++)
144 | {
145 | this.communityWeights.add(graphDatabase.getCommunityWeight(i));
146 | }
147 | }
148 |
149 | public int getN()
150 | {
151 | return this.N;
152 | }
153 |
154 | }
--------------------------------------------------------------------------------
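For reference, the quantity computed by q(nodeCommunity, community) above is the resolution-scaled modularity gain of the Louvain method, written here in the code's own variables: e_{i,C} is getEdgesInsideCommunity(i, C), k_i is getNodeCommunityWeight(i), Sigma_C is the cached community weight, m is graphWeightSum and gamma is the resolution. The gain is forced to zero when the node community is already alone in C.

```latex
% Modularity gain evaluated by q(i, C):
\Delta Q(i, C) = \gamma\, e_{i,C} - \frac{k_i\, \Sigma_C}{2m}
% and, when node community i already belongs to C and |C| > 1:
\Delta Q(i, C) = \gamma\, e_{i,C} - \frac{k_i\, (\Sigma_C - k_i)}{2m}
```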
/src/main/java/eu/socialsensor/benchmarks/ClusteringBenchmark.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.benchmarks;
2 |
3 | import java.io.BufferedWriter;
4 | import java.io.FileWriter;
5 | import java.io.IOException;
6 | import java.util.ArrayList;
7 | import java.util.HashMap;
8 | import java.util.List;
9 | import java.util.Map;
10 | import java.util.SortedMap;
11 | import java.util.TreeMap;
12 | import java.util.concurrent.ExecutionException;
13 | import java.util.concurrent.TimeUnit;
14 |
15 | import org.apache.logging.log4j.Logger;
16 | import org.apache.logging.log4j.LogManager;
17 |
18 | import com.google.common.base.Stopwatch;
19 |
20 | import eu.socialsensor.clustering.LouvainMethod;
21 | import eu.socialsensor.graphdatabases.GraphDatabase;
22 | import eu.socialsensor.main.BenchmarkConfiguration;
23 | import eu.socialsensor.main.BenchmarkType;
24 | import eu.socialsensor.main.BenchmarkingException;
25 | import eu.socialsensor.main.GraphDatabaseType;
26 | import eu.socialsensor.utils.Metrics;
27 | import eu.socialsensor.utils.Utils;
28 |
29 | /**
30 | * ClusteringBenchmark implementation
31 | *
32 | * @author sotbeis, sotbeis@iti.gr
33 | * @author Alexander Patrikalakis
34 | */
35 | public class ClusteringBenchmark extends BenchmarkBase implements RequiresGraphData
36 | {
37 | private static final Logger LOG = LogManager.getLogger();
38 | private final List<Integer> cacheValues;
39 |
40 | public ClusteringBenchmark(BenchmarkConfiguration config)
41 | {
42 | super(config, BenchmarkType.CLUSTERING);
43 | this.cacheValues = new ArrayList<Integer>();
44 | if (config.getCacheValues() == null)
45 | {
46 | int cacheValueMultiplier = config.getCacheIncrementFactor().intValue() * config.getNodesCount();
47 | for (int i = 1; i <= config.getCacheValuesCount(); i++)
48 | {
49 | cacheValues.add(i * cacheValueMultiplier);
50 | }
51 | }
52 | else
53 | {
54 | cacheValues.addAll(config.getCacheValues());
55 | }
56 | }
57 |
58 | @Override
59 | public void startBenchmarkInternal()
60 | {
61 | LOG.info("Executing Clustering Benchmark . . . .");
62 | SortedMap<GraphDatabaseType, SortedMap<Integer, Double>> typeTimesMap = new TreeMap<GraphDatabaseType, SortedMap<Integer, Double>>();
63 | try
64 | {
65 | for (GraphDatabaseType type : bench.getSelectedDatabases())
66 | {
67 | typeTimesMap.put(type, clusteringBenchmark(type));
68 | }
69 | }
70 | catch (ExecutionException e)
71 | {
72 | throw new BenchmarkingException("Unable to run clustering benchmark: " + e.getMessage(), e);
73 | }
74 |
75 | try (BufferedWriter out = new BufferedWriter(new FileWriter(outputFile)))
76 | {
77 | out.write("DB,Cache Size (measured in nodes),Clustering Benchmark Time (s)\n");
78 | for (GraphDatabaseType type : bench.getSelectedDatabases())
79 | {
80 | for (Integer cacheSize : typeTimesMap.get(type).keySet())
81 | {
82 | out.write(String.format("%s,%d,%f\n", type.getShortname(), cacheSize,
83 | typeTimesMap.get(type).get(cacheSize)));
84 | }
85 | }
86 | }
87 | catch (IOException e)
88 | {
89 | throw new BenchmarkingException("Unable to write clustering results to file");
90 | }
91 | LOG.info("Clustering Benchmark finished");
92 | }
93 |
94 | private SortedMap<Integer, Double> clusteringBenchmark(GraphDatabaseType type) throws ExecutionException
95 | {
96 | GraphDatabase<?, ?, ?, ?> graphDatabase = Utils.createDatabaseInstance(bench, type);
97 | graphDatabase.open();
98 |
99 | SortedMap<Integer, Double> timeMap = new TreeMap<Integer, Double>();
100 | for (int cacheSize : cacheValues)
101 | {
102 | LOG.info("Graph Database: " + type.getShortname() + ", Dataset: " + bench.getDataset().getName()
103 | + ", Cache Size: " + cacheSize);
104 |
105 | Stopwatch watch = new Stopwatch();
106 | watch.start();
107 | LouvainMethod louvainMethodCache = new LouvainMethod(graphDatabase, cacheSize, bench.randomizedClustering());
108 | louvainMethodCache.computeModularity();
109 | timeMap.put(cacheSize, watch.elapsed(TimeUnit.MILLISECONDS) / 1000.0);
110 |
111 | // evaluation with NMI
112 | Map<Integer, List<Integer>> predictedCommunities = graphDatabase.mapCommunities(louvainMethodCache.getN());
113 | Map<Integer, List<Integer>> actualCommunities = mapNodesToCommunities(Utils.readTabulatedLines(
114 | bench.getActualCommunitiesFile(), 4 /* numberOfLinesToSkip */));
115 | Metrics metrics = new Metrics();
116 | double NMI = metrics.normalizedMutualInformation(bench.getNodesCount(), actualCommunities,
117 | predictedCommunities);
118 | LOG.info("NMI value: " + NMI);
119 | }
120 | graphDatabase.shutdown();
121 | return timeMap;
122 | }
123 |
124 | private static Map<Integer, List<Integer>> mapNodesToCommunities(List<List<String>> tabulatedLines)
125 | {
126 | Map<Integer, List<Integer>> communities = new HashMap<Integer, List<Integer>>();
127 | // http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760
128 | // the format of the communityNNNN.dat files have node and community
129 | // number separated by a tab.
130 | // community number starts at 1 and not zero.
131 | for (List<String> line : tabulatedLines)
132 | {
133 | int node = Integer.valueOf(line.get(0));
134 | int community = Integer.valueOf(line.get(1).trim()) - 1;
135 | if (!communities.containsKey(community))
136 | {
137 | communities.put(community, new ArrayList<Integer>());
138 | }
139 | communities.get(community).add(node);
140 | }
141 | return communities;
142 | }
143 | }
144 |
--------------------------------------------------------------------------------
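The private mapNodesToCommunities helper above turns the tab-separated communityNNNN.dat lines (with 1-based community ids) into the 0-based community-to-nodes map that the NMI evaluation consumes. A self-contained worked example of that conversion with made-up input lines; the CommunityMappingExample class is illustrative only.

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Standalone illustration of the conversion performed by mapNodesToCommunities:
// community numbers in the .dat files are 1-based, the benchmark shifts them to 0-based.
public class CommunityMappingExample
{
    public static void main(String[] args)
    {
        List<List<String>> tabulatedLines = Arrays.asList(
            Arrays.asList("1", "1"),   // node 1 -> community 1
            Arrays.asList("2", "1"),   // node 2 -> community 1
            Arrays.asList("3", "2"));  // node 3 -> community 2

        Map<Integer, List<Integer>> communities = new HashMap<Integer, List<Integer>>();
        for (List<String> line : tabulatedLines)
        {
            int node = Integer.valueOf(line.get(0));
            int community = Integer.valueOf(line.get(1).trim()) - 1; // 1-based -> 0-based
            if (!communities.containsKey(community))
            {
                communities.put(community, new ArrayList<Integer>());
            }
            communities.get(community).add(node);
        }
        System.out.println(communities); // prints {0=[1, 2], 1=[3]}
    }
}
```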
/src/main/java/eu/socialsensor/clustering/Cache.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.clustering;
2 |
3 | import java.util.HashSet;
4 | import java.util.Set;
5 | import java.util.concurrent.ExecutionException;
6 |
7 | import com.google.common.cache.CacheBuilder;
8 | import com.google.common.cache.CacheLoader;
9 | import com.google.common.cache.LoadingCache;
10 |
11 | import eu.socialsensor.graphdatabases.GraphDatabase;
12 |
13 | /**
14 | * Cache implementation for the temporary storage of required information of the
15 | * graph databases in order to execute the Louvain Method
16 | *
17 | * @author sotbeis
18 | * @email sotbeis@iti.gr
19 | */
20 | public class Cache
21 | {
22 |
23 | LoadingCache<Integer, Set<Integer>> nodeCommunitiesMap; // key=nodeCommunity
24 | // value=nodeIds
25 | // contained in
26 | // nodeCommunity
27 | LoadingCache<Integer, Set<Integer>> communitiesMap; // key=community
28 | // value=nodeIds
29 | // contained in
30 | // community
31 | LoadingCache<Integer, Integer> nodeCommunitiesToCommunities; // key=nodeCommunity
32 | // value=community
33 | LoadingCache<Integer, Set<Integer>> nodeNeighbours; // key=nodeId
34 | // value=nodeId
35 | // neighbors
36 | LoadingCache<Integer, Integer> nodeToCommunityMap; // key=nodeId
37 | // value=communityId
38 |
39 | public Cache(final GraphDatabase<?, ?, ?, ?> graphDatabase, int cacheSize) throws ExecutionException
40 | {
41 | nodeNeighbours = CacheBuilder.newBuilder().maximumSize(cacheSize)
42 | .build(new CacheLoader<Integer, Set<Integer>>() {
43 | public Set<Integer> load(Integer nodeId)
44 | {
45 | return graphDatabase.getNeighborsIds(nodeId);
46 | }
47 | });
48 |
49 | nodeCommunitiesMap = CacheBuilder.newBuilder().maximumSize(cacheSize)
50 | .build(new CacheLoader<Integer, Set<Integer>>() {
51 | public Set<Integer> load(Integer nodeCommunityId)
52 | {
53 | return graphDatabase.getNodesFromNodeCommunity(nodeCommunityId);
54 | }
55 | });
56 |
57 | communitiesMap = CacheBuilder.newBuilder().maximumSize(cacheSize)
58 | .build(new CacheLoader<Integer, Set<Integer>>() {
59 | public Set<Integer> load(Integer communityId)
60 | {
61 | return graphDatabase.getNodesFromCommunity(communityId);
62 | }
63 | });
64 |
65 | nodeToCommunityMap = CacheBuilder.newBuilder().maximumSize(cacheSize)
66 | .build(new CacheLoader<Integer, Integer>() {
67 | public Integer load(Integer nodeId)
68 | {
69 | return graphDatabase.getCommunityFromNode(nodeId);
70 | }
71 | });
72 |
73 | nodeCommunitiesToCommunities = CacheBuilder.newBuilder().maximumSize(cacheSize)
74 | .build(new CacheLoader<Integer, Integer>() {
75 | public Integer load(Integer nodeCommunity)
76 | {
77 | return graphDatabase.getCommunity(nodeCommunity);
78 | }
79 | });
80 | }
81 |
82 | public Set<Integer> getCommunitiesConnectedToNodeCommunities(int nodeCommunity) throws ExecutionException
83 | {
84 | Set<Integer> nodesFromNodeCommunity = nodeCommunitiesMap.get(nodeCommunity);
85 | Set<Integer> communities = new HashSet<Integer>();
86 | for (int nodeFromNodeCommunity : nodesFromNodeCommunity)
87 | {
88 | Set<Integer> neighbors = nodeNeighbours.get(nodeFromNodeCommunity);
89 | for (int neighbor : neighbors)
90 | {
91 | communities.add(nodeToCommunityMap.get(neighbor));
92 | }
93 | }
94 | return communities;
95 | }
96 |
97 | public void moveNodeCommunity(int nodeCommunity, int toCommunity) throws ExecutionException
98 | {
99 | int fromCommunity = nodeCommunitiesToCommunities.get(nodeCommunity);
100 | nodeCommunitiesToCommunities.put(nodeCommunity, toCommunity);
101 | Set<Integer> nodesFromCommunity = communitiesMap.get(fromCommunity);
102 | communitiesMap.invalidate(fromCommunity);
103 | communitiesMap.get(toCommunity).addAll(nodesFromCommunity);
104 | Set<Integer> nodesFromNodeCommunity = nodeCommunitiesMap.get(nodeCommunity);
105 | for (int nodeFromCommunity : nodesFromNodeCommunity)
106 | {
107 | nodeToCommunityMap.put(nodeFromCommunity, toCommunity);
108 | }
109 | }
110 |
111 | public double getNodeCommunityWeight(int nodeCommunity) throws ExecutionException
112 | {
113 | Set<Integer> nodes = nodeCommunitiesMap.get(nodeCommunity);
114 | double weight = 0;
115 | for (int node : nodes)
116 | {
117 | weight += nodeNeighbours.get(node).size();
118 | }
119 | return weight;
120 | }
121 |
122 | public int getCommunity(int community) throws ExecutionException
123 | {
124 | return nodeCommunitiesToCommunities.get(community);
125 | }
126 |
127 | public int getCommunitySize(int community) throws ExecutionException
128 | {
129 | return communitiesMap.get(community).size();
130 | }
131 |
132 | public double getEdgesInsideCommunity(int nodeCommunity, int community) throws ExecutionException
133 | {
134 | Set<Integer> nodeCommunityNodes = nodeCommunitiesMap.get(nodeCommunity);
135 | Set<Integer> communityNodes = communitiesMap.get(community);
136 | double edges = 0;
137 | for (int nodeCommunityNode : nodeCommunityNodes)
138 | {
139 | for (int communityNode : communityNodes)
140 | {
141 | if (nodeNeighbours.get(nodeCommunityNode).contains(communityNode))
142 | {
143 | edges++;
144 | }
145 | }
146 | }
147 | return edges;
148 | }
149 |
150 | public void reInitializeCommunities()
151 | {
152 | nodeCommunitiesMap.invalidateAll();
153 | communitiesMap.invalidateAll();
154 | nodeToCommunityMap.invalidateAll();
155 | nodeCommunitiesToCommunities.invalidateAll();
156 | }
157 |
158 | }
159 |
--------------------------------------------------------------------------------
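Everything in the Cache class above is built on Guava's LoadingCache: a get() that misses runs the registered CacheLoader against the underlying graph database, entries are evicted once maximumSize is exceeded, and invalidate()/invalidateAll() force the next access to reload, which is what reInitializeCommunities relies on. A minimal standalone sketch of that pattern; the squared-value loader is illustrative only, not the benchmark's loader.

```java
import java.util.concurrent.ExecutionException;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

// Minimal LoadingCache sketch mirroring the pattern used by Cache: values are computed
// on a miss, kept up to maximumSize entries, and recomputed after invalidation.
public class LoadingCacheSketch
{
    public static void main(String[] args) throws ExecutionException
    {
        final LoadingCache<Integer, Integer> squares = CacheBuilder.newBuilder()
            .maximumSize(100)
            .build(new CacheLoader<Integer, Integer>()
            {
                @Override
                public Integer load(Integer key)
                {
                    // in Cache this is where the graph database would be queried
                    return key * key;
                }
            });

        System.out.println(squares.get(7));  // miss: load() runs, prints 49
        System.out.println(squares.get(7));  // hit: served from the cache
        squares.invalidate(7);               // analogous to reInitializeCommunities()
        System.out.println(squares.get(7));  // miss again: load() runs once more
    }
}
```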
/src/main/java/eu/socialsensor/graphdatabases/GraphDatabase.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.graphdatabases;
2 |
3 | import java.io.File;
4 | import java.util.List;
5 | import java.util.Map;
6 | import java.util.Set;
7 |
8 | /**
9 | * Represents a graph database
10 | *
11 | * @author sotbeis, sotbeis@iti.gr
12 | * @author Alexander Patrikalakis
13 | */
14 | public interface GraphDatabase<VertexIteratorType, EdgeIteratorType, VertexType, EdgeType>
15 | {
16 | //edge and vertex operations
17 | public VertexType getOtherVertexFromEdge(EdgeType r, VertexType oneVertex);
18 | public VertexType getSrcVertexFromEdge(EdgeType edge);
19 | public VertexType getDestVertexFromEdge(EdgeType edge);
20 | public VertexType getVertex(Integer i);
21 |
22 | //edge iterators
23 | public EdgeIteratorType getAllEdges();
24 | public EdgeIteratorType getNeighborsOfVertex(VertexType v);
25 | public boolean edgeIteratorHasNext(EdgeIteratorType it);
26 | public EdgeType nextEdge(EdgeIteratorType it);
27 | public void cleanupEdgeIterator(EdgeIteratorType it);
28 |
29 | //vertex iterators
30 | public VertexIteratorType getVertexIterator();
31 | public boolean vertexIteratorHasNext(VertexIteratorType it);
32 | public VertexType nextVertex(VertexIteratorType it);
33 | public void cleanupVertexIterator(VertexIteratorType it);
34 |
35 | //benchmarks
36 | public void findAllNodeNeighbours();
37 | public void findNodesOfAllEdges();
38 |
39 | /**
40 | * Opens the graph database
41 | *
42 | * @param dbPath
43 | * - database path
44 | */
45 | public void open();
46 |
47 | /**
48 | * Creates a graph database and configures for single data insertion
49 | *
50 | * @param dbPath
51 | * - database path
52 | */
53 | public void createGraphForSingleLoad();
54 |
55 | /**
56 | * Inserts data in massive mode
57 | *
58 | * @param dataPath
59 | * - dataset path
60 | */
61 | public void massiveModeLoading(File dataPath);
62 |
63 | /**
64 | * Inserts data in single mode
65 | *
66 | * @param dataPath
67 | * - dataset path
68 | * @param resultsPath
69 | * @param scenarioNumber
70 | */
71 | public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumber);
72 |
73 | /**
74 | * Creates a graph database and configures for bulk data insertion
75 | *
76 | * @param dataPath
77 | * - dataset path
78 | */
79 | public void createGraphForMassiveLoad();
80 |
81 | /**
82 | * Shut down the graph database
83 | */
84 | public void shutdown();
85 |
86 | /**
87 | * Delete the graph database
88 | */
89 | public void delete();
90 |
91 | /**
92 | * Shut down the graph database whose configuration is for massive insertion
93 | * of data
94 | */
95 | public void shutdownMassiveGraph();
96 |
97 | /**
98 | * Find the shortest path between vertex 1 and each of the vertices in the given set
99 | *
100 | * @param nodes
101 | * any number of random nodes
102 | */
103 | public void shortestPaths(Set<Integer> nodes);
104 |
105 | /**
106 | * Execute a single shortest path query from fromNode to the given node
107 | *
108 | * @param fromNode the source vertex
109 | * @param node the id of the target node
110 | */
111 | public void shortestPath(final VertexType fromNode, Integer node);
112 |
113 | /**
114 | * @return the number of nodes
115 | */
116 | public int getNodeCount();
117 |
118 | /**
119 | * @param nodeId
120 | * @return the neighbours of a particular node
121 | */
122 | public Set<Integer> getNeighborsIds(int nodeId);
123 |
124 | /**
125 | * @param nodeId
126 | * @return the node degree
127 | */
128 | public double getNodeWeight(int nodeId);
129 |
130 | /**
131 | * Initializes the community and nodeCommunity property in each database
132 | */
133 | public void initCommunityProperty();
134 |
135 | /**
136 | * @param nodeCommunities
137 | * @return the communities (communityId) that are connected with a
138 | * particular nodeCommunity
139 | */
140 | public Set<Integer> getCommunitiesConnectedToNodeCommunities(int nodeCommunities);
141 |
142 | /**
143 | * @param community
144 | * @return the nodes a particular community contains
145 | */
146 | public Set<Integer> getNodesFromCommunity(int community);
147 |
148 | /**
149 | * @param nodeCommunity
150 | * @return the nodes a particular nodeCommunity contains
151 | */
152 | public Set<Integer> getNodesFromNodeCommunity(int nodeCommunity);
153 |
154 | /**
155 | * @param nodeCommunity
156 | * @param communityNodes
157 | * @return the number of edges between a community and a nodeCommunity
158 | */
159 | public double getEdgesInsideCommunity(int nodeCommunity, int communityNodes);
160 |
161 | /**
162 | * @param community
163 | * @return the sum of node degrees
164 | */
165 | public double getCommunityWeight(int community);
166 |
167 | /**
168 | * @param nodeCommunity
169 | * @return the sum of node degrees
170 | */
171 | public double getNodeCommunityWeight(int nodeCommunity);
172 |
173 | /**
174 | * Moves a node from a community to another
175 | *
176 | * @param from
177 | * @param to
178 | */
179 | public void moveNode(int from, int to);
180 |
181 | /**
182 | * @return the number of edges of the graph database
183 | */
184 | public double getGraphWeightSum();
185 |
186 | /**
187 | * Reinitializes the community and nodeCommunity property
188 | *
189 | * @return the number of communities
190 | */
191 | public int reInitializeCommunities();
192 |
193 | /**
194 | * @param nodeId
195 | * @return in which community a particular node belongs
196 | */
197 | public int getCommunityFromNode(int nodeId);
198 |
199 | /**
200 | * @param nodeCommunity
201 | * @return in which community a particular nodeCommunity belongs
202 | */
203 | public int getCommunity(int nodeCommunity);
204 |
205 | /**
206 | * @param community
207 | * @return the number of nodeCommunities a particular community contains
208 | */
209 | public int getCommunitySize(int community);
210 |
211 | /**
212 | * @param numberOfCommunities
213 | * @return a map where the key is the community id and the value is the
214 | * nodes each community has.
215 | */
216 | public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities);
217 |
218 | /**
219 | *
220 | * @param nodeId
221 | * @return return true if node exist, false if not
222 | */
223 | public boolean nodeExists(int nodeId);
224 | }
225 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/main/GraphDatabaseBenchmark.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.main;
2 |
3 | import eu.socialsensor.benchmarks.Benchmark;
4 | import eu.socialsensor.benchmarks.ClusteringBenchmark;
5 | import eu.socialsensor.benchmarks.DeleteGraphBenchmark;
6 | import eu.socialsensor.benchmarks.FindNeighboursOfAllNodesBenchmark;
7 | import eu.socialsensor.benchmarks.FindNodesOfAllEdgesBenchmark;
8 | import eu.socialsensor.benchmarks.FindShortestPathBenchmark;
9 | import eu.socialsensor.benchmarks.MassiveInsertionBenchmark;
10 | import eu.socialsensor.benchmarks.SingleInsertionBenchmark;
11 |
12 | import org.apache.commons.configuration.Configuration;
13 | import org.apache.commons.configuration.ConfigurationException;
14 | import org.apache.commons.configuration.PropertiesConfiguration;
15 | import org.apache.commons.io.FileDeleteStrategy;
16 | import org.apache.logging.log4j.Logger;
17 | import org.apache.logging.log4j.LogManager;
18 |
19 | import com.codahale.metrics.CsvReporter;
20 | import com.codahale.metrics.MetricFilter;
21 | import com.codahale.metrics.MetricRegistry;
22 | import com.codahale.metrics.graphite.Graphite;
23 | import com.codahale.metrics.graphite.GraphiteReporter;
24 |
25 | import java.io.File;
26 | import java.io.IOException;
27 | import java.net.InetSocketAddress;
28 | import java.net.URL;
29 | import java.util.Locale;
30 | import java.util.concurrent.ExecutionException;
31 | import java.util.concurrent.TimeUnit;
32 |
33 | /**
34 | * Main class for the execution of GraphDatabaseBenchmark.
35 | *
36 | * @author sotbeis, sotbeis@iti.gr
37 | * @author Alexander Patrikalakis
38 | */
39 | public class GraphDatabaseBenchmark
40 | {
41 | public static final Logger logger = LogManager.getLogger();
42 | public static final MetricRegistry metrics = new MetricRegistry();
43 | public static final String DEFAULT_INPUT_PROPERTIES = "META-INF/input.properties";
44 | private final BenchmarkConfiguration config;
45 |
46 | public static final Configuration getAppconfigFromClasspath()
47 | {
48 | Configuration appconfig;
49 | try
50 | {
51 | ClassLoader classLoader = GraphDatabaseBenchmark.class.getClassLoader();
52 | URL resource = classLoader.getResource(DEFAULT_INPUT_PROPERTIES);
53 | appconfig = new PropertiesConfiguration(resource);
54 | }
55 | catch (ConfigurationException e)
56 | {
57 | throw new IllegalArgumentException(String.format(
58 | "Unable to load properties file from classpath because %s", e.getMessage()));
59 | }
60 | return appconfig;
61 | }
62 |
63 | public GraphDatabaseBenchmark(String inputPath) throws IllegalArgumentException
64 | {
65 | final Configuration appconfig;
66 | try
67 | {
68 | appconfig = inputPath == null ? getAppconfigFromClasspath() : new PropertiesConfiguration(new File(
69 | inputPath));
70 | }
71 | catch (ConfigurationException e)
72 | {
73 | throw new IllegalArgumentException(String.format("Unable to load properties file %s because %s", inputPath,
74 | e.getMessage()));
75 | }
76 | config = new BenchmarkConfiguration(appconfig);
77 | if(config.publishCsvMetrics()) {
78 | final CsvReporter reporter = CsvReporter.forRegistry(metrics)
79 | .formatFor(Locale.US)
80 | .convertRatesTo(TimeUnit.SECONDS)
81 | .convertDurationsTo(TimeUnit.MILLISECONDS)
82 | .build(config.getCsvDir());
83 | reporter.start(config.getCsvReportingInterval(), TimeUnit.MILLISECONDS);
84 | }
85 | if(config.publishGraphiteMetrics()) {
86 | final Graphite graphite = new Graphite(new InetSocketAddress(config.getGraphiteHostname(), 80 /*port*/));
87 | final GraphiteReporter reporter = GraphiteReporter.forRegistry(metrics)
88 | .convertRatesTo(TimeUnit.SECONDS)
89 | .convertDurationsTo(TimeUnit.MILLISECONDS)
90 | .filter(MetricFilter.ALL)
91 | .build(graphite);
92 | reporter.start(config.getGraphiteReportingInterval(), TimeUnit.MILLISECONDS);
93 | }
94 | }
95 |
96 | public void run()
97 | {
98 | //MetricRegistry registry = MetricRegistry.name(klass, names)
99 | for (BenchmarkType type : config.getBenchmarkTypes())
100 | {
101 | runBenchmark(type);
102 | }
103 | }
104 |
105 | private final void runBenchmark(BenchmarkType type)
106 | {
107 | final Benchmark benchmark;
108 | logger.info(type.longname() + " Benchmark Selected");
109 | switch (type)
110 | {
111 | case MASSIVE_INSERTION:
112 | benchmark = new MassiveInsertionBenchmark(config);
113 | break;
114 | case SINGLE_INSERTION:
115 | benchmark = new SingleInsertionBenchmark(config);
116 | break;
117 | case FIND_ADJACENT_NODES:
118 | benchmark = new FindNodesOfAllEdgesBenchmark(config);
119 | break;
120 | case CLUSTERING:
121 | benchmark = new ClusteringBenchmark(config);
122 | break;
123 | case FIND_NEIGHBOURS:
124 | benchmark = new FindNeighboursOfAllNodesBenchmark(config);
125 | break;
126 | case FIND_SHORTEST_PATH:
127 | benchmark = new FindShortestPathBenchmark(config);
128 | break;
129 | case DELETION:
130 | benchmark = new DeleteGraphBenchmark(config);
131 | break;
132 | default:
133 | throw new UnsupportedOperationException("unsupported benchmark "
134 | + (type == null ? "null" : type.toString()));
135 | }
136 | benchmark.startBenchmark();
137 | }
138 |
139 | /**
140 | * This is the main function. Set the proper property file and run
141 | *
142 | * @throws ExecutionException
143 | */
144 | public static void main(String[] args) throws ExecutionException
145 | {
146 | final String inputPath = args.length != 1 ? null : args[0];
147 | GraphDatabaseBenchmark benchmarks = new GraphDatabaseBenchmark(inputPath);
148 | try
149 | {
150 | benchmarks.run();
151 | }
152 | catch (Throwable t)
153 | {
154 | logger.fatal(t.getMessage());
155 | System.exit(1);
156 | }
157 | System.exit(0);
158 | }
159 |
160 | public void cleanup()
161 | {
162 | try
163 | {
164 | FileDeleteStrategy.FORCE.delete(config.getDbStorageDirectory());
165 | }
166 | catch (IOException e)
167 | {
168 | logger.fatal("Unable to clean up db storage directory: " + e.getMessage());
169 | System.exit(1);
170 | }
171 | }
172 | }
173 |
--------------------------------------------------------------------------------
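Besides the Maven route documented in the README (`mvn dependency:copy-dependencies && mvn test -Pbench`), the GraphDatabaseBenchmark class above can be driven directly: construct it with a properties file path (or null to fall back to the bundled META-INF/input.properties) and call run(). A minimal sketch; the RunBenchmarks class and the hard-coded path are illustrative only.

```java
import eu.socialsensor.main.GraphDatabaseBenchmark;

// Minimal programmatic driver for the benchmark suite.
public class RunBenchmarks
{
    public static void main(String[] args)
    {
        // passing null instead would load META-INF/input.properties from the classpath
        GraphDatabaseBenchmark benchmark =
                new GraphDatabaseBenchmark("src/test/resources/META-INF/input.properties");
        try
        {
            benchmark.run();     // executes every workload listed under eu.socialsensor.benchmarks
        }
        finally
        {
            benchmark.cleanup(); // deletes the configured database storage directory
        }
    }
}
```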
/src/main/java/eu/socialsensor/graphdatabases/GraphDatabaseBase.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.graphdatabases;
2 |
3 | import java.io.File;
4 | import java.util.Set;
5 |
6 | import org.neo4j.graphdb.Transaction;
7 | import org.neo4j.kernel.GraphDatabaseAPI;
8 |
9 | import com.codahale.metrics.MetricRegistry;
10 | import com.codahale.metrics.Timer;
11 |
12 | import eu.socialsensor.main.GraphDatabaseBenchmark;
13 | import eu.socialsensor.main.GraphDatabaseType;
14 |
15 | @SuppressWarnings("deprecation")
16 | public abstract class GraphDatabaseBase<VertexIteratorType, EdgeIteratorType, VertexType, EdgeType> implements GraphDatabase<VertexIteratorType, EdgeIteratorType, VertexType, EdgeType>
17 | {
18 | public static final String SIMILAR = "similar";
19 | public static final String QUERY_CONTEXT = ".eu.socialsensor.query.";
20 | public static final String NODE_ID = "nodeId";
21 | public static final String NODE_COMMUNITY = "nodeCommunity";
22 | public static final String COMMUNITY = "community";
23 | protected final File dbStorageDirectory;
24 | protected final MetricRegistry metrics = new MetricRegistry();
25 | protected final GraphDatabaseType type;
26 | private final Timer nextVertexTimes;
27 | private final Timer getNeighborsOfVertexTimes;
28 | private final Timer nextEdgeTimes;
29 | private final Timer getOtherVertexFromEdgeTimes;
30 | private final Timer getAllEdgesTimes;
31 | private final Timer shortestPathTimes;
32 |
33 | protected GraphDatabaseBase(GraphDatabaseType type, File dbStorageDirectory)
34 | {
35 | this.type = type;
36 | final String queryTypeContext = type.getShortname() + QUERY_CONTEXT;
37 | this.nextVertexTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "nextVertex");
38 | this.getNeighborsOfVertexTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getNeighborsOfVertex");
39 | this.nextEdgeTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "nextEdge");
40 | this.getOtherVertexFromEdgeTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getOtherVertexFromEdge");
41 | this.getAllEdgesTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "getAllEdges");
42 | this.shortestPathTimes = GraphDatabaseBenchmark.metrics.timer(queryTypeContext + "shortestPath");
43 |
44 | this.dbStorageDirectory = dbStorageDirectory;
45 | if (!this.dbStorageDirectory.exists())
46 | {
47 | this.dbStorageDirectory.mkdirs();
48 | }
49 | }
50 |
51 | @Override
52 | public void findAllNodeNeighbours() {
53 | //get the iterator
54 | Object tx = null;
55 | if(GraphDatabaseType.NEO4J == type) { //TODO fix this
56 | tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx();
57 | }
58 | try {
59 | VertexIteratorType vertexIterator = this.getVertexIterator();
60 | while(vertexIteratorHasNext(vertexIterator)) {
61 | VertexType vertex;
62 | Timer.Context ctxt = nextVertexTimes.time();
63 | try {
64 | vertex = nextVertex(vertexIterator);
65 | } finally {
66 | ctxt.stop();
67 | }
68 |
69 | final EdgeIteratorType edgeNeighborIterator;
70 | ctxt = getNeighborsOfVertexTimes.time();
71 | try {
72 | edgeNeighborIterator = this.getNeighborsOfVertex(vertex);
73 | } finally {
74 | ctxt.stop();
75 | }
76 | while(edgeIteratorHasNext(edgeNeighborIterator)) {
77 | EdgeType edge;
78 | ctxt = nextEdgeTimes.time();
79 | try {
80 | edge = nextEdge(edgeNeighborIterator);
81 | } finally {
82 | ctxt.stop();
83 | }
84 | @SuppressWarnings("unused")
85 | Object other;
86 | ctxt = getOtherVertexFromEdgeTimes.time();
87 | try {
88 | other = getOtherVertexFromEdge(edge, vertex);
89 | } finally {
90 | ctxt.stop();
91 | }
92 | }
93 | this.cleanupEdgeIterator(edgeNeighborIterator);
94 | }
95 | this.cleanupVertexIterator(vertexIterator);
96 | if(this instanceof Neo4jGraphDatabase) {
97 | ((Transaction) tx).success();
98 | }
99 | } finally {//TODO fix this
100 | if(GraphDatabaseType.NEO4J == type) {
101 | ((Transaction) tx).finish();
102 | }
103 | }
104 | }
105 |
106 | @Override
107 | public void findNodesOfAllEdges() {
108 | Object tx = null;
109 | if(GraphDatabaseType.NEO4J == type) {//TODO fix this
110 | tx = ((GraphDatabaseAPI) ((Neo4jGraphDatabase) this).neo4jGraph).tx().unforced().begin();
111 | }
112 | try {
113 |
114 | EdgeIteratorType edgeIterator;
115 | Timer.Context ctxt = getAllEdgesTimes.time();
116 | try {
117 | edgeIterator = this.getAllEdges();
118 | } finally {
119 | ctxt.stop();
120 | }
121 |
122 | while(edgeIteratorHasNext(edgeIterator)) {
123 | EdgeType edge;
124 | ctxt = nextEdgeTimes.time();
125 | try {
126 | edge = nextEdge(edgeIterator);
127 | } finally {
128 | ctxt.stop();
129 | }
130 | @SuppressWarnings("unused")
131 | VertexType source = this.getSrcVertexFromEdge(edge);
132 | @SuppressWarnings("unused")
133 | VertexType destination = this.getDestVertexFromEdge(edge);
134 | }
135 | } finally {//TODO fix this
136 | if(GraphDatabaseType.NEO4J == type) {
137 | ((Transaction) tx).close();
138 | }
139 | }
140 | }
141 |
142 | @Override
143 | public void shortestPaths(Set<Integer> nodes) {
144 | Object tx = null;
145 | if(GraphDatabaseType.NEO4J == type) {//TODO fix this
146 | tx = ((Neo4jGraphDatabase) this).neo4jGraph.beginTx();
147 | }
148 | try {
149 | //TODO(amcp) change this to use 100+1 random node list and then to use a sublist instead of always choosing node # 1
150 | VertexType from = getVertex(1);
151 | Timer.Context ctxt;
152 | for(Integer i : nodes) {
153 | //time this
154 | ctxt = shortestPathTimes.time();
155 | try {
156 | shortestPath(from, i);
157 | } finally {
158 | ctxt.stop();
159 | }
160 | }
161 | if(this instanceof Neo4jGraphDatabase) {
162 | ((Transaction) tx).success();
163 | }
164 | } finally {//TODO fix this
165 | if(GraphDatabaseType.NEO4J == type) {
166 | ((Transaction) tx).finish();
167 | }
168 | }
169 | }
170 | }
171 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | graphdb-benchmarks
2 | ==================
3 | The project graphdb-benchmarks is a benchmark among popular graph databases. Currently the framework supports [Titan](http://thinkaurelius.github.io/titan/), [OrientDB](http://www.orientechnologies.com/orientdb/), [Neo4j](http://neo4j.com/) and [Sparksee](http://www.sparsity-technologies.com/). The purpose of this benchmark is to examine the performance of each graph database in terms of execution time. The benchmark is composed of four workloads: Clustering, Massive Insertion, Single Insertion and Query Workload. Every workload has been designed to simulate common operations in graph database systems.
4 |
5 | - *Clustering Workload (CW)*: CW consists of a well-known community detection algorithm for modularity optimization, the Louvain Method. We adapt the algorithm on top of the benchmarked graph databases and employ cache techniques to take advantage of both graph database capabilities and in-memory execution speed. We measure the time the algorithm needs to converge.
6 | - *Massive Insertion Workload (MIW)*: we create the graph database and configure it for massive loading, then we populate it with a particular dataset. We measure the time for the creation of the whole graph.
7 | - *Single Insertion Workload (SIW)*: we create the graph database and load it with a particular dataset. Every object insertion (node or edge) is committed directly and the graph is constructed incrementally. We measure the insertion time per block, which consists of one thousand edges and the nodes that appear during the insertion of these edges.
8 | - *Query Workload (QW)*: we execute three common queries:
9 | * FindNeighbours (FN): finds the neighbours of all nodes.
10 | * FindAdjacentNodes (FA): finds the adjacent nodes of all edges.
11 | * FindShortestPath (FS): finds the shortest path between the first node and 100 randomly picked nodes.
12 |
13 | Here we measure the execution time of each query.
14 |
15 | For our evaluation we use both synthetic and real data. More specifically, we execute MIW, SIW and QW with real data derived from the SNAP dataset collection ([Enron Dataset](http://snap.stanford.edu/data/email-Enron.html), [Amazon dataset](http://snap.stanford.edu/data/amazon0601.html), [Youtube dataset](http://snap.stanford.edu/data/com-Youtube.html) and [LiveJournal dataset](http://snap.stanford.edu/data/com-LiveJournal.html)). For the CW, on the other hand, we use synthetic data generated with the [LFR-Benchmark generator](https://sites.google.com/site/andrealancichinetti/files), which produces networks with power-law degree distribution and implanted communities within the network. The synthetic data can be downloaded from [here](http://figshare.com/articles/Synthetic_Data_for_graphdb_benchmark/1221760).
16 |
17 | For further information about the study please refer to the [published paper](http://link.springer.com/chapter/10.1007/978-3-319-10518-5_1) on Springer site and the presentation on [Slideshare](http://www.slideshare.net/sympapadopoulos/adbis2014-presentation).
18 |
19 | **Note 1:** The published paper contains the experimental study of Titan, OrientDB and Neo4j. After the publication we included the Sparksee graph database.
20 |
21 | **Note 2:** After the very useful comments and contributions of OrientDB developers, we updated the benchmark implementations and re-ran the experiments. We have updated the initial presentation with the new results and uploaded a new version of the paper at the following [link](http://mklab.iti.gr/files/beis_adbis2014_corrected.pdf).
22 |
23 | **Note 3:** Alexander Patrikalakis, a software developer at Amazon Web Services, refactored the benchmark, added support for Blueprints 2.5 and added support for the DynamoDB Storage Backend for Titan.
24 |
25 | Instructions
26 | ------------
27 | To run the project, first choose one of the aforementioned datasets. You can use any dataset, but because there is no utility class (for now) to convert data into the appropriate format, the format of your data must be identical to that of the tested datasets. The input parameters are configured in the src/test/resources/META-INF/input.properties file. Please follow the instructions in that file to select the correct parameters. Then run `mvn dependency:copy-dependencies && mvn test -Pbench` to execute the benchmarking run.
28 |
29 | Results
30 | -------
31 | This section contains the results of each benchmark. All the measurements are in seconds.
32 |
33 |
34 | #### CW results
35 | Below we list the results of the CW for graphs with 1,000, 5,000, 10,000, 20,000, 30,000, 40,000 and 50,000 nodes.
36 |
37 | | Graph-Cache | Titan | OrientDB | Neo4j |
38 | | ----------- | ----- | -------- | ----- |
39 | |Graph1k-5% |2.39 |**0.92** |2.46 |
40 | |Graph1k-10% |1.45 |**0.59** |2.07 |
41 | |Graph1k-15% |1.30 |**0.58** |1.88 |
42 | |Graph1k-20% |1.25 |**0.55** |1.72 |
43 | |Graph1k-25% |1.19 |**0.49** |1.67 |
44 | |Graph1k-30% |1.15 |**0.48** |1.55 |
45 | | |
46 | |Graph5k-5% |16.01 |**5.88** |12.80 |
47 | |Graph5k-10% |15.10 |**5.67** |12.13 |
48 | |Graph5k-15% |14.63 |**4.81** |11.91 |
49 | |Graph5k-20% |14.16 |**4.62** |11.68 |
50 | |Graph5k-25% |13.76 |**4.51** |11.31 |
51 | |Graph5k-30% |13.38 |**4.45** |10.94 |
52 | | |
53 | |Graph10k-5% |46.06 |**18.20** |34.05 |
54 | |Graph10k-10% |44.59 |**17.92** |32.88 |
55 | |Graph10k-15% |43.68 |**17.31** |31.91 |
56 | |Graph10k-20% |42.48 |**16.88** |31.01 |
57 | |Graph10k-25% |41.32 |**16.58** |30.74 |
58 | |Graph10k-30% |39.98 |**16.34** |30.13 |
59 | | |
60 | |Graph20k-5% |140.46 |**54.01** |87.04 |
61 | |Graph20k-10% |138.10 |**52.51** |85.49 |
62 | |Graph20k-15% |137.25 |**52.12** |82.88 |
63 | |Graph20k-20% |133.11 |**51.68** |82.16 |
64 | |Graph20k-25% |122.48 |**50.79** |79.87 |
65 | |Graph20k-30% |120.94 |**50.49** |78.81 |
66 | | |
67 | |Graph30k-5% |310.25 |**96.38** |154.60 |
68 | |Graph30k-10% |301.80 |**94.98** |151.81 |
69 | |Graph30k-15% |299.27 |**94.85** |151.12 |
70 | |Graph30k-20% |296.43 |**94.67** |146.25 |
71 | |Graph30k-25% |294.33 |**92.62** |144.08 |
72 | |Graph30k-30% |288.50 |**90.13** |142.33 |
73 | | |
74 | |Graph40k-5% |533.29 |**201.19**|250.79 |
75 | |Graph40k-10% |505.91 |**199.18**|244.79 |
76 | |Graph40k-15% |490.39 |**194.34**|242.55 |
77 | |Graph40k-20% |478.31 |**183.14**|241.47 |
78 | |Graph40k-25% |467.18 |**177.55**|237.29 |
79 | |Graph40k-30% |418.07 |**174.65**|229.65 |
80 | | |
81 | |Graph50k-5% |642.42 |**240.58**|348.33 |
82 | |Graph50k-10% |624.36 |**238.35**|344.06 |
83 | |Graph50k-15% |611.70 |**237.65**|340.20 |
84 | |Graph50k-20% |610.40 |**230.76**|337.36 |
85 | |Graph50k-25% |596.29 |**230.03**|332.01 |
86 | |Graph50k-30% |580.44 |**226.31**|325.88 |
87 |
88 |
89 | #### MIW & QW results
90 | Below we list the results of MIW and QW for each dataset.
91 |
92 | | Dataset | Workload | Titan | OrientDB | Neo4j |
93 | | ------- | -------- | ----- | -------- | ----- |
94 | | EN | MIW |9.36 |62.77 |**6.77** |
95 | | AM | MIW |34.00 |97.00 |**10.61** |
96 | | YT | MIW |104.27 |252.15 |**24.69** |
97 | | LJ | MIW |663.03 |9416.74 |**349.55**|
98 | | |
99 | | EN | QW-FN |1.87 |**0.56** |0.95 |
100 | | AM | QW-FN |6.47 |3.50 |**1.85** |
101 | | YT | QW-FN |20.71 |9.34 |**4.51** |
102 | | LJ | QW-FN |213.41 |303.09 |**47.07** |
103 | | |
104 | | EN | QW-FA |3.78 |0.71 |**0.16** |
105 | | AM | QW-FA |13.77 |2.30 |**0.36** |
106 | | YT | QW-FA |42.82 |6.15 |**1.46** |
107 | | LJ | QW-FA |460.25 |518.12 |**16.53** |
108 | | |
109 | | EN | QW-FS |1.63 |3.09 |**0.16** |
110 | | AM | QW-FS |0.12 |83.29 |**0.302** |
111 | | YT | QW-FS |24.87 |23.47 |**0.08** |
112 | | LJ | QW-FS |123.50 |86.87 |**18.13** |
113 |
114 |
115 | #### SIW results
116 | Below we list the results of SIW for each dataset.
117 |
118 | *(SIW results chart: insertion time per block of 1,000 edges for each dataset)*
119 |
126 | Contact
127 | -------
128 | For more information or support, please contact: sotbeis@iti.gr, sot.beis@gmail.com, papadop@iti.gr or amcp@me.com.
129 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------
/src/main/java/eu/socialsensor/utils/Utils.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.utils;
2 |
3 | import java.io.BufferedWriter;
4 | import java.io.File;
5 | import java.io.FileWriter;
6 | import java.io.IOException;
7 | import java.util.ArrayList;
8 | import java.util.Arrays;
9 | import java.util.LinkedList;
10 | import java.util.List;
11 | import java.util.Map;
12 | import java.util.Map.Entry;
13 | import java.util.SortedMap;
14 | import java.util.TreeMap;
15 |
16 | import org.apache.commons.io.FileUtils;
17 | import org.apache.commons.io.LineIterator;
18 | import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
19 | import org.apache.commons.math3.util.MathArrays;
20 | import org.apache.logging.log4j.LogManager;
21 | import org.apache.logging.log4j.Logger;
22 |
23 | import eu.socialsensor.graphdatabases.GraphDatabase;
24 | import eu.socialsensor.graphdatabases.Neo4jGraphDatabase;
25 | import eu.socialsensor.graphdatabases.OrientGraphDatabase;
26 | import eu.socialsensor.graphdatabases.SparkseeGraphDatabase;
27 | import eu.socialsensor.graphdatabases.TitanGraphDatabase;
28 | import eu.socialsensor.main.BenchmarkConfiguration;
29 | import eu.socialsensor.main.BenchmarkingException;
30 | import eu.socialsensor.main.GraphDatabaseType;
31 |
32 | /**
33 | * This class contains all the required utility functions for the benchmark
34 | *
35 | * @author sotbeis, sotbeis@iti.gr
36 | * @author Alexander Patrikalakis
37 | *
38 | */
39 | public class Utils
40 | {
41 | public static final Logger logger = LogManager.getLogger();
42 |
43 | public static List<List<Double>> getDocumentsAs2dList(String docPath, int scenarios)
44 | {
45 | List<List<Double>> data = new ArrayList<List<Double>>(scenarios);
46 | for (int i = 0; i < scenarios; i++)
47 | {
48 | File intermediateFile = new File(docPath + "." + (i + 1));
49 | if (!intermediateFile.exists())
50 | {
51 | throw new IllegalStateException("file " + intermediateFile.getAbsolutePath() + " does not exist");
52 | }
53 | data.add(getListFromTextDoc(intermediateFile));
54 | }
55 | return data;
56 | }
57 |
58 | public static final List<String> readlines(File file)
59 | {
60 | if (file == null || !file.exists())
61 | {
62 | throw new IllegalArgumentException("file object must not be null and must exist: " + (file == null ? "null" : file.getAbsolutePath()));
63 | }
64 | if (!file.isFile() || !file.canRead())
65 | {
66 | throw new IllegalArgumentException("file object must be a readable file: " + file.getAbsolutePath());
67 | }
68 | LineIterator it;
69 | try
70 | {
71 | it = FileUtils.lineIterator(file, "UTF-8");
72 | }
73 | catch (IOException e)
74 | {
75 | throw new BenchmarkingException("Unable to read lines from file: " + file.getAbsolutePath(), e);
76 | }
77 | List<String> result = new LinkedList<String>();
78 | try
79 | {
80 | while (it.hasNext())
81 | {
82 | result.add(it.nextLine());
83 | }
84 | }
85 | finally
86 | {
87 | LineIterator.closeQuietly(it);
88 | }
89 |
90 | return result;
91 | }
92 |
93 | public static final List<List<String>> parseTabulatedLines(List<String> lines, int numberOfLinesToSkip)
94 | {
95 | if (numberOfLinesToSkip < 0 || numberOfLinesToSkip > lines.size())
96 | {
97 | throw new IllegalArgumentException("numberOfLinesToSkip must be between zero and lines.size()");
98 | }
99 | List<List<String>> result = new LinkedList<List<String>>();
100 |
101 | lines.subList(numberOfLinesToSkip, lines.size()).parallelStream()
102 | .forEachOrdered(line -> result.add(Arrays.asList(line.split("\t"))));
103 | return result;
104 | }
105 |
106 | public static final List<List<String>> readTabulatedLines(File file, int numberOfLinesToSkip)
107 | {
108 | return parseTabulatedLines(readlines(file), numberOfLinesToSkip);
109 | }
110 |
111 | public static void deleteRecursively(File file)
112 | {
113 | if (!file.exists())
114 | {
115 | return;
116 | }
117 | if (file.isDirectory())
118 | {
119 | for (File child : file.listFiles())
120 | {
121 | deleteRecursively(child);
122 | }
123 | }
124 | if (!file.delete())
125 | {
126 | throw new RuntimeException("Could not delete " + file.getAbsolutePath());
127 | }
128 | }
129 |
130 | public static void deleteMultipleFiles(String filePath, int numberOfFiles)
131 | {
132 | for (int i = 0; i < numberOfFiles; i++)
133 | {
134 | deleteRecursively(new File(filePath + "." + (i + 1)));
135 | }
136 | }
137 |
138 | public static void writeTimes(List<Double> insertionTimes, File outputPath)
139 | {
140 | try (BufferedWriter out = new BufferedWriter(new FileWriter(outputPath)))
141 | {
142 | for (Double insertionTime : insertionTimes)
143 | {
144 | out.write(insertionTime.toString());
145 | out.write("\n");
146 | }
147 | }
148 | catch (IOException e)
149 | {
150 | throw new BenchmarkingException("unable to write times to: " + outputPath.getAbsolutePath(), e);
151 | }
152 | }
153 |
154 | public static List<Double> getListFromTextDoc(File file)
155 | {
156 | List<String> lines = readlines(file);
157 | List<Double> values = new ArrayList<Double>(lines.size());
158 | for (String line : lines)
159 | {
160 | values.add(Double.valueOf(line.trim()));
161 | }
162 | return values;
163 | }
164 |
165 | public static <T, E> T getKeyByValue(Map<T, E> map, E value)
166 | {
167 | for (Entry<T, E> entry : map.entrySet())
168 | {
169 | if (value.equals(entry.getValue()))
170 | {
171 | return entry.getKey();
172 | }
173 | }
174 | return null;
175 | }
176 |
177 | public static final File generateStorageDirectory(GraphDatabaseType type, File storageBaseDir)
178 | {
179 | return new File(storageBaseDir, type.getShortname());
180 | }
181 |
182 | public static final GraphDatabase<?, ?, ?, ?> createDatabaseInstance(BenchmarkConfiguration config, GraphDatabaseType type)
183 | {
184 | final GraphDatabase<?, ?, ?, ?> graphDatabase;
185 | final File dbStorageDirectory = generateStorageDirectory(type, config.getDbStorageDirectory());
186 | if (GraphDatabaseType.TITAN_FLAVORS.contains(type))
187 | {
188 | graphDatabase = new TitanGraphDatabase(type, config, dbStorageDirectory);
189 | }
190 | else if (GraphDatabaseType.NEO4J == type)
191 | {
192 | graphDatabase = new Neo4jGraphDatabase(dbStorageDirectory);
193 | }
194 | else if (GraphDatabaseType.ORIENT_DB == type)
195 | {
196 | graphDatabase = new OrientGraphDatabase(config, dbStorageDirectory);
197 | }
198 | else if (GraphDatabaseType.SPARKSEE == type)
199 | {
200 | graphDatabase = new SparkseeGraphDatabase(config, dbStorageDirectory);
201 | }
202 | else
203 | {
204 | // handles null as well as any unrecognized database type
205 | throw new IllegalArgumentException("Unknown type: " + (type == null ? "null" : type.toString()));
206 | }
207 |
208 | return graphDatabase;
209 | }
210 |
211 | public static void createMassiveLoadDatabase(GraphDatabaseType type, BenchmarkConfiguration config)
212 | {
213 | final GraphDatabase<?, ?, ?, ?> graphDatabase = createDatabaseInstance(config, type);
214 | graphDatabase.createGraphForMassiveLoad();
215 | graphDatabase.massiveModeLoading(config.getDataset());
216 | graphDatabase.shutdownMassiveGraph();
217 | }
218 |
219 | /**
220 | * Returns a graph database loaded with data in individual transactions.
221 | * Does not shut down the database after the data load.
222 | *
223 | * @param type
224 | * database type
225 | * @param config
226 | * benchmark configuration, including the dataset and per-database settings.
227 | * @return the loaded graph database; the caller is responsible for shutting it down.
228 | */
229 | public static GraphDatabase<?, ?, ?, ?> createSingleLoadDatabase(GraphDatabaseType type, BenchmarkConfiguration config)
230 | {
231 | final GraphDatabase<?, ?, ?, ?> graphDatabase = createDatabaseInstance(config, type);
232 | graphDatabase.createGraphForSingleLoad();
233 | graphDatabase.singleModeLoading(config.getDataset(), null /* resultsPath */, 0);
234 | return graphDatabase;
235 | }
236 |
237 | public static void deleteDatabase(GraphDatabaseType type, BenchmarkConfiguration config)
238 | {
239 | logger.info(String.format("Deleting graph database %s . . . .", type.getShortname()));
240 |
241 | final GraphDatabase<?, ?, ?, ?> graphDatabase = createDatabaseInstance(config, type);
242 | graphDatabase.delete();
243 | }
244 |
245 | public static double[] convert(List<Double> list)
246 | {
247 | if (list == null || list.isEmpty())
248 | {
249 | return new double[0];
250 | }
251 | double[] result = new double[list.size()];
252 | for (int i = 0; i < list.size(); i++)
253 | {
254 | result[i] = list.get(i).doubleValue();
255 | }
256 |
257 | return result;
258 | }
259 |
260 | /**
261 | * Writes a CSV summary (per-database mean and standard deviation, in seconds) of the measured times.
262 | * @param output the CSV file to write; must not be a directory
263 | * @param times
264 | * measured times per database type, in milliseconds
265 | * @param benchmarkTitle title used in the CSV header
266 | */
267 | public static void writeResults(File output, Map<GraphDatabaseType, List<Double>> times, String benchmarkTitle)
268 | {
269 | logger.info("Write results to " + output);
270 | if (output.isDirectory())
271 | {
272 | throw new IllegalArgumentException("output was a directory: " + output.getAbsolutePath());
273 | }
274 |
275 | SortedMap<GraphDatabaseType, Double> means = new TreeMap<GraphDatabaseType, Double>();
276 | SortedMap<GraphDatabaseType, Double> standardDeviations = new TreeMap<GraphDatabaseType, Double>();
277 | for (GraphDatabaseType type : times.keySet())
278 | {
279 | final double[] scaledTimesArray = MathArrays.scale(0.001, convert(times.get(type)));
280 | DescriptiveStatistics stats = new DescriptiveStatistics();
281 | for (double val : scaledTimesArray)
282 | {
283 | stats.addValue(val);
284 | }
285 | means.put(type, stats.getMean());
286 | standardDeviations.put(type, stats.getStandardDeviation());
287 | }
288 |
289 | // use two passes so that computation is not interleaved with disk I/O
290 | try (BufferedWriter out = new BufferedWriter(new FileWriter(output)))
291 | {
292 | // TODO(amcp) add other sample means p50 p90 p99 p99.9 etc
293 | out.write(String.format("DB,%s p100 Mean (s),Sample Size,Standard Deviation\n", benchmarkTitle));
294 | for (GraphDatabaseType type : times.keySet())
295 | {
296 | out.write(String.format("%s,%f,%d,%f\n", type.getShortname(), means.get(type), times.get(type).size(),
297 | standardDeviations.get(type)));
298 | }
299 | }
300 | catch (IOException e)
301 | {
302 | throw new BenchmarkingException(String.format("Exception thrown when writing output to %s: %s", output,
303 | e.getMessage()), e);
304 | }
305 | }
306 |
307 | public static List<Double> calculateMeanList(List<List<Double>> lists)
308 | {
309 | if (lists == null || lists.isEmpty())
310 | {
311 | return new ArrayList<Double>(0);
312 | }
313 | List<Double> result = new ArrayList<Double>();
314 | for (List<Double> list : lists)
315 | {
316 | result.add(new DescriptiveStatistics(convert(list)).getMean());
317 | }
318 | return result;
319 | }
320 | }
321 |
--------------------------------------------------------------------------------
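An illustrative usage sketch (not a file in this repository) showing how the Utils helpers above fit together: bulk-load a database, write the timing summary, and delete the store. The wrapper class name, the results path, and the pre-populated timings map are assumptions made for this example; the Utils and GraphDatabaseType calls follow the signatures shown above.

import java.io.File;
import java.util.List;
import java.util.Map;

import eu.socialsensor.main.BenchmarkConfiguration;
import eu.socialsensor.main.GraphDatabaseType;
import eu.socialsensor.utils.Utils;

// Hypothetical driver class, for illustration only.
public class UtilsUsageSketch
{
    public static void run(BenchmarkConfiguration config, Map<GraphDatabaseType, List<Double>> timesInMs)
    {
        // Bulk-load the configured dataset into OrientDB, then shut the massive-load graph down.
        Utils.createMassiveLoadDatabase(GraphDatabaseType.ORIENT_DB, config);

        // Summarize the measured times as CSV (milliseconds in, seconds out): mean, sample size, stddev per database.
        Utils.writeResults(new File("results/MassiveInsertionResults.csv"), timesInMs, "Massive Insertion");

        // Remove the on-disk database once the benchmark run is finished.
        Utils.deleteDatabase(GraphDatabaseType.ORIENT_DB, config);
    }
}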
/src/main/java/eu/socialsensor/graphdatabases/OrientGraphDatabase.java:
--------------------------------------------------------------------------------
1 | package eu.socialsensor.graphdatabases;
2 |
3 | import com.google.common.collect.Iterables;
4 | import com.orientechnologies.common.collection.OMultiCollectionIterator;
5 | import com.orientechnologies.common.util.OCallable;
6 | import com.orientechnologies.orient.core.command.OBasicCommandContext;
7 | import com.orientechnologies.orient.core.config.OGlobalConfiguration;
8 | import com.orientechnologies.orient.core.id.ORID;
9 | import com.orientechnologies.orient.core.metadata.schema.OType;
10 | import com.orientechnologies.orient.graph.sql.functions.OSQLFunctionShortestPath;
11 | import com.tinkerpop.blueprints.Direction;
12 | import com.tinkerpop.blueprints.Edge;
13 | import com.tinkerpop.blueprints.Parameter;
14 | import com.tinkerpop.blueprints.Vertex;
15 | import com.tinkerpop.blueprints.impls.orient.OrientBaseGraph;
16 | import com.tinkerpop.blueprints.impls.orient.OrientEdgeType;
17 | import com.tinkerpop.blueprints.impls.orient.OrientGraph;
18 | import com.tinkerpop.blueprints.impls.orient.OrientGraphFactory;
19 | import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx;
20 | import com.tinkerpop.blueprints.impls.orient.OrientVertex;
21 | import com.tinkerpop.blueprints.impls.orient.OrientVertexType;
22 |
23 | import eu.socialsensor.insert.Insertion;
24 | import eu.socialsensor.insert.OrientMassiveInsertion;
25 | import eu.socialsensor.insert.OrientSingleInsertion;
26 | import eu.socialsensor.main.BenchmarkConfiguration;
27 | import eu.socialsensor.main.GraphDatabaseType;
28 | import eu.socialsensor.utils.Utils;
29 |
30 | import java.io.File;
31 | import java.util.ArrayList;
32 | import java.util.HashMap;
33 | import java.util.HashSet;
34 | import java.util.Iterator;
35 | import java.util.List;
36 | import java.util.Map;
37 | import java.util.Set;
38 |
39 | /**
40 | * OrientDB graph database implementation
41 | *
42 | * @author sotbeis, sotbeis@iti.gr
43 | * @author Alexander Patrikalakis
44 | */
45 | public class OrientGraphDatabase extends GraphDatabaseBase<Iterator<Vertex>, Iterator<Edge>, Vertex, Edge>
46 | {
47 |
48 | private OrientGraph graph = null;
49 | private boolean useLightWeightEdges;
50 |
51 | //
52 | public OrientGraphDatabase(BenchmarkConfiguration config, File dbStorageDirectoryIn)
53 | {
54 | super(GraphDatabaseType.ORIENT_DB, dbStorageDirectoryIn);
55 | OGlobalConfiguration.STORAGE_COMPRESSION_METHOD.setValue("nothing");
56 | this.useLightWeightEdges = config.orientLightweightEdges() == null ? true : config.orientLightweightEdges()
57 | .booleanValue();
58 | }
59 |
60 | @Override
61 | public void open()
62 | {
63 | graph = getGraph(dbStorageDirectory);
64 | }
65 |
66 | @SuppressWarnings("deprecation")
67 | @Override
68 | public void createGraphForSingleLoad()
69 | {
70 | OGlobalConfiguration.STORAGE_KEEP_OPEN.setValue(false);
71 | graph = getGraph(dbStorageDirectory);
72 | createSchema();
73 | }
74 |
75 | @SuppressWarnings("deprecation")
76 | @Override
77 | public void createGraphForMassiveLoad()
78 | {
79 | OGlobalConfiguration.STORAGE_KEEP_OPEN.setValue(false);
80 | graph = getGraph(dbStorageDirectory);
81 | createSchema();
82 | }
83 |
84 | @Override
85 | public void massiveModeLoading(File dataPath)
86 | {
87 | OrientMassiveInsertion orientMassiveInsertion = new OrientMassiveInsertion(this.graph.getRawGraph().getURL());
88 | orientMassiveInsertion.createGraph(dataPath, 0 /* scenarioNumber */);
89 | }
90 |
91 | @Override
92 | public void singleModeLoading(File dataPath, File resultsPath, int scenarioNumber)
93 | {
94 | Insertion orientSingleInsertion = new OrientSingleInsertion(this.graph, resultsPath);
95 | orientSingleInsertion.createGraph(dataPath, scenarioNumber);
96 | }
97 |
98 | @Override
99 | public void shutdown()
100 | {
101 | if (graph == null)
102 | {
103 | return;
104 | }
105 | graph.shutdown();
106 | graph = null;
107 | }
108 |
109 | @Override
110 | public void delete()
111 | {
112 | OrientGraphNoTx g = new OrientGraphNoTx("plocal:" + dbStorageDirectory.getAbsolutePath());
113 | g.drop();
114 |
115 | Utils.deleteRecursively(dbStorageDirectory);
116 | }
117 |
118 | @Override
119 | public void shutdownMassiveGraph()
120 | {
121 | shutdown();
122 | }
123 |
124 | @Override
125 | public void shortestPath(final Vertex v1, Integer i)
126 | {
127 | final OrientVertex v2 = (OrientVertex) getVertex(i);
128 |
129 | List<ORID> result = new OSQLFunctionShortestPath().execute(graph,
130 | null, null, new Object[] { ((OrientVertex) v1).getRecord(), v2.getRecord(), Direction.OUT, 5 },
131 | new OBasicCommandContext());
132 |
133 | result.size();
134 | }
135 |
136 | @Override
137 | public int getNodeCount()
138 | {
139 | return (int) graph.countVertices();
140 | }
141 |
142 | @Override
143 | public Set<Integer> getNeighborsIds(int nodeId)
144 | {
145 | Set<Integer> neighbours = new HashSet<Integer>();
146 | Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
147 | for (Vertex v : vertex.getVertices(Direction.IN, SIMILAR))
148 | {
149 | Integer neighborId = v.getProperty(NODE_ID);
150 | neighbours.add(neighborId);
151 | }
152 | return neighbours;
153 | }
154 |
155 | @Override
156 | public double getNodeWeight(int nodeId)
157 | {
158 | Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
159 | double weight = getNodeOutDegree(vertex);
160 | return weight;
161 | }
162 |
163 | public double getNodeInDegree(Vertex vertex)
164 | {
165 | @SuppressWarnings("rawtypes")
166 | OMultiCollectionIterator result = (OMultiCollectionIterator) vertex.getVertices(Direction.IN, SIMILAR);
167 | return (double) result.size();
168 | }
169 |
170 | public double getNodeOutDegree(Vertex vertex)
171 | {
172 | @SuppressWarnings("rawtypes")
173 | OMultiCollectionIterator result = (OMultiCollectionIterator) vertex.getVertices(Direction.OUT, SIMILAR);
174 | return (double) result.size();
175 | }
176 |
177 | @Override
178 | public void initCommunityProperty()
179 | {
180 | int communityCounter = 0;
181 | for (Vertex v : graph.getVertices())
182 | {
183 | ((OrientVertex) v).setProperties(NODE_COMMUNITY, communityCounter, COMMUNITY, communityCounter);
184 | ((OrientVertex) v).save();
185 | communityCounter++;
186 | }
187 | }
188 |
189 | @Override
190 | public Set<Integer> getCommunitiesConnectedToNodeCommunities(int nodeCommunities)
191 | {
192 | Set<Integer> communities = new HashSet<Integer>();
193 | Iterable<Vertex> vertices = graph.getVertices(NODE_COMMUNITY, nodeCommunities);
194 | for (Vertex vertex : vertices)
195 | {
196 | for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR))
197 | {
198 | int community = v.getProperty(COMMUNITY);
199 | if (!communities.contains(community))
200 | {
201 | communities.add(community);
202 | }
203 | }
204 | }
205 | return communities;
206 | }
207 |
208 | @Override
209 | public Set<Integer> getNodesFromCommunity(int community)
210 | {
211 | Set<Integer> nodes = new HashSet<Integer>();
212 | Iterable<Vertex> iter = graph.getVertices(COMMUNITY, community);
213 | for (Vertex v : iter)
214 | {
215 | Integer nodeId = v.getProperty(NODE_ID);
216 | nodes.add(nodeId);
217 | }
218 | return nodes;
219 | }
220 |
221 | @Override
222 | public Set<Integer> getNodesFromNodeCommunity(int nodeCommunity)
223 | {
224 | Set<Integer> nodes = new HashSet<Integer>();
225 | Iterable<Vertex> iter = graph.getVertices("nodeCommunity", nodeCommunity);
226 | for (Vertex v : iter)
227 | {
228 | Integer nodeId = v.getProperty(NODE_ID);
229 | nodes.add(nodeId);
230 | }
231 | return nodes;
232 | }
233 |
234 | @Override
235 | public double getEdgesInsideCommunity(int vertexCommunity, int communityVertices)
236 | {
237 | double edges = 0;
238 | Iterable<Vertex> vertices = graph.getVertices(NODE_COMMUNITY, vertexCommunity);
239 | Iterable<Vertex> comVertices = graph.getVertices(COMMUNITY, communityVertices);
240 | for (Vertex vertex : vertices)
241 | {
242 | for (Vertex v : vertex.getVertices(Direction.OUT, SIMILAR))
243 | {
244 | if (Iterables.contains(comVertices, v))
245 | {
246 | edges++;
247 | }
248 | }
249 | }
250 | return edges;
251 | }
252 |
253 | @Override
254 | public double getCommunityWeight(int community)
255 | {
256 | double communityWeight = 0;
257 | Iterable<Vertex> iter = graph.getVertices(COMMUNITY, community);
258 | if (Iterables.size(iter) > 1)
259 | {
260 | for (Vertex vertex : iter)
261 | {
262 | communityWeight += getNodeOutDegree(vertex);
263 | }
264 | }
265 | return communityWeight;
266 | }
267 |
268 | @Override
269 | public double getNodeCommunityWeight(int nodeCommunity)
270 | {
271 | double nodeCommunityWeight = 0;
272 | Iterable<Vertex> iter = graph.getVertices(NODE_COMMUNITY, nodeCommunity);
273 | for (Vertex vertex : iter)
274 | {
275 | nodeCommunityWeight += getNodeOutDegree(vertex);
276 | }
277 | return nodeCommunityWeight;
278 | }
279 |
280 | @Override
281 | public void moveNode(int nodeCommunity, int toCommunity)
282 | {
283 | Iterable<Vertex> fromIter = graph.getVertices(NODE_COMMUNITY, nodeCommunity);
284 | for (Vertex vertex : fromIter)
285 | {
286 | vertex.setProperty(COMMUNITY, toCommunity);
287 | }
288 | }
289 |
290 | @Override
291 | public double getGraphWeightSum()
292 | {
293 | long edges = 0;
294 | for (Vertex o : graph.getVertices())
295 | {
296 | edges += ((OrientVertex) o).countEdges(Direction.OUT, SIMILAR);
297 | }
298 | return (double) edges;
299 | }
300 |
301 | @Override
302 | public int reInitializeCommunities()
303 | {
304 | Map<Integer, Integer> initCommunities = new HashMap<Integer, Integer>();
305 | int communityCounter = 0;
306 | for (Vertex v : graph.getVertices())
307 | {
308 | int communityId = v.getProperty(COMMUNITY);
309 | if (!initCommunities.containsKey(communityId))
310 | {
311 | initCommunities.put(communityId, communityCounter);
312 | communityCounter++;
313 | }
314 | int newCommunityId = initCommunities.get(communityId);
315 | ((OrientVertex) v).setProperties(COMMUNITY, newCommunityId, NODE_COMMUNITY, newCommunityId);
316 | ((OrientVertex) v).save();
317 | }
318 | return communityCounter;
319 | }
320 |
321 | @Override
322 | public int getCommunity(int nodeCommunity)
323 | {
324 | final Iterator<Vertex> result = graph.getVertices(NODE_COMMUNITY, nodeCommunity).iterator();
325 | if (!result.hasNext())
326 | throw new IllegalArgumentException("node community not found: " + nodeCommunity);
327 |
328 | Vertex vertex = result.next();
329 | int community = vertex.getProperty(COMMUNITY);
330 | return community;
331 | }
332 |
333 | @Override
334 | public int getCommunityFromNode(int nodeId)
335 | {
336 | Vertex vertex = graph.getVertices(NODE_ID, nodeId).iterator().next();
337 | return vertex.getProperty(COMMUNITY);
338 | }
339 |
340 | @Override
341 | public int getCommunitySize(int community)
342 | {
343 | Iterable<Vertex> vertices = graph.getVertices(COMMUNITY, community);
344 | Set<Integer> nodeCommunities = new HashSet<Integer>();
345 | for (Vertex v : vertices)
346 | {
347 | int nodeCommunity = v.getProperty(NODE_COMMUNITY);
348 | if (!nodeCommunities.contains(nodeCommunity))
349 | {
350 | nodeCommunities.add(nodeCommunity);
351 | }
352 | }
353 | return nodeCommunities.size();
354 | }
355 |
356 | @Override
357 | public Map<Integer, List<Integer>> mapCommunities(int numberOfCommunities)
358 | {
359 | Map<Integer, List<Integer>> communities = new HashMap<Integer, List<Integer>>();
360 | for (int i = 0; i < numberOfCommunities; i++)
361 | {
362 | Iterator<Vertex> verticesIter = graph.getVertices(COMMUNITY, i).iterator();
363 | List<Integer> vertices = new ArrayList<Integer>();
364 | while (verticesIter.hasNext())
365 | {
366 | Integer nodeId = verticesIter.next().getProperty(NODE_ID);
367 | vertices.add(nodeId);
368 | }
369 | communities.put(i, vertices);
370 | }
371 | return communities;
372 | }
373 |
374 | protected void createSchema()
375 | {
376 | graph.executeOutsideTx(new OCallable