├── .gitignore
├── example-resources
    ├── data-csv
    │   ├── edges
    │   │   └── knowsFile.csv
    │   └── vertices
    │   │   └── person.csv
    └── janusgraph-properties
    │   └── janusgraph-hbase.properties
├── pom.xml
└── src
    └── main
        └── java
            └── net
                └── mpolonioli
                    └── janusgraphimporter
                        ├── core
                            ├── JanusGraphImporter.java
                            ├── LoadEdgesThread.java
                            └── LoadVerticiesThread.java
                        └── examples
                            └── ExampleApp.java
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 |
3 | .classpath
4 | .project
5 | .settings/
6 |
--------------------------------------------------------------------------------
/example-resources/data-csv/edges/knowsFile.csv:
--------------------------------------------------------------------------------
1 | id|id
2 | 1|2
--------------------------------------------------------------------------------
/example-resources/data-csv/vertices/person.csv:
--------------------------------------------------------------------------------
1 | id|name|surname|email|birthdate
2 | 1|Mario|Rossi|m.rossi@email.com;mario.rossi@email.com|1994-06-21
3 | 2|Emanuele|rosati|emanuele.rosati@email.com;e.rosati@email.com|1988-07-01
--------------------------------------------------------------------------------
/example-resources/janusgraph-properties/janusgraph-hbase.properties:
--------------------------------------------------------------------------------
1 | # JanusGraph configuration sample: HBase
2 | #
3 | # This file connects to HBase using a Zookeeper quorum
4 | # (storage.hostname) consisting solely of localhost. Zookeeper and
5 | # the HBase services must already be running and available before
6 | # starting JanusGraph with this file.
7 |
8 | gremlin.graph=org.janusgraph.core.JanusGraphFactory
9 |
10 | # The primary persistence provider used by JanusGraph. This is required.
10 | # It should be set to one of JanusGraph's built-in shorthand names for its
12 | # standard storage backends (shorthands: berkeleyje, cassandrathrift,
13 | # cassandra, astyanax, embeddedcassandra, cql, hbase, inmemory) or to the
14 | # full package and classname of a custom/third-party StoreManager
15 | # implementation.
16 | #
17 | # Default: (no default value)
18 | # Data Type: String
19 | # Mutability: LOCAL
20 | storage.backend=hbase
21 |
22 | # The hostname or comma-separated list of hostnames of storage backend
23 | # servers. This is only applicable to some storage backends, such as
24 | # cassandra and hbase.
25 | #
26 | # Default: 127.0.0.1
27 | # Data Type: class java.lang.String[]
28 | # Mutability: LOCAL
29 | storage.hostname=127.0.0.1
30 |
31 | # Whether to enable JanusGraph's database-level cache, which is shared
32 | # across all transactions. Enabling this option speeds up traversals by
33 | # holding hot graph elements in memory, but also increases the likelihood
34 | # of reading stale data. Disabling it forces each transaction to
35 | # independently fetch graph elements from storage before reading/writing
36 | # them.
37 | #
38 | # Default: false
39 | # Data Type: Boolean
40 | # Mutability: MASKABLE
41 | cache.db-cache = true
42 |
43 | # How long, in milliseconds, database-level cache will keep entries after
44 | # flushing them. This option is only useful on distributed storage
45 | # backends that are capable of acknowledging writes without necessarily
46 | # making them immediately visible.
47 | #
48 | # Default: 50
49 | # Data Type: Integer
50 | # Mutability: GLOBAL_OFFLINE
51 | #
52 | # Settings with mutability GLOBAL_OFFLINE are centrally managed in
53 | # JanusGraph's storage backend. After starting the database for the first
54 | # time, this file's copy of this setting is ignored. Use JanusGraph's
55 | # Management System to read or modify this value after bootstrapping.
56 | cache.db-cache-clean-wait = 20
57 |
58 | # Default expiration time, in milliseconds, for entries in the
59 | # database-level cache. Entries are evicted when they reach this age even
60 | # if the cache has room to spare. Set to 0 to disable expiration (cache
61 | # entries live forever or until memory pressure triggers eviction when set
62 | # to 0).
63 | #
64 | # Default: 10000
65 | # Data Type: Long
66 | # Mutability: GLOBAL_OFFLINE
67 | #
68 | # Settings with mutability GLOBAL_OFFLINE are centrally managed in
69 | # JanusGraph's storage backend. After starting the database for the first
70 | # time, this file's copy of this setting is ignored. Use JanusGraph's
71 | # Management System to read or modify this value after bootstrapping.
72 | cache.db-cache-time = 180000
73 |
74 | # Size of JanusGraph's database level cache. Values between 0 and 1 are
75 | # interpreted as a percentage of VM heap, while larger values are
76 | # interpreted as an absolute size in bytes.
77 | #
78 | # Default: 0.3
79 | # Data Type: Double
80 | # Mutability: MASKABLE
81 | cache.db-cache-size = 0.5
82 |
83 | # Other configurations
84 | storage.hbase.table=janusgraph
85 | storage.batch-loading=true
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 | net.mpolonioli
6 | janusgraph-csv-importer
7 | 0.0.1-SNAPSHOT
8 | jar
9 |
10 | janusgraph-csv-importer
11 | http://maven.apache.org
12 |
13 |
14 | UTF-8
15 |
16 |
17 |
18 |
19 | org.janusgraph
20 | janusgraph-all
21 | 0.1.1
22 |
23 |
24 | jdk.tools
25 | jdk.tools
26 |
27 |
28 |
29 |
30 |
31 | com.google.guava
32 | guava
33 | 16.0.1
34 |
35 |
36 |
37 |
38 |
39 |
40 | org.apache.maven.plugins
41 | maven-compiler-plugin
42 | 3.3
43 |
44 | 1.8
45 | 1.8
46 |
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/src/main/java/net/mpolonioli/janusgraphimporter/core/JanusGraphImporter.java:
--------------------------------------------------------------------------------
1 | package net.mpolonioli.janusgraphimporter.core;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.util.ArrayList;
6 | import java.util.HashMap;
7 | import java.util.List;
8 | import java.util.Scanner;
9 |
10 | import org.apache.tinkerpop.gremlin.structure.Vertex;
11 | import org.janusgraph.core.Cardinality;
12 | import org.janusgraph.core.JanusGraph;
13 | import org.janusgraph.core.JanusGraphFactory;
14 | import org.janusgraph.core.Multiplicity;
15 | import org.janusgraph.core.PropertyKey;
16 | import org.janusgraph.core.schema.JanusGraphManagement;
17 |
18 | public class JanusGraphImporter {
19 |
20 |     private static final long TX_MAX_RETRIES = 1000;
21 |
22 |     // Instance field: each importer owns the graph it opened, so creating a
23 |     // second importer does not replace the connection used by the first one.
24 |     private JanusGraph graph;
25 |
26 |     /**
27 |      * Opens the graph through JanusGraphFactory.open(String).
28 |      *
29 |      * @param conf argument passed unchanged to JanusGraphFactory.open
30 |      */
31 |     public JanusGraphImporter(String conf)
32 |     {
33 |         graph = JanusGraphFactory.open(conf);
34 |     }
35 |
36 |     /**
37 |      * Defines the schema in JanusGraph: vertex labels, edge labels
38 |      * (Multiplicity.SIMPLE), property keys and, for every key listed in
39 |      * propertiesWithIndex, a composite vertex index named key + "Index".
40 |      * Each element is created and committed in its own management transaction.
41 |      *
42 |      * @param vertexLabels vertex labels to create
43 |      * @param edgeLabels edge labels to create
44 |      * @param propertyKeys names of the property keys to create
45 |      * @param propertyHasCardinality cardinality looked up by property key name
46 |      * @param propertyHasType data type looked up by property key name
47 |      * @param propertiesWithIndex property key names that get a composite index
48 |      */
49 |     public void defineSchema(
50 |             List<String> vertexLabels,
51 |             List<String> edgeLabels,
52 |             List<String> propertyKeys,
53 |             HashMap<String, Cardinality> propertyHasCardinality,
54 |             @SuppressWarnings("rawtypes") HashMap<String, Class> propertyHasType,
55 |             List<String> propertiesWithIndex
56 |             )
57 |     {
58 |
59 |         openConnection();
60 |
61 |         JanusGraphManagement mgmt;
62 |
63 |         // Declare all vertex labels
64 |         System.out.println("Declaring all vertex labels");
65 |         for( String vLabel : vertexLabels ) {
66 |             System.out.print(vLabel + " ");
67 |             mgmt = graph.openManagement();
68 |             mgmt.makeVertexLabel(vLabel).make();
69 |             mgmt.commit();
70 |         }
71 |
72 |         // Declare all edge labels
73 |         System.out.println("\nDeclaring all edge labels");
74 |         for( String eLabel : edgeLabels ) {
75 |             System.out.print(eLabel + " ");
76 |             mgmt = graph.openManagement();
77 |             mgmt.makeEdgeLabel(eLabel).multiplicity(Multiplicity.SIMPLE).make();
78 |             mgmt.commit();
79 |         }
80 |
81 |         // Declare all properties and their composite indexes. The banner text is
82 |         // kept as-is; the cardinality actually used comes from propertyHasCardinality.
83 |         System.out.println("\nDeclaring all properties with Cardinality.SINGLE");
84 |         for ( String propKey : propertyKeys ) {
85 |             System.out.print(propKey + " ");
86 |             mgmt = graph.openManagement();
87 |             PropertyKey property = mgmt.makePropertyKey(propKey).dataType(propertyHasType.get(propKey))
88 |                     .cardinality(propertyHasCardinality.get(propKey)).make();
89 |             if(propertiesWithIndex.contains(propKey))
90 |             {
91 |                 String indexLabel = propKey + "Index";
92 |                 System.out.print(indexLabel + " ");
93 |                 mgmt.buildIndex(indexLabel, Vertex.class).addKey(property).buildCompositeIndex();
94 |             }
95 |             mgmt.commit();
96 |         }
97 |
98 |         graph.tx().commit();
99 |         System.out.println();
100 |     }
101 |
102 |     /**
103 |      * Opens a connection to the DBMS if it is closed.
104 |      */
105 |     public void openConnection() {
106 |         if(graph.isClosed())
107 |         {
108 |             graph = JanusGraphFactory.open(graph.configuration());
109 |         }
110 |     }
111 |
112 |     /**
113 |      * Closes the connection to the DBMS if it is open.
114 |      */
115 |     public void closeConnection() {
116 |         if(graph.isOpen())
117 |         {
118 |             graph.close();
119 |         }
120 |     }
121 |
122 |     /**
123 |      * Clears the existing graph: closes the connection, calls
124 |      * JanusGraphCleanup.clear and then opens the connection again.
125 |      */
126 |     public void clearGraph() {
127 |         closeConnection();
128 |         org.janusgraph.core.util.JanusGraphCleanup.clear(graph);
129 |         openConnection();
130 |     }
131 |
132 |     /**
133 |      * Loads the vertices contained in the given file.
134 |      *
135 |      * The first line holds the '|'-separated column names. The remaining lines
136 |      * are read in batches of at most batchSize lines; every batch is split into
137 |      * at most threadCount slices, one LoadVerticiesThread is started per slice,
138 |      * and the next batch is read only after all of them have finished.
139 |      * The vertex label is the file name without its extension.
140 |      *
141 |      * @param file file to load
142 |      * @param printLoadingDots whether periodic progress reports are printed
143 |      * @param batchSize maximum number of lines per batch, must be positive
144 |      * @param progReportPeriod minimum number of seconds between progress reports
145 |      * @param threadCount maximum number of loader threads per batch, must be positive
146 |      * @param propertyHasType forwarded to every LoadVerticiesThread
147 |      * @param propertyHasCardinality forwarded to every LoadVerticiesThread
148 |      * @throws IOException if the file cannot be opened
149 |      * @throws InterruptedException if interrupted while waiting for the loader threads
150 |      * @throws IllegalArgumentException if batchSize or threadCount is not positive
151 |      */
152 |     public void loadVertices(
153 |             File file,
154 |             boolean printLoadingDots,
155 |             int batchSize,
156 |             long progReportPeriod,
157 |             int threadCount,
158 |             @SuppressWarnings("rawtypes") HashMap<String, Class> propertyHasType,
159 |             HashMap<String, Cardinality> propertyHasCardinality
160 |             ) throws IOException, java.text.ParseException, InterruptedException {
161 |
162 |         requirePositive(batchSize, "batchSize");
163 |         requirePositive(threadCount, "threadCount");
164 |
165 |         openConnection();
166 |
167 |         String vertexLabel = labelFromFileName(file.getName());
168 |
169 |         // try-with-resources closes the scanner even when a loading step throws
170 |         try (Scanner fileScanner = new Scanner(file)) {
171 |
172 |             final String[] colNames = fileScanner.nextLine().split("\\|");
173 |
174 |             long lineCount = 0;
175 |
176 |             // For progress reporting
177 |             long startTime = System.currentTimeMillis();
178 |             long nextProgReportTime = startTime + progReportPeriod*1000;
179 |             long lastLineCount = 0;
180 |
181 |             while(fileScanner.hasNextLine())
182 |             {
183 |                 List<String> batchLines = readBatch(fileScanner, batchSize);
184 |                 lineCount += batchLines.size();
185 |
186 |                 List<Thread> threads = new ArrayList<>();
187 |                 for(String[] threadLines : splitForThreads(batchLines, batchSize, threadCount))
188 |                 {
189 |                     Thread thread = new LoadVerticiesThread(
190 |                             graph,
191 |                             colNames,
192 |                             vertexLabel,
193 |                             TX_MAX_RETRIES,
194 |                             threadLines,
195 |                             lineCount,
196 |                             propertyHasType,
197 |                             propertyHasCardinality
198 |                             );
199 |
200 |                     thread.setName("t" + threads.size());
201 |                     threads.add(thread);
202 |                     thread.start();
203 |                 }
204 |                 joinAll(threads);
205 |
206 |                 if (printLoadingDots &&
207 |                         (System.currentTimeMillis() > nextProgReportTime)) {
208 |                     printProgress(startTime, lineCount - lastLineCount);
209 |                     nextProgReportTime += progReportPeriod*1000;
210 |                     lastLineCount = lineCount;
211 |                 }
212 |             }
213 |
214 |             // Final report: lines loaded since the last periodic report
215 |             printProgress(startTime, lineCount - lastLineCount);
216 |         }
217 |     }
218 |
219 |     /**
220 |      * Loads the edges contained in the given file.
221 |      *
222 |      * The first line holds the '|'-separated column names. The remaining lines
223 |      * are read in batches of at most batchSize lines; every batch is split into
224 |      * at most threadCount slices, one LoadEdgesThread is started per slice,
225 |      * and the next batch is read only after all of them have finished.
226 |      * The edge label is looked up in edgeHasLabel under the file name without
227 |      * its extension; when there is no such entry that name itself is the label.
228 |      *
229 |      * @param file file to load
230 |      * @param edgeHasLabel edge label looked up by file name without extension
231 |      * @param undirected forwarded to every LoadEdgesThread
232 |      * @param printLoadingDots whether periodic progress reports are printed
233 |      * @param batchSize maximum number of lines per batch, must be positive
234 |      * @param progReportPeriod minimum number of seconds between progress reports
235 |      * @param threadCount maximum number of loader threads per batch, must be positive
236 |      * @param propertyHasType forwarded to every LoadEdgesThread
237 |      * @throws IOException if the file cannot be opened
238 |      * @throws InterruptedException if interrupted while waiting for the loader threads
239 |      * @throws IllegalArgumentException if batchSize or threadCount is not positive
240 |      */
241 |     public void loadEdges(
242 |             File file,
243 |             HashMap<String, String> edgeHasLabel,
244 |             boolean undirected,
245 |             boolean printLoadingDots,
246 |             int batchSize,
247 |             long progReportPeriod,
248 |             int threadCount,
249 |             @SuppressWarnings("rawtypes") HashMap<String, Class> propertyHasType
250 |             ) throws IOException, java.text.ParseException, InterruptedException {
251 |
252 |         requirePositive(batchSize, "batchSize");
253 |         requirePositive(threadCount, "threadCount");
254 |
255 |         openConnection();
256 |
257 |         String edgeName = labelFromFileName(file.getName());
258 |         String edgeLabel = edgeHasLabel.getOrDefault(edgeName, edgeName);
259 |
260 |         // try-with-resources closes the scanner even when a loading step throws
261 |         try (Scanner fileScanner = new Scanner(file)) {
262 |
263 |             final String[] colNames = fileScanner.nextLine().split("\\|");
264 |
265 |             long lineCount = 0;
266 |
267 |             // For progress reporting
268 |             long startTime = System.currentTimeMillis();
269 |             long nextProgReportTime = startTime + progReportPeriod*1000;
270 |             long lastLineCount = 0;
271 |
272 |             while(fileScanner.hasNextLine())
273 |             {
274 |                 List<String> batchLines = readBatch(fileScanner, batchSize);
275 |                 lineCount += batchLines.size();
276 |
277 |                 List<Thread> threads = new ArrayList<>();
278 |                 for(String[] threadLines : splitForThreads(batchLines, batchSize, threadCount))
279 |                 {
280 |                     // NOTE(review): loadVertices passes lineCount here while this call
281 |                     // passes lastLineCount; kept unchanged, confirm which one is intended.
282 |                     Thread thread = new LoadEdgesThread(
283 |                             graph,
284 |                             edgeLabel,
285 |                             undirected,
286 |                             TX_MAX_RETRIES,
287 |                             threadLines,
288 |                             lastLineCount,
289 |                             colNames,
290 |                             propertyHasType
291 |                             );
292 |
293 |                     thread.setName("t" + threads.size());
294 |                     threads.add(thread);
295 |                     thread.start();
296 |                 }
297 |                 joinAll(threads);
298 |
299 |                 if (printLoadingDots &&
300 |                         (System.currentTimeMillis() > nextProgReportTime)) {
301 |                     printProgress(startTime, lineCount - lastLineCount);
302 |                     nextProgReportTime += progReportPeriod*1000;
303 |                     lastLineCount = lineCount;
304 |                 }
305 |             }
306 |
307 |             // Final report: lines loaded since the last periodic report
308 |             printProgress(startTime, lineCount - lastLineCount);
309 |         }
310 |     }
311 |
312 |     /*
313 |      * reject a non-positive batchSize/threadCount up front: a batch size of 0
314 |      * would never consume the file and a thread count of 0 would divide by zero
315 |      */
316 |     private static void requirePositive(int value, String name) {
317 |         if (value <= 0)
318 |         {
319 |             throw new IllegalArgumentException(name + " must be positive but was " + value);
320 |         }
321 |     }
322 |
323 |     /*
324 |      * return the file name without its last extension ("person.csv" -> "person"),
325 |      * or the whole name when it has no extension
326 |      */
327 |     private static String labelFromFileName(String fileName) {
328 |         int dot = fileName.lastIndexOf('.');
329 |         return dot > 0 ? fileName.substring(0, dot) : fileName;
330 |     }
331 |
332 |     /*
333 |      * read up to batchSize lines from the scanner
334 |      */
335 |     private static List<String> readBatch(Scanner fileScanner, int batchSize) {
336 |         List<String> batchLines = new ArrayList<>();
337 |         while(batchLines.size() < batchSize && fileScanner.hasNextLine())
338 |         {
339 |             batchLines.add(fileScanner.nextLine());
340 |         }
341 |         return batchLines;
342 |     }
343 |
344 |     /*
345 |      * split a batch into at most threadCount consecutive slices; the slice width
346 |      * is batchSize / threadCount rounded up, so every line of the batch belongs
347 |      * to a slice even when batchSize is not a multiple of threadCount
348 |      */
349 |     private static List<String[]> splitForThreads(List<String> batchLines, int batchSize, int threadCount) {
350 |         int linesPerThread = batchSize / threadCount;
351 |         if (batchSize % threadCount != 0)
352 |         {
353 |             linesPerThread++;
354 |         }
355 |
356 |         List<String[]> slices = new ArrayList<>();
357 |         for(int start = 0; start < batchLines.size(); start += linesPerThread)
358 |         {
359 |             int end = Math.min(start + linesPerThread, batchLines.size());
360 |             slices.add(batchLines.subList(start, end).toArray(new String[0]));
361 |         }
362 |         return slices;
363 |     }
364 |
365 |     /*
366 |      * wait for every thread of the batch to finish
367 |      */
368 |     private static void joinAll(List<Thread> threads) throws InterruptedException {
369 |         for(Thread thread : threads)
370 |         {
371 |             thread.join();
372 |         }
373 |     }
374 |
375 |     /*
376 |      * print the time elapsed since startTime and the number of lines loaded
377 |      */
378 |     private static void printProgress(long startTime, long linesLoaded) {
379 |         long timeElapsed = System.currentTimeMillis() - startTime;
380 |         System.out.println(String.format(
381 |                 "Time Elapsed: %03dm.%02ds, Lines Loaded: +%d",
382 |                 (timeElapsed/1000)/60, (timeElapsed/1000) % 60, linesLoaded));
383 |     }
384 |
385 | }
386 |
--------------------------------------------------------------------------------
/src/main/java/net/mpolonioli/janusgraphimporter/core/LoadEdgesThread.java:
--------------------------------------------------------------------------------
1 | package net.mpolonioli.janusgraphimporter.core;
2 |
3 | import java.text.ParseException;
4 | import java.text.SimpleDateFormat;
5 | import java.util.ArrayList;
6 | import java.util.Date;
7 | import java.util.HashMap;
8 | import java.util.List;
9 | import java.util.NoSuchElementException;
10 | import java.util.TimeZone;
11 |
12 | import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
13 | import org.apache.tinkerpop.gremlin.structure.Vertex;
14 | import org.janusgraph.core.JanusGraph;
15 | import org.janusgraph.core.JanusGraphTransaction;
16 |
17 | public class LoadEdgesThread extends Thread {
18 |
19 | private JanusGraph graph;
20 | private long txMaxRetries;
21 | private String[] threadLines;
22 | private long lineCount;
23 | private String edgeLabel;
24 | private boolean undirected;
25 | private String[] colNames;
26 | private @SuppressWarnings("rawtypes") HashMap propertyHasType;
27 |
28 | public LoadEdgesThread(
29 | JanusGraph graph,
30 | String edgeLabel,
31 | boolean undirected,
32 | long txMaxRetries,
33 | String[] threadLines,
34 | long lineCount,
35 | String[] colNames,
36 | @SuppressWarnings("rawtypes") HashMap propertyHasType)
37 | {
38 | this.graph = graph;
39 | this.txMaxRetries = txMaxRetries;
40 | this.threadLines = threadLines;
41 | this.lineCount = lineCount;
42 | this.edgeLabel = edgeLabel;
43 | this.undirected = undirected;
44 | this.propertyHasType = propertyHasType;
45 | this.colNames = colNames;
46 | }
47 |
48 | @Override
49 | public void run() {
50 |
51 | SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
52 | dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
53 |
54 | boolean txSucceeded = false;
55 | int txFailCount = 0;
56 |
57 | String keyLabelV1 = colNames[0];
58 | String keyLabelV2 = colNames[1];
59 |
60 | do {
61 | JanusGraphTransaction tx = graph.newTransaction();
62 | for (int i = 0; i < threadLines.length; i++) {
63 | String line = threadLines[i];
64 |
65 | String[] colVals = line.split("\\|");
66 | String keyValueV1 = colVals[0];
67 | String keyValueV2 = colVals[1];
68 |
69 | GraphTraversalSource g = tx.traversal();
70 |
71 | try
72 | {
73 | // find the vertices
74 | Vertex vertex1 =
75 | g.V().has(keyLabelV1, keyValueV1).next();
76 | Vertex vertex2 =
77 | g.V().has(keyLabelV2, keyValueV2).next();
78 |
79 | // add the properties to the edge if exists
80 | List