├── .gitignore ├── NOTICE.txt ├── batch.properties ├── changelog.txt ├── generate.sh ├── import-mvn.sh ├── import.bat ├── import.sh ├── import_csv.sh ├── pom.xml ├── readme.md ├── run.sh ├── sample ├── batch.properties ├── import.sh ├── nodes.csv ├── nodes2.csv └── rels.csv ├── settings.sh ├── sort.sh └── src ├── main ├── java │ └── org │ │ └── neo4j │ │ └── batchimport │ │ ├── CSVParser.java │ │ ├── CSVReader.java │ │ ├── Importer.java │ │ ├── IndexInfo.java │ │ ├── LineData.java │ │ ├── Report.java │ │ ├── StdOutReport.java │ │ ├── Utils.java │ │ ├── importer │ │ ├── AbstractLineData.java │ │ ├── ChunkerLineData.java │ │ ├── CsvLineData.java │ │ ├── RelType.java │ │ ├── RowData.java │ │ └── Type.java │ │ ├── index │ │ ├── LongIterableIndexHits.java │ │ └── MapDbCachingIndexProvider.java │ │ └── utils │ │ ├── Chunker.java │ │ ├── Config.java │ │ ├── FileIterator.java │ │ ├── Params.java │ │ ├── RelationshipSorter.java │ │ └── RelationshipSorter2.java └── resources │ └── log4j.properties └── test └── java ├── DataTest.java └── org └── neo4j └── batchimport ├── ImporterIntegrationTest.java ├── ImporterTest.java ├── IndexInfoTest.java ├── RelationshipMatcher.java ├── TestDataGenerator.java ├── TestImporter.java ├── csv ├── ChunkerPerformanceTest.java ├── ChunkerRowDataTest.java ├── CsvLineDataTest.java ├── OpenCSVPerformanceTest.java ├── OpenCSVTest.java ├── PerformanceTestFile.java ├── RowDataPerformanceTest.java ├── RowDataTest.java └── StreamTokenizerTest.java ├── importer └── AbstractLineDataTest.java └── utils ├── ChunkerTest.java ├── ConfigTest.java ├── FileIteratorTest.java ├── ParamsTest.java ├── RelStartEndComparatorTest.java └── RelationshipSorterTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | zip.sh 2 | *.db/ 3 | lib/ 4 | *.tsv 5 | .project 6 | .shell_history 7 | *.ipr 8 | *.iws 9 | *.iml 10 | .idea 11 | target 12 | *.csv 13 | .DS_Store 14 | .settings 15 | *.gz 16 | gc.log 17 | *.zip 18 | zip19.sh 
-------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | Neo4j 2 | Copyright © 2002-2011 Network Engine for Objects in Lund AB (referred to 3 | in this notice as “Neo Technology”) 4 | [http://neotechnology.com] 5 | 6 | This product includes software ("Software") developed by Neo Technology. 7 | 8 | The copyright in the bundled Neo4j graph database (including the 9 | Software) is owned by Neo Technology. The Software developed and owned 10 | by Neo Technology is licensed under the GNU GENERAL PUBLIC LICENSE 11 | Version 3 (http://www.fsf.org/licensing/licenses/gpl-3.0.html) ("GPL") 12 | to all third parties and that license, as required by the GPL, is 13 | included in the LICENSE.txt file. 14 | 15 | However, if you have executed an End User Software License and Services 16 | Agreement or an OEM Software License and Support Services Agreement, or 17 | another commercial license agreement with Neo Technology or one of its 18 | affiliates (each, a "Commercial Agreement"), the terms of the license in 19 | such Commercial Agreement will supersede the GPL and you may use the 20 | software solely pursuant to the terms of the relevant Commercial 21 | Agreement. 22 | 23 | 24 | Third party libraries 25 | --------------------- 26 | 27 | Full license texts are found in LICENSES.txt. 28 | 29 | 30 | The bundled JAX-RS (JSR311) API is licensed under the GNU General 31 | Public License Version 2 with classpath exception. Alternatively 32 | under the Common Development and Distribution License, version 1.1. 33 | 34 | The bundled Jersey library, containing Jersey Core, Jersey Server, and 35 | Jersey Client, is licensed under the GNU General Public License 36 | Version 2 with classpath exception. Alternatively under the Common 37 | Development and Distribution License, version 1.1. 
38 | 39 | The Mime streaming plugin library is licensed under the GNU General 40 | Public License Version 2 with classpath exception. Alternatively 41 | under the Common Development and Distribution License, version 1.0. 42 | 43 | Other bundled libraries are licenced according to the following listing. 44 | 45 | The Apache Software License, Version 2.0: 46 | Apache ServiceMix :: Bundles :: lucene, 47 | Apache Commons: 48 | Commons BeanUtils, Commons BeanUtils Core, Commons Collections, Commons IO, 49 | Commons Configuration, Commons Digester, Commons Lang, Commons Logging, 50 | Apache Log4j, 51 | Apache Felix: Felix FileInstall, Felix Framework, Felix Main, 52 | JSON.simple, 53 | RRD4J, 54 | Geronimo Java Transaction API, 55 | Groovy, 56 | Jackson: Jackson Core, Jackson JAX-RS, Data Mapper for Jackson, 57 | Jansi, 58 | Jetty: Jetty, Jetty Util, Jetty Servlet Specification API, 59 | 60 | MIT License: 61 | SLF4J API Module, SLF4J Log4j-12 Binding, SLF4J JDK1.4 Logging Binding, 62 | SLF4J Jakarta Commons Logging Binding, 63 | Base64.js, 64 | jTemplates, 65 | jQuery, 66 | jQuery BBQ, 67 | jQuery hashchange event, 68 | SimpleModal, 69 | jQuery flot including colorhelpers 70 | 71 | BSD licence: 72 | ASM: ASM Core, ASM Tree, ASM Commons, ASM Util, ASM Analysis, 73 | Blueprints: Data Models and their Implementations, 74 | Gremlin: A Graph-Based Programming Language, 75 | Pipes: A Data Flow Framework using Process Graphs, 76 | JLine, Scala library 77 | 78 | provided without support or warranty: JSON (JavaScript Object Notation) 79 | 80 | Public domain: 81 | Dough Lea's util.concurrent package, 82 | ANTLR 2.7.7, 83 | JSON2.js 84 | -------------------------------------------------------------------------------- /batch.properties: -------------------------------------------------------------------------------- 1 | dump_configuration=false 2 | cache_type=none 3 | use_memory_mapped_buffers=true 4 | neostore.propertystore.db.index.keys.mapped_memory=5M 5 | 
neostore.propertystore.db.index.mapped_memory=5M 6 | neostore.nodestore.db.mapped_memory=200M 7 | neostore.relationshipstore.db.mapped_memory=500M 8 | neostore.propertystore.db.mapped_memory=200M 9 | neostore.propertystore.db.strings.mapped_memory=200M 10 | batch_array_separator=, 11 | #batch_import.csv.quotes=true 12 | #batch_import.csv.delim=, 13 | -------------------------------------------------------------------------------- /changelog.txt: -------------------------------------------------------------------------------- 1 | 2013-06-27 2 | ========== 3 | * supports array types 4 | 5 | 2013-06-19 6 | ========== 7 | * import files can now be compressed as .gz or .zip 8 | * supports multiple csv files for nodes, relationships, comma separated 9 | * supports automatic indexing with headers like "name:string:users" 10 | * supports index lookups for relationships for start/end fields "name:string:users" and the literal values 11 | * now supports config file based setup 12 | * supports keeping the database instead of cleaning 13 | * supports opencsv as reader, alternative delimiters, quotes 14 | * supports caching in index lookups using MapDB in front of lucene 15 | * added faster default CSV reader 16 | * added new type LABEL that will also be used for node-labels in the future -------------------------------------------------------------------------------- /generate.sh: -------------------------------------------------------------------------------- 1 | source ./settings.sh 2 | 3 | mvn clean test-compile exec:java -Dexec.mainClass=org.neo4j.batchimport.TestDataGenerator -Dexec.classpathScope=test \ 4 | -Dexec.args="$1 $2 $3 $4" | grep -iv '\[\(INFO\|debug\)\]' 5 | -------------------------------------------------------------------------------- /import-mvn.sh: -------------------------------------------------------------------------------- 1 | DB=${1-target/graph.db} 2 | shift 3 | NODES=${1-nodes.csv} 4 | shift 5 | RELS=${1-rels.csv} 6 | shift 7 | mvn compile 
exec:java -Dexec.mainClass="org.neo4j.batchimport.Importer" \ 8 | -Dexec.args="batch.properties $DB $NODES $RELS $*" | grep -iv '\[\(INFO\|debug\)\]' 9 | -------------------------------------------------------------------------------- /import.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | set ERROR_CODE=0 4 | set HEAP=4G 5 | 6 | :init 7 | @REM Decide how to startup depending on the version of windows 8 | 9 | @REM -- Win98ME 10 | if NOT "%OS%"=="Windows_NT" goto Win9xArg 11 | 12 | @REM set local scope for the variables with windows NT shell 13 | if "%OS%"=="Windows_NT" @setlocal 14 | 15 | @REM -- 4NT shell 16 | if "%eval[2+2]" == "4" goto 4NTArgs 17 | 18 | @REM -- Regular WinNT shell 19 | set CMD_LINE_ARGS=%* 20 | goto WinNTGetScriptDir 21 | 22 | @REM The 4NT Shell from jp software 23 | :4NTArgs 24 | set CMD_LINE_ARGS=%$ 25 | goto WinNTGetScriptDir 26 | 27 | :Win9xArg 28 | @REM Slurp the command line arguments. This loop allows for an unlimited number 29 | @REM of agruments (up to the command line limit, anyway). 30 | set CMD_LINE_ARGS= 31 | :Win9xApp 32 | if %1a==a goto Win9xGetScriptDir 33 | set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1 34 | shift 35 | goto Win9xApp 36 | 37 | :Win9xGetScriptDir 38 | set SAVEDIR=%CD% 39 | %0\ 40 | cd %0\..\.. 41 | set BASEDIR=%CD% 42 | cd %SAVEDIR% 43 | set SAVE_DIR= 44 | goto repoSetup 45 | 46 | :WinNTGetScriptDir 47 | set BASEDIR=%~dp0\. 
48 | 49 | :repoSetup 50 | 51 | if "%JAVACMD%"=="" set JAVACMD=java 52 | 53 | if "%REPO%"=="" set REPO=%BASEDIR%\lib 54 | 55 | rem Setup the classpath 56 | set LIBPATH="" 57 | pushd "%REPO%" 58 | for %%G in (*.jar) do call:APPEND_TO_LIBPATH %%G 59 | popd 60 | goto LIBPATH_END 61 | 62 | : APPEND_TO_LIBPATH 63 | set filename=%~1 64 | set suffix=%filename:~-4% 65 | if %suffix% equ .jar set LIBPATH=%LIBPATH%;"%REPO%\%filename%" 66 | goto :EOF 67 | 68 | :LIBPATH_END 69 | 70 | set CLASSPATH=%LIBPATH% 71 | 72 | set EXTRA_JVM_ARGUMENTS=-Dfile.encoding=UTF-8 -Xmx%HEAP% -Xms%HEAP% 73 | goto endInit 74 | 75 | @REM Reaching here means variables are defined and arguments have been captured 76 | :endInit 77 | 78 | %JAVACMD% %JAVA_OPTS% %EXTRA_JVM_ARGUMENTS% -classpath %CLASSPATH_PREFIX%;%CLASSPATH% -Dapp.name="batch-import" -Dapp.repo="%REPO%" -Dbasedir="%BASEDIR%" org.neo4j.batchimport.Importer %CMD_LINE_ARGS% 79 | if ERRORLEVEL 1 goto error 80 | goto end 81 | 82 | :error 83 | if "%OS%"=="Windows_NT" @endlocal 84 | set ERROR_CODE=1 85 | 86 | :end 87 | @REM set local scope for the variables with windows NT shell 88 | if "%OS%"=="Windows_NT" goto endNT 89 | 90 | @REM For old DOS remove the set variables from ENV - we assume they were not set 91 | @REM before we started - at least we don't leave any baggage around 92 | set CMD_LINE_ARGS= 93 | goto postExec 94 | 95 | :endNT 96 | @endlocal 97 | 98 | :postExec 99 | 100 | if "%FORCE_EXIT_ON_ERROR%" == "on" ( 101 | if %ERROR_CODE% NEQ 0 exit %ERROR_CODE% 102 | ) 103 | 104 | exit /B %ERROR_CODE% 105 | -------------------------------------------------------------------------------- /import.sh: -------------------------------------------------------------------------------- 1 | if [ ! -d lib ]; then 2 | echo lib directory of binary download missing. 
Please download the zip or run import-mvn.sh 3 | exit 1 4 | fi 5 | 6 | HEAP=4G 7 | 8 | # Detect Cygwin 9 | case `uname -s` in 10 | CYGWIN*) 11 | cygwin=1 12 | esac 13 | 14 | DB=${1-target/graph.db} 15 | shift 16 | NODES=${1-nodes.csv} 17 | shift 18 | RELS=${1-rels.csv} 19 | shift 20 | CP="" 21 | base=`dirname "$0"` 22 | if [ \! -z "$cygwin" ]; then 23 | wbase=`cygpath -w "$base"` 24 | fi 25 | curdir=`pwd` 26 | cd "$base" 27 | for i in lib/*.jar; do 28 | if [ -z "$cygwin" ]; then 29 | CP="$CP":"$base/$i" 30 | else 31 | i=`cygpath -w "$i"` 32 | CP="$CP;$wbase/$i" 33 | fi 34 | done 35 | cd "$curdir" 36 | #echo java -classpath $CP -Xmx$HEAP -Xms$HEAP -Dfile.encoding=UTF-8 org.neo4j.batchimport.Importer batch.properties "$DB" "$NODES" "$RELS" "$@" 37 | java -classpath "$CP" -Xmx$HEAP -Xms$HEAP -Dfile.encoding=UTF-8 org.neo4j.batchimport.Importer batch.properties "$DB" "$NODES" "$RELS" "$@" 38 | -------------------------------------------------------------------------------- /import_csv.sh: -------------------------------------------------------------------------------- 1 | source ./settings.sh 2 | 3 | mvn clean test-compile exec:java -Dexec.mainClass=org.neo4j.batchimport.ParallelImporter -Dexec.classpathScope=test -Dexec.args="/mnt/parallel.db nodes.csv rels.csv 100000000 4 50 100 2 ONE,TWO,THREE,FOUR,FIVE,SIX,SEVEN,EIGHT,NINE,TEN" 4 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | org.neo4j 4 | batch-import 5 | 3.0.4 6 | Neo4j Batch Importer 7 | 8 | UTF-8 9 | 3.0.4 10 | GPL-3-header.txt 11 | 12 | 13 | 14 | Neo4j Snapshots 15 | http://m2.neo4j.org/content/repositories/snapshots 16 | 17 | 18 | 19 | 20 | 21 | GNU General Public License, Version 3 22 | http://www.gnu.org/licenses/gpl-3.0-standalone.html 23 | The software ("Software") developed and owned by Network Engine for 24 | Objects in Lund AB (referred to in this notice as 
"Neo Technology") is 25 | licensed under the GNU GENERAL PUBLIC LICENSE Version 3 to all third 26 | parties and that license is included below. 27 | 28 | However, if you have executed an End User Software License and Services 29 | Agreement or an OEM Software License and Support Services Agreement, or 30 | another commercial license agreement with Neo Technology or one of its 31 | affiliates (each, a "Commercial Agreement"), the terms of the license in 32 | such Commercial Agreement will supersede the GNU GENERAL PUBLIC LICENSE 33 | Version 3 and you may use the Software solely pursuant to the terms of 34 | the relevant Commercial Agreement. 35 | 36 | 37 | 38 | 39 | 40 | 41 | net.sf.opencsv 42 | opencsv 43 | 2.3 44 | 45 | 46 | org.mapdb 47 | mapdb 48 | 0.9.3 49 | 50 | 51 | junit 52 | junit 53 | 4.8.1 54 | test 55 | 56 | 57 | log4j 58 | log4j 59 | 1.2.17 60 | 61 | 62 | org.mockito 63 | mockito-core 64 | 1.8.5 65 | test 66 | 67 | 68 | org.neo4j 69 | neo4j-kernel 70 | ${neo4j.version} 71 | 72 | 73 | org.neo4j 74 | neo4j-enterprise 75 | ${neo4j.version} 76 | 77 | 78 | org.neo4j 79 | neo4j-lucene-index 80 | ${neo4j.version} 81 | 82 | 83 | 84 | 85 | 86 | org.apache.maven.plugins 87 | maven-compiler-plugin 88 | 2.1 89 | 90 | 1.7 91 | 1.7 92 | 93 | 94 | 95 | maven-assembly-plugin 96 | 97 | 98 | batch-import 99 | 100 | 101 | 102 | org.neo4j.batchimport.Importer 103 | 104 | 105 | 106 | jar-with-dependencies 107 | 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Neo4j (CSV) Batch Importer 2 | 3 | ## Neo4j 2.2+ neo4j-import tool 4 | 5 | 此工具的具体使用步骤可参见本人博客:https://my.oschina.net/u/2538940/blog/883829 6 | 7 | 直接下载已编译好的工具: 8 | https://github.com/mo9527/batch-import-tool 9 | 10 | 与原始版本比较,本版本主要做了以下修改:\ 11 | 1、修复了导入.gz关系压缩文件时,win环境会出现关系无法导入的情况。\ 12 | 2、增加程序对csv文件的容错性,即使csv文件的某一行出现编码或断行问题,也不会影响接下来数据的\ 13 | 
导入,前提是每一行数据的字节长度不超过5000。如果超过5000,可自行修改org.neo4j.batchimport.CSVParser\ 14 | 文件的第171行。 15 | 16 | 17 | Since version 2.2.0 Neo4j comes with a **high-performance import tool** out of the box that takes many ideas from this one, but is way more scalable across CPUs and has low memory requirements. 18 | 19 | The only cases that are not covered are repeated imports in existing stores and population of manual indexes. Please consider the built-in and officially supported tool first, before falling back onto this one. 20 | 21 | The simplest invocation is `/path/to/neo4j/bin/neo4j-import --into graph.db --nodes nodes.csv --relationships rels.csv` with the header format being similar to this one. For a quick intro check the [developer pages](http://neo4j.com/developer/guide-import-csv/#_super_fast_batch_importer_for_huge_datasets). 22 | There is much more to it, please see the [Neo4j reference manual](http://neo4j.com/docs/stable/import-tool.html). 23 | 24 | ## Licensing 25 | 26 | This software is licensed under the [GPLv3](http://www.gnu.org/licenses/gpl-3.0.en.html) for now. 27 | You can ask [Neo Technology](http://neotechnology.com) about a different licensing agreement. 28 | 29 | __Works with Neo4j 2.x__ 30 | 31 | ## Binary Download 32 | 33 | To simply use it (no source/git/maven required): 34 | * [download 2.2 zip](https://dl.dropboxusercontent.com/u/14493611/batch_importer_22.zip) 35 | * unzip 36 | * run `import.sh test.db nodes.csv rels.csv` (on Windows: `import.bat`) 37 | * after the import point your `/path/to/neo4j/conf/neo4j-server.properties` to this `test.db` directory, 38 | or copy the data over to your server `cp -r test.db/* /path/to/neo4j/data/graph.db/` 39 | 40 | You provide one **tab separated** csv file for nodes and one for relationships (optionally more for indexes) 41 | 42 | Example data for the files is a small family network 43 | 44 | ## File format 45 | 46 | * **tab separated** csv files 47 | * Property names in first row. 
48 | * If only one file is initially imported, the row number corresponds to the node-id (*starting with 0*) 49 | * Property values not listed will not be set on the nodes or relationships. 50 | * Optionally property fields can have a type (defaults to String) indicated with name:type where type is one of 51 | (int, long, float, double, boolean, byte, short, char, string). The string value is then converted to that type. 52 | Conversion failure will result in abort of the import operation. 53 | * There is a separate "label" type, which should be used for relationship types and/or node labels, (`labels:label`) 54 | * Property fields may also be arrays by adding "_array" to the types above and separating the data with commas. 55 | * for non-ascii characters make sure to add `-Dfile.encoding=UTF-8` to the commandline arguments 56 | * Optionally automatic indexing of properties can be configured with a header like `name:string:users` and a configured index in `batch.properties` like `batch_import.node_index=exact` 57 | then the property `name` will be indexed in the `users` index for each row with a value there 58 | * multiple files for nodes and rels, comma separated, without spaces like "node1.csv,node2.csv" 59 | * you can specify concrete, externally provided node-id's with: `i:id`, both in the node and relationship-files 60 | * csv files can be zipped individually as *.gz or *.zip 61 | 62 | ## Examples 63 | 64 | There is also a `sample` directory, please run from the main directory `./import.sh test.db sample/nodes.csv sample/rels.csv` 65 | 66 | ### nodes.csv 67 | 68 | name l:label age works_on 69 | Michael Person,Father 37 neo4j 70 | Selina Person,Child 14 71 | Rana Person,Child 6 72 | Selma Person,Child 4 73 | 74 | ### rels.csv 75 | 76 | Note that the node-id references are numbered from 0 (since Neo4j 2.0) 77 | 78 | start end type since counter:int 79 | 0 1 FATHER_OF 1998-07-10 1 80 | 0 2 FATHER_OF 2007-09-15 2 81 | 0 3 FATHER_OF 2008-05-03 3 82 | 2 3 SISTER_OF 
2008-05-03 5 83 | 1 2 SISTER_OF 2007-09-15 7 84 | 85 | 86 | ## Execution 87 | 88 | Just use the provided shell script `import.sh` or `import.bat` on Windows 89 | 90 | import.sh test.db nodes.csv rels.csv 91 | 92 | 93 | ### For Developers 94 | 95 | If you want to work on the code and run the importer after making changes: 96 | 97 | mvn clean compile exec:java -Dexec.mainClass="org.neo4j.batchimport.Importer" -Dexec.args="neo4j/data/graph.db nodes.csv rels.csv" 98 | 99 | or 100 | 101 | java -server -Dfile.encoding=UTF-8 -Xmx4G -jar target/batch-import-jar-with-dependencies.jar neo4j/data/graph.db nodes.csv rels.csv 102 | 103 | 104 | ynagzet:batchimport mh$ rm -rf target/db 105 | ynagzet:batchimport mh$ mvn clean compile assembly:single 106 | [INFO] Scanning for projects... 107 | [INFO] ------------------------------------------------------------------------ 108 | [INFO] Building Simple Batch Importer 109 | [INFO] task-segment: [clean, compile, assembly:single] 110 | [INFO] ------------------------------------------------------------------------ 111 | ... 
112 | [INFO] Building jar: /Users/mh/java/neo/batchimport/target/batch-import-jar-with-dependencies.jar 113 | [INFO] ------------------------------------------------------------------------ 114 | [INFO] BUILD SUCCESSFUL 115 | [INFO] ------------------------------------------------------------------------ 116 | ynagzet:batchimport mh$ java -server -Xmx4G -jar target/batch-import-jar-with-dependencies.jar target/db nodes.csv rels.csv 117 | Physical mem: 16384MB, Heap size: 3640MB 118 | 119 | Configuration: 120 | use_memory_mapped_buffers=false 121 | neostore.nodestore.db.mapped_memory=200M 122 | neostore.relationshipstore.db.mapped_memory=1000M 123 | neostore.propertystore.db.mapped_memory=1000M 124 | neostore.propertystore.db.strings.mapped_memory=100M 125 | neostore.propertystore.db.arrays.mapped_memory=215M 126 | neo_store=/Users/mh/java/neo/batchimport/test.db 127 | dump_configuration=true 128 | cache_type=none 129 | 130 | ........................................................................... 131 | Importing 7500000 Nodes took 17 seconds 132 | ....................................................................................................35818 ms 133 | ....................................................................................................39343 ms 134 | ....................................................................................................41788 ms 135 | ....................................................................................................48897 ms 136 | ............ 
137 | Importing 41246740 Relationships took 170 seconds 138 | Total 212 seconds 139 | ynagzet:batchimport mh$ du -sh test.db 140 | 3,2G test.db 141 | 142 | ## Parameters 143 | 144 | *First parameter* MIGHT be the property-file name, if so it has to end with `.properties`, then this file will be used and all other parameters are consumed as usual 145 | 146 | *First parameter* - the graph database directory, a new db will be created in the directory except when `batch_import.keep_db=true` is set in `batch.properties`. 147 | 148 | *Second parameter* - a comma separated list of *node-csv-files* 149 | 150 | *Third parameter* - a comma separated list of *relationship-csv-files* 151 | 152 | It is also possible to specify those two file-lists in the config: 153 | 154 | ```` 155 | batch_import.nodes_files=nodes1.csv[,nodes2.csv] 156 | batch_import.rels_files=rels1.csv[,rels2.csv] 157 | ```` 158 | 159 | *Fourth parameter* - index configuration each a set of 4 values: `node_index users fulltext nodes_index.csv` or more generally: `node-or-rel-index index-name index-type index-file` 160 | 161 | This parameter set can be repeatedly used, see below. It is also possible to configure this in the config (`batch.properties`) 162 | 163 | ```` 164 | batch_import.node_index.users=exact 165 | ```` 166 | 167 | ## Schema indexes 168 | 169 | Currently schema indexes are not created by the batch-inserter, you could create them upfront and use `batch_import.keep_db=true` to work with the existing database. 170 | You then have the option of specifying labels for your nodes using a column header like `type:label` and a comma separated list of label values. 171 | Then on shutdown of the import Neo4j will populate the schema indexes with nodes with the appropriate labels and properties automatically. 
172 | (The index creation is As a rough estimate the index creation will 173 | 174 | ## (Legacy) Indexing 175 | 176 | ### Indexing of inserted properties 177 | 178 | You can automatically index properties of nodes and relationships by adding ":indexName" to the property-column-header. 179 | Just configure the indexes in `batch.properties` like so: 180 | 181 | ```` 182 | batch_import.node_index.users=exact 183 | ```` 184 | 185 | ```` 186 | name:string:users age works_on 187 | Michael 37 neo4j 188 | Selina 14 189 | Rana 6 190 | Selma 4 191 | ```` 192 | 193 | **If you use `node_auto_index` as the index name, you can also initially populate Neo4j's automatic node index which is then 194 | later used and updated while working with the database.** 195 | 196 | 197 | In the relationships-file you can optionally specify that the start and end-node should be looked up from the index in the same way 198 | 199 | ```` 200 | name:string:users name:string:users type since counter:int 201 | Michael Selina FATHER_OF 1998-07-10 1 202 | Michael Rana FATHER_OF 2007-09-15 2 203 | Michael Selma FATHER_OF 2008-05-03 3 204 | Rana Selma SISTER_OF 2008-05-03 5 205 | Selina Rana SISTER_OF 2007-09-15 7 206 | ```` 207 | 208 | ### Explicit Indexing 209 | 210 | Optionally you can add nodes and relationships to indexes. 
211 | 212 | Add four arguments per each index to command line: 213 | 214 | To create a full text node index called users using nodes_index.csv: 215 | 216 | ```` 217 | node_index users fulltext nodes_index.csv 218 | ```` 219 | 220 | To create an exact relationship index called worked using rels_index.csv: 221 | 222 | ```` 223 | rel_index worked exact rels_index.csv 224 | ```` 225 | 226 | Example command line: 227 | 228 | ```` 229 | ./import.sh test.db nodes.csv rels.csv node_index users fulltext nodes_index.csv rel_index worked exact rels_index.csv 230 | ```` 231 | 232 | ### Using Neo4j's Automatic Indexing 233 | 234 | The auto-indexing elsewhere in this file pertains to the *batch inserter's* ability to automatically index. If you want to 235 | use this cool feature from the batch inserter, there's a little gotcha. You still need to enable the batch inserter's feature 236 | with `batch_import.node_index` but then instead of specifying the name of a regular index, specify the auto index's name like so: 237 | 238 | ```` 239 | batch_import.node_index.node_auto_index=exact 240 | ```` 241 | 242 | And you have to make sure to also enable automatic indexing in your regular Neo4j database's (`conf/neo4j.properties`) and 243 | specify the correct node properties to be indexed. 
244 | 245 | ## Examples 246 | 247 | ### nodes_index.csv 248 | 249 | ```` 250 | id name language 251 | 0 Victor Richards West Frisian 252 | 1 Virginia Shaw Korean 253 | 2 Lois Simpson Belarusian 254 | 3 Randy Bishop Hiri Motu 255 | 4 Lori Mendoza Tok Pisin 256 | ```` 257 | 258 | ### rels_index.csv 259 | 260 | ```` 261 | id property1 property2 262 | 0 cwqbnxrv rpyqdwhk 263 | 1 qthnrret tzjmmhta 264 | 2 dtztaqpy pbmcdqyc 265 | ```` 266 | 267 | ## Configuration 268 | 269 | The Importer is configured through a supplied `batch.properties` file: 270 | 271 | #### Memory Mapping I/O Config 272 | 273 | Most important is the memory config. You should try to have enough RAM to map as much of your store-files into memory as possible. 274 | 275 | At least the node-store and large parts of the relationship-store should be mapped. The property- and string-stores are mostly 276 | append only so don't need that much RAM. Below is an example for about 6GB RAM, to leave room for the heap and also OS and OS caches. 
277 | 278 | ```` 279 | cache_type=none 280 | use_memory_mapped_buffers=true 281 | # 14 bytes per node 282 | neostore.nodestore.db.mapped_memory=200M 283 | # 33 bytes per relationships 284 | neostore.relationshipstore.db.mapped_memory=3G 285 | # 38 bytes per property 286 | neostore.propertystore.db.mapped_memory=500M 287 | # 60 bytes per long-string block 288 | neostore.propertystore.db.strings.mapped_memory=500M 289 | neostore.propertystore.db.index.keys.mapped_memory=5M 290 | neostore.propertystore.db.index.mapped_memory=5M 291 | ```` 292 | 293 | #### Indexes (experimental) 294 | 295 | ```` 296 | batch_import.node_index.users=exact 297 | batch_import.node_index.articles=fulltext 298 | batch_import.relationship_index.friends=exact 299 | ```` 300 | 301 | #### CSV (experimental) 302 | 303 | ```` 304 | batch_import.csv.quotes=true // default, set to false for faster, experimental csv-reader 305 | batch_import.csv.delim=, 306 | ```` 307 | 308 | ##### Index-Cache (experimental) 309 | 310 | ```` 311 | batch_import.mapdb_cache.disable=true 312 | ```` 313 | 314 | ##### Keep Database (experimental) 315 | 316 | ```` 317 | batch_import.keep_db=true 318 | ```` 319 | 320 | ## Utilities 321 | 322 | ### TestDataGenerator 323 | 324 | It is a dumb random test data generator (`org.neo4j.batchimport.TestDataGenerator`) that you can run with 325 | 326 | ./generate.sh #nodes #max-rels-per-node REL1,REL2,REL3 LABEL1,LABEL2,LABEL3 327 | 328 | Will generate nodes.csv and rels.csv for those numbers 329 | 330 | 331 | ### Relationship-Sorter 332 | 333 | Sorts a given relationship-CSV file by min(start,end) as required for the parallel sorter. Uses the data-pump sorter from mapdb 334 | for the actual sorting with a custom Comparator. 
335 | 336 | `org.neo4j.batchimport.utils.RelationshipSorter` rels-input.csv rels-output.csv 337 | 338 | 339 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | . ./settings.sh 2 | 3 | mvn clean test-compile exec:java -Dexec.mainClass=org.neo4j.batchimport.DisruptorTest -Dexec.classpathScope=test -------------------------------------------------------------------------------- /sample/batch.properties: -------------------------------------------------------------------------------- 1 | dump_configuration=false 2 | cache_type=none 3 | use_memory_mapped_buffers=true 4 | neostore.propertystore.db.index.keys.mapped_memory=5M 5 | neostore.propertystore.db.index.mapped_memory=5M 6 | neostore.nodestore.db.mapped_memory=200M 7 | neostore.relationshipstore.db.mapped_memory=500M 8 | neostore.propertystore.db.mapped_memory=200M 9 | neostore.propertystore.db.strings.mapped_memory=200M 10 | 11 | batch_import.node_index.users=exact -------------------------------------------------------------------------------- /sample/import.sh: -------------------------------------------------------------------------------- 1 | echo "Run in main directory sh sample/import.sh" 2 | mvn test-compile exec:java -Dexec.mainClass="org.neo4j.batchimport.Importer" \ 3 | -Dexec.args="sample/batch.properties target/graph.db sample/nodes.csv,sample/nodes2.csv sample/rels.csv" -------------------------------------------------------------------------------- /sample/nodes.csv: -------------------------------------------------------------------------------- 1 | name:string:users age works_on 2 | Michael 37 neo4j 3 | Selina 14 -------------------------------------------------------------------------------- /sample/nodes2.csv: -------------------------------------------------------------------------------- 1 | name:string:users age works_on 2 | Rana 6 3 | Selma 4 
-------------------------------------------------------------------------------- /sample/rels.csv: -------------------------------------------------------------------------------- 1 | name:string:users name:string:users type since counter:int 2 | Michael Selina FATHER_OF 1998-07-10 1 3 | Michael Rana FATHER_OF 2007-09-15 2 4 | Michael Selma FATHER_OF 2008-05-03 3 5 | Rana Selma SISTER_OF 2008-05-03 5 6 | Selina Rana SISTER_OF 2007-09-15 7 -------------------------------------------------------------------------------- /settings.sh: -------------------------------------------------------------------------------- 1 | MEMORY_OPTS="-Xmx50G -Xms50G -server -d64 -Xmn3g -XX:SurvivorRatio=2" 2 | GC_OPTS="-XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:ParallelCMSThreads=4 -XX:+CMSParallelRemarkEnabled -XX:+CMSIncrementalMode -XX:+CMSIncrementalPacing -XX:CMSIncrementalDutyCycle=10 -XX:CMSFullGCsBeforeCompaction=1 " 3 | 4 | PRINT_GC_OPTS="-XX:+PrintTenuringDistribution -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:gc.log" 5 | 6 | # PROFILE_OPTS="-agentpath:/root/yourkit/bin/linux-x86-64/libyjpagent.so=port=10001" 7 | 8 | #-XX:+PrintGCApplicationStoppedTime -XX:+PrintGCApplicationConcurrentTime -XX:+PrintHeapAtGC -XX:+PrintGCTaskTimeStamps 9 | 10 | export MAVEN_OPTS="$PROFILE_OPTS $MEMORY_OPTS $GC_OPTS $PRINT_GC_OPTS" -------------------------------------------------------------------------------- /sort.sh: -------------------------------------------------------------------------------- 1 | HEAP=4G 2 | IN=${1-rels.csv} 3 | shift 4 | OUT=${1-rels-sorted.csv} 5 | CP="" 6 | for i in lib/*.jar; do CP="$CP":"$i"; done 7 | 8 | echo java -classpath $CP -Xmx$HEAP -Xms$HEAP -Dfile.encoding=UTF-8 org.neo4j.batchimport.utils.RelationshipSorter "$IN" "$OUT" 9 | java -classpath $CP -Xmx$HEAP -Xms$HEAP -Dfile.encoding=UTF-8 org.neo4j.batchimport.utils.RelationshipSorter "$IN" "$OUT" 10 | -------------------------------------------------------------------------------- 
/**
 * A very simple CSV parser released under a commercial-friendly license.
 * This just implements splitting a single line into fields.
 *
 * <p>A parser keeps state between calls (the unfinished quoted field in
 * {@code pending} and the {@code inField} flag) and uses no synchronization,
 * so one instance must not be used for two inputs at the same time.
 *
 * @author Glen Smith
 * @author Rainer Pruy
 */
public class CSVParser {

    /**
     * The default separator to use if none is supplied to the constructor.
     */
    public static final char DEFAULT_SEPARATOR = ',';

    /**
     * Initial capacity of the buffer in which a field is assembled.
     */
    public static final int INITIAL_READ_SIZE = 128;

    /**
     * The default quote character to use if none is supplied to the
     * constructor.
     */
    public static final char DEFAULT_QUOTE_CHARACTER = '"';

    /**
     * The default escape character to use if none is supplied to the
     * constructor.
     */
    public static final char DEFAULT_ESCAPE_CHARACTER = '\\';

    /**
     * The default strict quote behavior to use if none is supplied to the
     * constructor.
     */
    public static final boolean DEFAULT_STRICT_QUOTES = false;

    /**
     * The default leading whitespace behavior to use if none is supplied to the
     * constructor.
     */
    public static final boolean DEFAULT_IGNORE_LEADING_WHITESPACE = true;

    /**
     * This is the "null" character - if a value is set to this then it is ignored.
     * I.E. if the quote character is set to null then there is no quote character.
     */
    public static final char NULL_CHARACTER = '\0';

    /**
     * Upper bound on the length of an unfinished quoted field. Beyond it
     * {@link #isPending()} assumes the quoting of the input is broken and
     * drops the partial field instead of accumulating the rest of the file.
     */
    private static final int MAX_PENDING_LENGTH = 5000;

    private final char separator;

    private final char quotechar;

    private final char escape;

    private final boolean strictQuotes;

    private final boolean ignoreLeadingWhiteSpace;

    /** Text of a quoted field that was still open when the previous line ended, or null. */
    private String pending;

    /** True while the characters being read belong to a field that has already started. */
    private boolean inField = false;

    /**
     * Constructs CSVParser using a comma for the separator.
     */
    public CSVParser() {
        this(DEFAULT_SEPARATOR, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVParser with supplied separator.
     *
     * @param separator the delimiter to use for separating entries.
     */
    public CSVParser(char separator) {
        this(separator, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVParser with supplied separator and quote char.
     *
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     */
    public CSVParser(char separator, char quotechar) {
        this(separator, quotechar, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVParser with supplied separator, quote char and escape char.
     *
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param escape    the character to use for escaping a separator or quote
     */
    public CSVParser(char separator, char quotechar, char escape) {
        this(separator, quotechar, escape, DEFAULT_STRICT_QUOTES);
    }

    /**
     * Constructs CSVParser with supplied separator, quote char and escape char.
     * Allows setting the "strict quotes" flag.
     *
     * @param separator    the delimiter to use for separating entries
     * @param quotechar    the character to use for quoted elements
     * @param escape       the character to use for escaping a separator or quote
     * @param strictQuotes if true, characters outside the quotes are ignored
     */
    public CSVParser(char separator, char quotechar, char escape, boolean strictQuotes) {
        this(separator, quotechar, escape, strictQuotes, DEFAULT_IGNORE_LEADING_WHITESPACE);
    }

    /**
     * Constructs CSVParser with supplied separator, quote char and escape char.
     * Allows setting the "strict quotes" and "ignore leading whitespace" flags.
     *
     * @param separator               the delimiter to use for separating entries
     * @param quotechar               the character to use for quoted elements
     * @param escape                  the character to use for escaping a separator or quote
     * @param strictQuotes            if true, characters outside the quotes are ignored
     * @param ignoreLeadingWhiteSpace if true, white space in front of a quote in a field is ignored
     * @throws UnsupportedOperationException if two of the three characters are equal
     *                                       (and not {@link #NULL_CHARACTER}) or the separator
     *                                       is {@link #NULL_CHARACTER}
     */
    public CSVParser(char separator, char quotechar, char escape, boolean strictQuotes, boolean ignoreLeadingWhiteSpace) {
        if (anyCharactersAreTheSame(separator, quotechar, escape)) {
            throw new UnsupportedOperationException("The separator, quote, and escape characters must be different!");
        }
        if (separator == NULL_CHARACTER) {
            throw new UnsupportedOperationException("The separator character must be defined!");
        }
        this.separator = separator;
        this.quotechar = quotechar;
        this.escape = escape;
        this.strictQuotes = strictQuotes;
        this.ignoreLeadingWhiteSpace = ignoreLeadingWhiteSpace;
    }

    private boolean anyCharactersAreTheSame(char separator, char quotechar, char escape) {
        return isSameCharacter(separator, quotechar) || isSameCharacter(separator, escape) || isSameCharacter(quotechar, escape);
    }

    /** Two characters clash only when they are equal and actually defined. */
    private boolean isSameCharacter(char c1, char c2) {
        return c1 != NULL_CHARACTER && c1 == c2;
    }

    /**
     * Tells whether a quoted field is still open after the last call.
     *
     * <p>Note that this method is not a pure query: once the unfinished field
     * has grown beyond {@value #MAX_PENDING_LENGTH} characters the quoting of
     * the input is assumed to be broken, the partial field is discarded and
     * {@code false} is returned. This keeps a single stray quote from pulling
     * the remainder of the file into memory.
     *
     * @return true if something was left over from last call(s)
     */
    public boolean isPending() {
        if (pending != null && pending.length() > MAX_PENDING_LENGTH) {
            pending = null;
            return false;
        }
        return pending != null;
    }

    /**
     * Parses one physical line; a quoted field that is still open at the end of
     * the line is kept and continued by the next call.
     *
     * @param nextLine the string to parse, or null to flush the kept field
     * @return the fields completed by this line, or null if nextLine is null and nothing was kept
     * @throws IOException declared for symmetry with {@link #parseLine(String)}
     */
    public String[] parseLineMulti(String nextLine) throws IOException {
        return parseLine(nextLine, true);
    }

    /**
     * Parses one self-contained line.
     *
     * @param nextLine the string to parse
     * @return the fields of the line, or null if nextLine is null
     * @throws IOException if a quoted field is not closed on this line
     */
    public String[] parseLine(String nextLine) throws IOException {
        return parseLine(nextLine, false);
    }

    /**
     * Parses an incoming String and returns an array of elements.
     *
     * @param nextLine the string to parse
     * @param multi    true if a quoted field may continue on the following line
     * @return the tokenized list of elements, or null if nextLine is null and no field is pending
     * @throws IOException if {@code multi} is false and a quoted field is left open
     */
    private String[] parseLine(String nextLine, boolean multi) throws IOException {

        // A single-line parse never continues an earlier line.
        if (!multi && pending != null) {
            pending = null;
        }

        if (nextLine == null) {
            if (pending != null) {
                String s = pending;
                pending = null;
                return new String[]{s};
            } else {
                return null;
            }
        }

        List<String> tokensOnThisLine = new ArrayList<String>();
        StringBuilder sb = new StringBuilder(INITIAL_READ_SIZE);
        boolean inQuotes = false;
        if (pending != null) {
            // Continue the quoted field left open by the previous line.
            sb.append(pending);
            pending = null;
            inQuotes = true;
        } else {
            // A fresh record starts outside any field. Without this reset the flag
            // left by the previous line leaked into this one and a leading "" was
            // read as an escaped quote instead of an empty quoted field.
            inField = false;
        }
        for (int i = 0; i < nextLine.length(); i++) {

            char c = nextLine.charAt(i);
            if (c == this.escape) {
                // An escape that does not precede a quote or another escape is dropped.
                if (isNextCharacterEscapable(nextLine, inQuotes || inField, i)) {
                    sb.append(nextLine.charAt(i + 1));
                    i++;
                }
            } else if (c == quotechar) {
                if (isNextCharacterEscapedQuote(nextLine, inQuotes || inField, i)) {
                    sb.append(nextLine.charAt(i + 1));
                    i++;
                } else {
                    // the tricky case of an embedded quote in the middle: a,bc"d"ef,g
                    if (!strictQuotes) {
                        if (i > 2 //not on the beginning of the line
                                && nextLine.charAt(i - 1) != this.separator //not at the beginning of an escape sequence
                                && nextLine.length() > (i + 1)
                                && nextLine.charAt(i + 1) != this.separator //not at the end of an escape sequence
                                ) {

                            if (ignoreLeadingWhiteSpace && sb.length() > 0 && isAllWhiteSpace(sb)) {
                                sb.setLength(0); //discard white space leading up to quote
                            } else {
                                sb.append(c);
                            }

                        }
                    }

                    inQuotes = !inQuotes;
                }
                inField = !inField;
            } else if (c == separator && !inQuotes) {
                tokensOnThisLine.add(sb.toString());
                sb.setLength(0); // start work on next token
                inField = false;
            } else {
                if (!strictQuotes || inQuotes) {
                    sb.append(c);
                    inField = true;
                }
            }
        }
        // line is done - check status
        if (inQuotes) {
            if (multi) {
                // continuing a quoted section, re-append newline
                sb.append('\n');
                pending = sb.toString();
                sb = null; // this partial content is not to be added to field list yet
            } else {
                throw new IOException("Un-terminated quoted field at end of CSV line");
            }
        }
        if (sb != null) {
            tokensOnThisLine.add(sb.toString());
        }
        return tokensOnThisLine.toArray(new String[tokensOnThisLine.size()]);

    }

    /**
     * precondition: the current character is a quote or an escape
     *
     * @param nextLine the current line
     * @param inQuotes true if the current context is quoted
     * @param i        current index in line
     * @return true if the following character is a quote
     */
    private boolean isNextCharacterEscapedQuote(String nextLine, boolean inQuotes, int i) {
        return inQuotes  // we are in quotes, therefore there can be escaped quotes in here.
                && nextLine.length() > (i + 1)  // there is indeed another character to check.
                && nextLine.charAt(i + 1) == quotechar;
    }

    /**
     * precondition: the current character is an escape
     *
     * @param nextLine the current line
     * @param inQuotes true if the current context is quoted
     * @param i        current index in line
     * @return true if the following character is a quote or an escape character
     */
    protected boolean isNextCharacterEscapable(String nextLine, boolean inQuotes, int i) {
        return inQuotes  // we are in quotes, therefore there can be escaped quotes in here.
                && nextLine.length() > (i + 1)  // there is indeed another character to check.
                && (nextLine.charAt(i + 1) == quotechar || nextLine.charAt(i + 1) == this.escape);
    }

    /**
     * precondition: sb.length() > 0
     *
     * @param sb A sequence of characters to examine
     * @return true if every character in the sequence is whitespace
     */
    protected boolean isAllWhiteSpace(CharSequence sb) {
        for (int i = 0; i < sb.length(); i++) {
            if (!Character.isWhitespace(sb.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}
15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | import java.io.BufferedReader; 20 | import java.io.Closeable; 21 | import java.io.IOException; 22 | import java.io.Reader; 23 | import java.util.ArrayList; 24 | import java.util.List; 25 | 26 | /** 27 | * A very simple CSV reader released under a commercial-friendly license. 28 | * 29 | * @author Glen Smith 30 | * 31 | */ 32 | public class CSVReader implements Closeable { 33 | 34 | private BufferedReader br; 35 | 36 | private boolean hasNext = true; 37 | 38 | private CSVParser parser; 39 | 40 | private int skipLines; 41 | 42 | private boolean linesSkiped; 43 | 44 | /** 45 | * The default line to start reading. 46 | */ 47 | public static final int DEFAULT_SKIP_LINES = 0; 48 | 49 | /** 50 | * Constructs CSVReader using a comma for the separator. 51 | * 52 | * @param reader 53 | * the reader to an underlying CSV source. 54 | */ 55 | public CSVReader(Reader reader) { 56 | this(reader, CSVParser.DEFAULT_SEPARATOR, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER); 57 | } 58 | 59 | /** 60 | * Constructs CSVReader with supplied separator. 61 | * 62 | * @param reader 63 | * the reader to an underlying CSV source. 64 | * @param separator 65 | * the delimiter to use for separating entries. 66 | */ 67 | public CSVReader(Reader reader, char separator) { 68 | this(reader, separator, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER); 69 | } 70 | 71 | /** 72 | * Constructs CSVReader with supplied separator and quote char. 73 | * 74 | * @param reader 75 | * the reader to an underlying CSV source. 
76 | * @param separator 77 | * the delimiter to use for separating entries 78 | * @param quotechar 79 | * the character to use for quoted elements 80 | */ 81 | public CSVReader(Reader reader, char separator, char quotechar) { 82 | this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES, CSVParser.DEFAULT_STRICT_QUOTES); 83 | } 84 | 85 | /** 86 | * Constructs CSVReader with supplied separator, quote char and quote handling 87 | * behavior. 88 | * 89 | * @param reader 90 | * the reader to an underlying CSV source. 91 | * @param separator 92 | * the delimiter to use for separating entries 93 | * @param quotechar 94 | * the character to use for quoted elements 95 | * @param strictQuotes 96 | * sets if characters outside the quotes are ignored 97 | */ 98 | public CSVReader(Reader reader, char separator, char quotechar, boolean strictQuotes) { 99 | this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES, strictQuotes); 100 | } 101 | 102 | /** 103 | * Constructs CSVReader with supplied separator and quote char. 104 | * 105 | * @param reader 106 | * the reader to an underlying CSV source. 107 | * @param separator 108 | * the delimiter to use for separating entries 109 | * @param quotechar 110 | * the character to use for quoted elements 111 | * @param escape 112 | * the character to use for escaping a separator or quote 113 | */ 114 | 115 | public CSVReader(Reader reader, char separator, 116 | char quotechar, char escape) { 117 | this(reader, separator, quotechar, escape, DEFAULT_SKIP_LINES, CSVParser.DEFAULT_STRICT_QUOTES); 118 | } 119 | 120 | /** 121 | * Constructs CSVReader with supplied separator and quote char. 122 | * 123 | * @param reader 124 | * the reader to an underlying CSV source. 
125 | * @param separator 126 | * the delimiter to use for separating entries 127 | * @param quotechar 128 | * the character to use for quoted elements 129 | * @param line 130 | * the line number to skip for start reading 131 | */ 132 | public CSVReader(Reader reader, char separator, char quotechar, int line) { 133 | this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, line, CSVParser.DEFAULT_STRICT_QUOTES); 134 | } 135 | 136 | /** 137 | * Constructs CSVReader with supplied separator and quote char. 138 | * 139 | * @param reader 140 | * the reader to an underlying CSV source. 141 | * @param separator 142 | * the delimiter to use for separating entries 143 | * @param quotechar 144 | * the character to use for quoted elements 145 | * @param escape 146 | * the character to use for escaping a separator or quote 147 | * @param line 148 | * the line number to skip for start reading 149 | */ 150 | public CSVReader(Reader reader, char separator, char quotechar, char escape, int line) { 151 | this(reader, separator, quotechar, escape, line, CSVParser.DEFAULT_STRICT_QUOTES); 152 | } 153 | 154 | /** 155 | * Constructs CSVReader with supplied separator and quote char. 156 | * 157 | * @param reader 158 | * the reader to an underlying CSV source. 159 | * @param separator 160 | * the delimiter to use for separating entries 161 | * @param quotechar 162 | * the character to use for quoted elements 163 | * @param escape 164 | * the character to use for escaping a separator or quote 165 | * @param line 166 | * the line number to skip for start reading 167 | * @param strictQuotes 168 | * sets if characters outside the quotes are ignored 169 | */ 170 | public CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes) { 171 | this(reader, separator, quotechar, escape, line, strictQuotes, CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE); 172 | } 173 | 174 | /** 175 | * Constructs CSVReader with supplied separator and quote char. 
176 | * 177 | * @param reader 178 | * the reader to an underlying CSV source. 179 | * @param separator 180 | * the delimiter to use for separating entries 181 | * @param quotechar 182 | * the character to use for quoted elements 183 | * @param escape 184 | * the character to use for escaping a separator or quote 185 | * @param line 186 | * the line number to skip for start reading 187 | * @param strictQuotes 188 | * sets if characters outside the quotes are ignored 189 | * @param ignoreLeadingWhiteSpace 190 | * it true, parser should ignore white space before a quote in a field 191 | */ 192 | public CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes, boolean ignoreLeadingWhiteSpace) { 193 | this.br = new BufferedReader(reader); 194 | this.parser = new CSVParser(separator, quotechar, escape, strictQuotes, ignoreLeadingWhiteSpace); 195 | this.skipLines = line; 196 | } 197 | 198 | /** 199 | * Reads the entire file into a List with each element being a String[] of 200 | * tokens. 201 | * 202 | * @return a List of String[], with each String[] representing a line of the 203 | * file. 204 | * 205 | * @throws IOException 206 | * if bad things happen during the read 207 | */ 208 | public List readAll() throws IOException { 209 | 210 | List allElements = new ArrayList(); 211 | while (hasNext) { 212 | String[] nextLineAsTokens = readNext(); 213 | if (nextLineAsTokens != null) 214 | allElements.add(nextLineAsTokens); 215 | } 216 | return allElements; 217 | 218 | } 219 | 220 | /** 221 | * Reads the next line from the buffer and converts to a string array. 222 | * 223 | * @return a string array with each comma-separated element as a separate 224 | * entry. 
225 | * 226 | * @throws IOException 227 | * if bad things happen during the read 228 | */ 229 | public String[] readNext() throws IOException { 230 | 231 | String[] result = null; 232 | do { 233 | String nextLine = getNextLine(); 234 | if (!hasNext) { 235 | return result; // should throw if still pending? 236 | } 237 | String[] r = parser.parseLineMulti(nextLine); 238 | if (r.length > 0) { 239 | if (result == null) { 240 | result = r; 241 | } else { 242 | String[] t = new String[result.length+r.length]; 243 | System.arraycopy(result, 0, t, 0, result.length); 244 | System.arraycopy(r, 0, t, result.length, r.length); 245 | result = t; 246 | } 247 | } 248 | } while (parser.isPending()); 249 | return result; 250 | } 251 | 252 | /** 253 | * Reads the next line from the file. 254 | * 255 | * @return the next line from the file without trailing newline 256 | * @throws IOException 257 | * if bad things happen during the read 258 | */ 259 | private String getNextLine() throws IOException { 260 | if (!this.linesSkiped) { 261 | for (int i = 0; i < skipLines; i++) { 262 | br.readLine(); 263 | } 264 | this.linesSkiped = true; 265 | } 266 | String nextLine = br.readLine(); 267 | if (nextLine == null) { 268 | hasNext = false; 269 | } 270 | return hasNext ? nextLine : null; 271 | } 272 | 273 | /** 274 | * Closes the underlying reader. 
275 | * 276 | * @throws IOException if the close fails 277 | */ 278 | public void close() throws IOException{ 279 | br.close(); 280 | } 281 | 282 | } 283 | -------------------------------------------------------------------------------- /src/main/java/org/neo4j/batchimport/Importer.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport; 2 | 3 | import org.neo4j.batchimport.importer.ChunkerLineData; 4 | import org.neo4j.batchimport.importer.CsvLineData; 5 | import org.neo4j.batchimport.importer.RelType; 6 | import org.neo4j.batchimport.importer.Type; 7 | import org.neo4j.batchimport.index.MapDbCachingIndexProvider; 8 | import org.neo4j.batchimport.utils.Config; 9 | import org.neo4j.graphdb.DynamicLabel; 10 | import org.neo4j.graphdb.Label; 11 | import org.neo4j.graphdb.index.IndexManager; 12 | import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider; 13 | import org.neo4j.io.fs.FileUtils; 14 | import org.neo4j.unsafe.batchinsert.BatchInserter; 15 | import org.neo4j.unsafe.batchinsert.BatchInserters; 16 | import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider; 17 | import org.neo4j.unsafe.batchinsert.BatchInserterIndex; 18 | 19 | import java.io.*; 20 | import java.util.*; 21 | import java.util.zip.GZIPInputStream; 22 | 23 | import static org.neo4j.batchimport.Utils.join; 24 | import static org.neo4j.index.impl.lucene.legacy.LuceneIndexImplementation.EXACT_CONFIG; 25 | import static org.neo4j.index.impl.lucene.legacy.LuceneIndexImplementation.FULLTEXT_CONFIG; 26 | 27 | public class Importer { 28 | private static final Map SPATIAL_CONFIG = Collections.singletonMap(IndexManager.PROVIDER,"spatial"); 29 | private static final Label[] NO_LABELS = new Label[0]; 30 | public static final int BATCH = 10 * 1000 * 1000; 31 | private static Report report; 32 | private final Config config; 33 | private BatchInserter db; 34 | private BatchInserterIndexProvider indexProvider; 35 | Map 
indexes=new HashMap(); 36 | private Label[] labelsArray = NO_LABELS; 37 | 38 | public Importer(File graphDb, final Config config) throws IOException { 39 | this.config = config; 40 | db = createBatchInserter(graphDb, config); 41 | 42 | final boolean luceneOnlyIndex = config.isCachedIndexDisabled(); 43 | indexProvider = createIndexProvider(luceneOnlyIndex); 44 | Collection indexInfos = config.getIndexInfos(); 45 | if (indexInfos!=null) { 46 | for (IndexInfo indexInfo : indexInfos) { 47 | BatchInserterIndex index = indexInfo.isNodeIndex() ? nodeIndexFor(indexInfo.indexName, indexInfo.indexType) : relationshipIndexFor(indexInfo.indexName, indexInfo.indexType); 48 | indexes.put(indexInfo.indexName, index); 49 | } 50 | } 51 | 52 | report = createReport(); 53 | } 54 | 55 | protected StdOutReport createReport() { 56 | return new StdOutReport(BATCH, 100); 57 | } 58 | 59 | protected BatchInserterIndexProvider createIndexProvider(boolean luceneOnlyIndex) { 60 | return luceneOnlyIndex ? new LuceneBatchInserterIndexProvider(db) : new MapDbCachingIndexProvider(db); 61 | } 62 | 63 | protected BatchInserter createBatchInserter(File graphDb, Config config) throws IOException { 64 | return BatchInserters.inserter(new File(graphDb.getAbsolutePath()), config.getConfigData()); 65 | } 66 | 67 | // todo multiple nodes and rels files 68 | // todo nodes and rels-files in config 69 | // todo graphdb in config 70 | public static void main(String... 
args) throws IOException { 71 | System.err.println("Usage: Importer data/dir nodes.csv relationships.csv [node_index node-index-name fulltext|exact nodes_index.csv rel_index rel-index-name fulltext|exact rels_index.csv ....]"); 72 | System.err.println("Using: Importer "+join(args," ")); 73 | System.err.println(); 74 | 75 | final Config config = Config.convertArgumentsToConfig(args); 76 | 77 | File graphDb = new File(config.getGraphDbDirectory()); 78 | if (graphDb.exists() && !config.keepDatabase()) { 79 | FileUtils.deleteRecursively(graphDb); 80 | } 81 | 82 | Importer importer = new Importer(graphDb, config); 83 | importer.doImport(); 84 | } 85 | 86 | void finish() { 87 | indexProvider.shutdown(); 88 | db.shutdown(); 89 | report.finish(); 90 | } 91 | 92 | void importNodes(Reader reader) throws IOException { 93 | final LineData data = createLineData(reader, 0); 94 | report.reset(); 95 | boolean hasId = data.hasId(); 96 | //重复的id不会再次建立节点 含泪删掉,因为速度太慢了 97 | // List allIds = new LinkedList(); 98 | // allIds.clear(); 99 | while (data.processLine(null)) { 100 | Map properties = data.getProperties(); 101 | // if (properties.get("id") == null || properties.get("id") == "" || allIds.contains(properties.get("id"))){ 102 | // continue; 103 | // } 104 | // allIds.add(properties.get("id")); 105 | 106 | String[] labels = data.getTypeLabels(); 107 | long id; 108 | if (hasId) { 109 | id = data.getId(); 110 | db.createNode(id, properties, labelsFor(labels)); 111 | } else { 112 | id = db.createNode(properties, labelsFor(labels)); 113 | } 114 | for (Map.Entry> entry : data.getIndexData().entrySet()) { 115 | final BatchInserterIndex index = indexFor(entry.getKey()); 116 | if (index==null) 117 | throw new IllegalStateException("Index "+entry.getKey()+" not configured."); 118 | index.add(id, entry.getValue()); 119 | } 120 | report.dots(); 121 | if (report.getCount() % BATCH == 0) flushIndexes(); 122 | } 123 | flushIndexes(); 124 | report.finishImport("Nodes"); 125 | } 126 | 127 | private 
Map trimDataValue(LineData data) { 128 | Map properties = data.getProperties(); 129 | for (Map.Entry entry : properties.entrySet()){ 130 | String key = entry.getKey(); 131 | Object value = entry.getValue(); 132 | if (value != null ){ 133 | properties.put(key, value.toString().trim()); 134 | } 135 | } 136 | return properties; 137 | } 138 | 139 | private Label[] labelsFor(String[] labels) { 140 | if (labels == null || labels.length == 0) return NO_LABELS; 141 | if (labels.length != labelsArray.length) labelsArray = new Label[labels.length]; 142 | for (int i = labels.length - 1; i >= 0; i--) { 143 | if (labelsArray[i] == null || !labelsArray[i].name().equals(labels[i])) 144 | labelsArray[i] = DynamicLabel.label(labels[i]); 145 | } 146 | return labelsArray; 147 | } 148 | 149 | private long lookup(String index,String property,Object value) { 150 | Long id = null; 151 | try{ 152 | id = indexFor(index).get(property, value).getSingle(); 153 | }catch (Exception e){ 154 | e.printStackTrace(); 155 | id = null; 156 | } 157 | 158 | return id==null ? 
-1 : id; 159 | } 160 | 161 | private BatchInserterIndex indexFor(String index) { 162 | return indexes.get(index); 163 | } 164 | 165 | void importRelationships(Reader reader) throws IOException { 166 | final int offset = 3; 167 | final LineData data = createLineData(reader, offset); 168 | final RelType relType = new RelType(); 169 | long skipped=0; 170 | report.reset(); 171 | 172 | while (data.processLine(null)) { 173 | final Map properties = data.getProperties(); 174 | final long start = id(data, 0); 175 | final long end = id(data, 1); 176 | if (start==-1 || end==-1) { 177 | skipped++; 178 | continue; 179 | } 180 | RelType type = null; 181 | try { 182 | type = relType.update(data.getRelationshipTypeLabel()); 183 | }catch (Exception e){ 184 | skipped++; 185 | continue; 186 | } 187 | 188 | final long id = db.createRelationship(start, end, type, properties); 189 | for (Map.Entry> entry : data.getIndexData().entrySet()) { 190 | indexFor(entry.getKey()).add(id, entry.getValue()); 191 | } 192 | report.dots(); 193 | } 194 | String msg = "Relationships"; 195 | if (skipped > 0) msg += " skipped (" + skipped + ")"; 196 | report.finishImport(msg); 197 | } 198 | 199 | private void flushIndexes() { 200 | for (BatchInserterIndex index : indexes.values()) { 201 | index.flush(); 202 | } 203 | } 204 | 205 | private LineData createLineData(Reader reader, int offset) { 206 | final boolean useQuotes = config.quotesEnabled(); 207 | if (useQuotes) return new CsvLineData(reader, config.getDelimChar(this),offset); 208 | return new ChunkerLineData(reader, config.getDelimChar(this), offset); 209 | } 210 | 211 | private long id(LineData data, int column) { 212 | final LineData.Header header = data.getHeader()[column]; 213 | final Object value = data.getValue(column); 214 | if (header.indexName == null || header.type == Type.ID) { 215 | return id(value); 216 | } 217 | // System.out.println("indexName: " + header.indexName); 218 | // System.out.println("name: " + header.name); 219 | // 
System.out.println("value: " + value); 220 | return lookup(header.indexName, header.name, value); 221 | } 222 | 223 | void importIndex(String indexName, BatchInserterIndex index, Reader reader) throws IOException { 224 | final LineData data = createLineData(reader, 1); 225 | report.reset(); 226 | while (data.processLine(null)) { 227 | final Map properties = data.getProperties(); 228 | index.add(id(data.getValue(0)), properties); 229 | report.dots(); 230 | } 231 | 232 | report.finishImport("Done inserting into " + indexName + " Index"); 233 | } 234 | 235 | private BatchInserterIndex nodeIndexFor(String indexName, String indexType) { 236 | return indexProvider.nodeIndex(indexName, configFor(indexType)); 237 | } 238 | 239 | private BatchInserterIndex relationshipIndexFor(String indexName, String indexType) { 240 | return indexProvider.relationshipIndex(indexName, configFor(indexType)); 241 | } 242 | 243 | private Map configFor(String indexType) { 244 | if (indexType.equalsIgnoreCase("fulltext")) return FULLTEXT_CONFIG; 245 | if (indexType.equalsIgnoreCase("spatial")) return SPATIAL_CONFIG; 246 | return EXACT_CONFIG; 247 | } 248 | 249 | private long id(Object id) { 250 | return Long.parseLong(id.toString()); 251 | } 252 | 253 | private void importIndex(IndexInfo indexInfo) throws IOException { 254 | File indexFile = new File(indexInfo.indexFileName); 255 | if (!indexFile.exists()) { 256 | System.err.println("Index file "+indexFile+" does not exist"); 257 | return; 258 | } 259 | importIndex(indexInfo.indexName, indexes.get(indexInfo.indexName), createFileReader(indexFile)); 260 | } 261 | 262 | private void doImport() throws IOException { 263 | try { 264 | for (File file : config.getNodesFiles()) { 265 | System.out.println("importing node file name : " + file.getName()); 266 | importNodes(createFileReader(file)); 267 | } 268 | 269 | for (File file : config.getRelsFiles()) { 270 | System.out.println("importing rel file name : " + file.getName()); 271 | 
/**
 * Describes one index of the import: whether it indexes nodes or
 * relationships, its name, its type and, optionally, a file with entries to
 * load into it.
 *
 * <p>In the configuration an index is written as
 * {@code batch_import.<elementType>.<indexName>=<indexType>[:<indexFileName>]}.
 *
 * @author mh
 * @since 11.06.13
 */
public class IndexInfo {
    public final String elementType, indexName, indexType, indexFileName;

    /**
     * Reads the four values from consecutive positions of an argument array.
     * The values are taken as they are, without validation.
     *
     * @param args   argument array
     * @param offset position of the element type; name, type and file name follow
     */
    public IndexInfo(String[] args, int offset) {
        this.elementType = args[offset];
        this.indexName = args[offset+1];
        this.indexType = args[offset+2];
        this.indexFileName = args[offset+3];
    }

    /**
     * @param elementType   {@code node_index} or {@code relationship_index}
     * @param indexName     name of the index
     * @param indexType     {@code exact} or {@code fulltext}
     * @param indexFileName file with index entries, may be null
     * @throws IllegalArgumentException if elementType or indexType has another value
     */
    public IndexInfo(String elementType, String indexName, String indexType, String indexFileName) {
        if (!(elementType.equals("node_index") || elementType.equals("relationship_index"))) throw new IllegalArgumentException("ElementType has to be node_index or relationship_index, but is "+elementType);
        if (!(indexType.equals("exact") || indexType.equals("fulltext"))) throw new IllegalArgumentException("IndexType has to be exact or fulltext, but is "+indexType);
        this.elementType = elementType;
        this.indexName = indexName;
        this.indexType = indexType;
        this.indexFileName = indexFileName;
    }

    /**
     * Builds an IndexInfo from one configuration entry.
     *
     * @param entry configuration key and value
     * @return the index description, or null if the key is not an index key
     * @throws IllegalArgumentException if the index type in the value is not supported
     */
    public static IndexInfo fromConfigEntry(Map.Entry<String, String> entry) {
        if (!entry.getKey().matches("^batch_import\\.(node|relationship)_index\\..+")) return null;
        // Limit 3 keeps dots that are part of the index name.
        final String[] keyParts = entry.getKey().split("\\.", 3);
        final String elementType = keyParts[1];
        final String indexName = keyParts[2];
        // Limit 2: only the first colon separates type and file, so a file name
        // that contains a colon itself (e.g. C:\data\idx.csv) stays intact.
        final String[] valueParts = entry.getValue().split(":", 2);
        final String indexType = valueParts[0];
        final String indexFileName = valueParts.length > 1 && !valueParts[1].isEmpty() ? valueParts[1] : null;
        return new IndexInfo(elementType, indexName, indexType, indexFileName);
    }

    /** @return true if the element type is {@code node_index} */
    public boolean isNodeIndex() {
        return elementType.equals("node_index");
    }

    /** @return the configuration key under which this index is stored */
    public String getConfigKey() {
        return "batch_import."+elementType+"."+indexName;
    }

    /** @return the index type, followed by a colon and the file name if there is one */
    public String getConfigValue() {
        if (indexFileName == null) return indexType;
        return indexType+":"+indexFileName;
    }

    /**
     * Stores this index in the given configuration map.
     *
     * @param config map to add the entry to
     * @return the same map
     */
    public Map<String, String> addToConfig(Map<String, String> config) {
        config.put(getConfigKey(), getConfigValue());
        return config;
    }

    /** @return true if a file name is set and names an existing, readable regular file */
    public boolean shouldImportFile() {
        if (indexFileName == null) return false;
        final File file = new File(indexFileName);
        return file.exists() && file.isFile() && file.canRead();
    }
}
/**
 * Progress reporting callbacks used while importing.
 *
 * The descriptions below follow {@code StdOutReport}, the implementation that accompanies
 * this interface; other implementations may report differently.
 *
 * @author mh
 * @since 21.08.12
 */
public interface Report {

    /** Starts a new measurement: {@code StdOutReport} sets the item count back to zero and restarts its timers. */
    void reset();

    /** Records one processed item; {@code StdOutReport} increments the count and periodically prints progress. */
    void dots();

    /** @return the number of items recorded since the last {@link #reset()} */
    long getCount();

    /**
     * Signals that the import of one kind of element is complete.
     *
     * @param type label for what was imported, used by {@code StdOutReport} in its summary line
     */
    void finishImport(String type);

    /** Signals that the whole import is complete; {@code StdOutReport} prints the total import time. */
    void finish();
}
/**
 * Static helpers for id arrays and string joining.
 *
 * @author mh
 * @since 27.10.12
 */
public class Utils {

    /**
     * Returns the number of leading entries that are in use, where {@code -1} marks an unused
     * trailing slot: the index of the last entry that is not {@code -1}, plus one.
     *
     * NOTE(review): when every entry is {@code -1} (or the array is empty) the array length is
     * returned, not 0. Kept as is because callers may rely on it -- confirm intent.
     *
     * @param ids id array, may be {@code null}
     * @return 0 for {@code null}, otherwise the size as described above
     */
    public static int size(int[] ids) {
        if (ids==null) return 0;
        int count = ids.length;
        for (int i=count-1;i>=0;i--) {
            if (ids[i]!=-1) return i+1;
        }
        return count;
    }

    /**
     * {@code long[]} variant of {@link #size(int[])}, with the same semantics.
     *
     * @param ids id array, may be {@code null}
     * @return 0 for {@code null}, otherwise the index of the last non {@code -1} entry plus one,
     *         or the array length if there is no such entry
     */
    public static int size(long[] ids) {
        if (ids==null) return 0;
        int count = ids.length;
        for (int i=count-1;i>=0;i--) {
            if (ids[i]!=-1) return i+1;
        }
        return count;
    }

    /**
     * Concatenates the given strings, placing {@code delim} between neighbours.
     *
     * @param types strings to join; {@code null} or empty yields an empty string
     * @param delim separator inserted between elements
     * @return the joined string, without a trailing separator
     */
    static String join(String[] types, String delim) {
        // Without this guard the trailing-delimiter trim below would ask for a negative
        // substring length and throw StringIndexOutOfBoundsException.
        if (types == null || types.length == 0) return "";
        StringBuilder sb = new StringBuilder();
        for (String type : types) {
            sb.append(type).append(delim);
        }
        return sb.substring(0, sb.length() - delim.length());
    }
}
org.neo4j.batchimport.importer; 2 | 3 | import org.neo4j.batchimport.LineData; 4 | 5 | import java.util.Arrays; 6 | import java.util.Collections; 7 | import java.util.HashMap; 8 | import java.util.Map; 9 | 10 | import static org.neo4j.helpers.collection.MapUtil.map; 11 | 12 | public abstract class AbstractLineData implements LineData { 13 | protected final int offset; 14 | protected Object[] lineData; 15 | protected int lineSize; 16 | protected Header[] headers; 17 | int labelId = 2; 18 | int explicitLabelId = -1; 19 | private Object[] properties; 20 | protected int rows; 21 | private int propertyCount; 22 | private boolean hasIndex=false; 23 | private boolean done; 24 | private boolean hasId; 25 | 26 | public AbstractLineData(int offset) { 27 | this.offset = offset; 28 | } 29 | 30 | protected void initHeaders(Header[] headers) { 31 | this.headers = headers; 32 | lineSize=headers.length; 33 | lineData = new Object[lineSize]; 34 | } 35 | protected abstract String[] readRawRow(); 36 | 37 | protected abstract boolean readLine(); 38 | 39 | protected Header[] createHeaders(String[] fields) { 40 | if (fields[0].indexOf(".csv") != -1){ 41 | String firstLine = fields[0]; 42 | firstLine = firstLine.substring(firstLine.length() - 50, firstLine.length()).trim(); 43 | fields[0] = firstLine; 44 | } 45 | 46 | Header[] headers = new Header[fields.length]; 47 | int i=0; 48 | for (String field : fields) { 49 | String[] parts=field.split(":"); 50 | final String name = parts[0]; 51 | final String indexName = parts.length > 2 ? parts[2] : null; 52 | Type type = Type.fromString(parts.length > 1 ? 
parts[1] : null); 53 | if (type==Type.LABEL) { // || name.toLowerCase().matches("^(type|types|label|labels)$")) { 54 | labelId=i; 55 | type=Type.LABEL; 56 | explicitLabelId = i; 57 | } 58 | headers[i]=new Header(i, name, type, indexName); 59 | i++; 60 | hasIndex |= indexName != null; 61 | } 62 | hasId = headers[0].type == Type.ID; 63 | return headers; 64 | } 65 | 66 | protected Object[] createMapData(int lineSize, int offset) { 67 | int dataSize = Math.max(0,lineSize - offset); 68 | properties = new Object[dataSize*2]; 69 | for (int i = offset; i < dataSize; i++) { 70 | properties[(i - offset) * 2 ] = headers[i].name; 71 | } 72 | return properties; 73 | } 74 | 75 | @Override 76 | public boolean processLine(String line) { 77 | if (done) return false; 78 | return parse() > 0; 79 | } 80 | 81 | @Override 82 | public Header[] getHeader() { 83 | return headers; 84 | } 85 | 86 | @Override 87 | public long getId() { 88 | if (hasId) return (Long)getValue(0); 89 | return rows; 90 | } 91 | 92 | @Override 93 | public Map getProperties() { 94 | return properties(); 95 | } 96 | 97 | @Override 98 | public Map> getIndexData() { 99 | if (!hasIndex) return Collections.EMPTY_MAP; 100 | Map> indexData = new HashMap>(); 101 | for (int column = offset; column < headers.length; column++) { 102 | Header header = headers[column]; 103 | if (header.indexName == null) continue; 104 | Object val = getValue(column); 105 | if (val == null) continue; 106 | 107 | if (!indexData.containsKey(header.indexName)) { 108 | indexData.put(header.indexName, new HashMap()); 109 | } 110 | indexData.get(header.indexName).put(header.name,val); 111 | } 112 | return indexData; 113 | } 114 | 115 | @Override 116 | public String[] getTypeLabels() { 117 | if (explicitLabelId==-1) return null; 118 | Object labels = getValue(explicitLabelId); 119 | return labels instanceof String ? 
new String[]{ labels.toString() } : (String[]) labels; 120 | } 121 | 122 | @Override 123 | public String getRelationshipTypeLabel() { 124 | Object labels = getValue(labelId); 125 | return labels instanceof String[] ? ((String[])labels)[0] : (String)labels; 126 | } 127 | 128 | @Override 129 | public Object getValue(int column) { 130 | return lineData[column]; 131 | } 132 | 133 | @Override 134 | public boolean hasId() { 135 | return hasId; 136 | } 137 | 138 | private Header getHeader(int column) { 139 | return headers[column]; 140 | } 141 | 142 | private int parse() { 143 | rows++; 144 | Arrays.fill(lineData,null); 145 | done = !readLine(); 146 | return collectNonNullInData(); 147 | } 148 | 149 | private int collectNonNullInData() { 150 | propertyCount=0; 151 | int notnull = 0; 152 | for (int i = 0; i < lineSize; i++) { 153 | if (lineData[i] == null) continue; 154 | notnull++; 155 | if (i updateMap(Object... header) { 165 | processLine(null); 166 | 167 | // todo deprecate 168 | if (header.length > 0) { 169 | System.arraycopy(lineData, 0, header, 0, header.length); 170 | } 171 | 172 | return properties(); 173 | } 174 | 175 | private Map properties() { 176 | if (propertyCount == properties.length) { 177 | return map(properties); 178 | } 179 | Object[] newData=new Object[propertyCount]; 180 | System.arraycopy(properties,0,newData,0, propertyCount); 181 | return map(newData); 182 | } 183 | 184 | public int getColumnCount() { 185 | return this.propertyCount/2; 186 | } 187 | 188 | protected Object convert(int column, String value) { 189 | try { 190 | return headers[column].type == Type.STRING ? value : headers[column].type.convert(value); 191 | } catch(Exception e) { 192 | // todo potentially skip? 
193 | throw new RuntimeException("Error converting value row "+rows+" column "+headers[column]+" value "+value+" error: "+e.getClass().getSimpleName()+": "+e.getMessage(),e); 194 | } 195 | } 196 | } 197 | -------------------------------------------------------------------------------- /src/main/java/org/neo4j/batchimport/importer/ChunkerLineData.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.importer; 2 | 3 | import org.neo4j.batchimport.utils.Chunker; 4 | 5 | import java.io.IOException; 6 | import java.io.Reader; 7 | import java.util.*; 8 | 9 | public class ChunkerLineData extends AbstractLineData { 10 | private final Chunker chunker; 11 | 12 | public ChunkerLineData(Reader reader, char delim, int offset) { 13 | super(offset); 14 | chunker = new Chunker(reader, delim); 15 | initHeaders(createHeaders(readRawRow())); 16 | createMapData(lineSize, offset); 17 | } 18 | 19 | protected String[] readRawRow() { 20 | String value; 21 | Collection result=new ArrayList(); 22 | do { 23 | value = nextWord(); 24 | if (Chunker.NO_VALUE != value && !isEndOfLineOrFile(value)) { 25 | result.add(value); 26 | } 27 | } while (!isEndOfLineOrFile(value)); 28 | return result.toArray(new String[result.size()]); 29 | } 30 | 31 | private String nextWord() { 32 | try { 33 | return chunker.nextWord(); 34 | } catch (IOException e) { 35 | throw new RuntimeException(e); 36 | } 37 | } 38 | 39 | protected boolean readLine() { 40 | String value = null; 41 | int i=0; 42 | do { 43 | value = nextWord(); 44 | if (isEndOfLineOrFile(value)) break; 45 | if (i==lineSize) { 46 | do { 47 | value = nextWord(); 48 | } while (!isEndOfLineOrFile(value)); // consume until EOL 49 | break; 50 | } 51 | lineData[i] = Chunker.NO_VALUE == value ? null : convert(i, value); 52 | i++; 53 | } while (!isEndOfLineOrFile(value)); 54 | if (i 2 ? parts[2] : null; 42 | Type type = Type.fromString(parts.length > 1 ? 
parts[1] : null); 43 | if (type==Type.LABEL || name.toLowerCase().matches("^(type|types|label|labels)$")) { 44 | labelId=i; 45 | type=Type.LABEL; 46 | explicitLabelId=i; 47 | } 48 | headers[i]=new Header(i, name, type, indexName); 49 | hasIndex |= indexName != null; 50 | } 51 | hasId = headers[0].type == Type.ID; 52 | return headers; 53 | } 54 | 55 | private Object[] createMapData(int lineSize, int offset) { 56 | int dataSize = Math.max(0,lineSize - offset); 57 | properties = new Object[dataSize*2]; 58 | for (int i = offset; i < dataSize; i++) { 59 | properties[(i - offset) * 2 ] = headers[i].name; 60 | } 61 | return properties; 62 | } 63 | 64 | @Override 65 | public boolean processLine(String line) { 66 | this.propertyCount = parse(line); 67 | return true; 68 | } 69 | 70 | @Override 71 | public Header[] getHeader() { 72 | return headers; 73 | } 74 | 75 | @Override 76 | public long getId() { 77 | if (hasId) return (Long)getValue(0); 78 | return rows; 79 | } 80 | 81 | @Override 82 | public boolean hasId() { 83 | return hasId; 84 | } 85 | 86 | @Override 87 | public Map getProperties() { 88 | return properties(); 89 | } 90 | 91 | @Override 92 | public Map> getIndexData() { 93 | if (!hasIndex) return Collections.EMPTY_MAP; 94 | Map> indexData = new HashMap>(); 95 | for (int column = 0; column < headers.length; column++) { 96 | Header header = headers[column]; 97 | if (header.indexName == null) continue; 98 | 99 | if (!indexData.containsKey(header.indexName)) { 100 | indexData.put(header.indexName, new HashMap()); 101 | } 102 | indexData.get(header.indexName).put(header.name,getValue(column)); 103 | } 104 | return indexData; 105 | } 106 | 107 | @Override 108 | public String[] getTypeLabels() { 109 | if (explicitLabelId==-1) return null; 110 | Object labels = getValue(explicitLabelId); 111 | return labels instanceof String ? 
new String[]{ labels.toString() } : (String[]) labels; 112 | } 113 | 114 | @Override 115 | public String getRelationshipTypeLabel() { 116 | Object labels = getValue(labelId); 117 | return labels instanceof String[] ? ((String[])labels)[0] : (String)labels; 118 | } 119 | 120 | @Override 121 | public Object getValue(int column) { 122 | return getHeader(column).type.convert(lineData[column]); 123 | } 124 | 125 | private Header getHeader(int column) { 126 | return headers[column]; 127 | } 128 | 129 | private int parse(String line) { 130 | rows++; 131 | final StringTokenizer st = new StringTokenizer(line, delim,true); 132 | for (int i = 0; i < lineSize; i++) { 133 | String value = st.hasMoreTokens() ? st.nextToken() : delim; 134 | if (value.equals(delim)) { 135 | lineData[i] = null; 136 | } else { 137 | lineData[i] = value.trim().isEmpty() ? null : value; 138 | if (i< lineSize -1 && st.hasMoreTokens()) st.nextToken(); 139 | } 140 | } 141 | return collectNonNullInData(); 142 | } 143 | 144 | private int collectNonNullInData() { 145 | int count = 0; 146 | for (int i = offset; i < lineSize; i++) { 147 | if (lineData[i] == null) continue; 148 | final Header header = getHeader(i); 149 | properties[count++]= header.name; 150 | properties[count++]= getValue(i); 151 | } 152 | return count; 153 | } 154 | 155 | public Map updateMap(String line, Object... 
header) { 156 | processLine(line); 157 | 158 | // todo deprecate 159 | if (header.length > 0) { 160 | System.arraycopy(lineData, 0, header, 0, header.length); 161 | } 162 | 163 | return properties(); 164 | } 165 | 166 | private Map properties() { 167 | if (propertyCount == properties.length) { 168 | return map(properties); 169 | } 170 | Object[] newData=new Object[propertyCount]; 171 | System.arraycopy(properties,0,newData,0, propertyCount); 172 | return map(newData); 173 | } 174 | 175 | public int getColumnCount() { 176 | return this.propertyCount/2; 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/main/java/org/neo4j/batchimport/importer/Type.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.importer; 2 | 3 | import org.neo4j.batchimport.utils.Config; 4 | 5 | public enum Type { 6 | ID { 7 | @Override 8 | public Object convert(String value) { 9 | return Long.parseLong(value); 10 | } 11 | public boolean isProperty() { return false; } 12 | }, 13 | LABEL { 14 | @Override 15 | public Object convert(String value) { 16 | return value.trim().split("\\s*,\\s*"); 17 | } 18 | public boolean isProperty() { return false; } 19 | }, 20 | BOOLEAN { 21 | @Override 22 | public Object convert(String value) { 23 | return Boolean.valueOf(value); 24 | } 25 | }, 26 | INT { 27 | @Override 28 | public Object convert(String value) { 29 | return Integer.valueOf(value); 30 | } 31 | }, 32 | LONG { 33 | @Override 34 | public Object convert(String value) { 35 | return Long.valueOf(value); 36 | } 37 | }, 38 | DOUBLE { 39 | @Override 40 | public Object convert(String value) { 41 | return Double.valueOf(value); 42 | } 43 | }, 44 | FLOAT { 45 | @Override 46 | public Object convert(String value) { 47 | return Float.valueOf(value); 48 | } 49 | }, 50 | BYTE { 51 | @Override 52 | public Object convert(String value) { 53 | return Byte.valueOf(value); 54 | } 55 | }, 56 | 
SHORT { 57 | @Override 58 | public Object convert(String value) { 59 | return Short.valueOf(value); 60 | } 61 | }, 62 | CHAR { 63 | @Override 64 | public Object convert(String value) { 65 | return value.charAt(0); 66 | } 67 | }, 68 | STRING { 69 | @Override 70 | public Object convert(String value) { 71 | return value; 72 | } 73 | }, 74 | BOOLEAN_ARRAY { 75 | @Override 76 | public Object convert(String value) { 77 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR); 78 | boolean[] booleanArray = new boolean[strArray.length]; 79 | for(int i = 0; i < strArray.length; i++) { 80 | booleanArray[i] = Boolean.valueOf(strArray[i]); 81 | } 82 | return booleanArray; 83 | } 84 | }, 85 | INT_ARRAY { 86 | @Override 87 | public Object convert(String value) { 88 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR); 89 | int[] intArray = new int[strArray.length]; 90 | for(int i = 0; i < strArray.length; i++) { 91 | intArray[i] = Integer.parseInt(strArray[i]); 92 | } 93 | return intArray; 94 | } 95 | }, 96 | LONG_ARRAY { 97 | @Override 98 | public Object convert(String value) { 99 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR); 100 | long[] longArray = new long[strArray.length]; 101 | for(int i = 0; i < strArray.length; i++) { 102 | longArray[i] = Long.parseLong(strArray[i]); 103 | } 104 | return longArray; 105 | } 106 | }, 107 | DOUBLE_ARRAY { 108 | @Override 109 | public Object convert(String value) { 110 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR); 111 | double[] doubleArray = new double[strArray.length]; 112 | for(int i = 0; i < strArray.length; i++) { 113 | doubleArray[i] = Double.parseDouble(strArray[i]); 114 | } 115 | return doubleArray; 116 | } 117 | }, 118 | FLOAT_ARRAY { 119 | @Override 120 | public Object convert(String value) { 121 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR); 122 | float[] floatArray = new float[strArray.length]; 123 | for(int i = 0; i < strArray.length; i++) { 124 | floatArray[i] = 
Float.parseFloat(strArray[i]); 125 | } 126 | return floatArray; 127 | } 128 | }, 129 | BYTE_ARRAY { 130 | @Override 131 | public Object convert(String value) { 132 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR); 133 | byte[] byteArray = new byte[strArray.length]; 134 | for(int i = 0; i < strArray.length; i++) { 135 | byteArray[i] = Byte.parseByte(strArray[i]); 136 | } 137 | return byteArray; 138 | } 139 | }, 140 | SHORT_ARRAY { 141 | @Override 142 | public Object convert(String value) { 143 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR); 144 | short[] shortArray = new short[strArray.length]; 145 | for(int i = 0; i < strArray.length; i++) { 146 | shortArray[i] = Short.parseShort(strArray[i]); 147 | } 148 | return shortArray; 149 | } 150 | }, 151 | CHAR_ARRAY { 152 | @Override 153 | public Object convert(String value) { 154 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR); 155 | char[] charArray = new char[strArray.length]; 156 | for(int i = 0; i < strArray.length; i++) { 157 | charArray[i] = strArray[i].charAt(0); 158 | } 159 | return charArray; 160 | } 161 | }, 162 | STRING_ARRAY { 163 | @Override 164 | public Object convert(String value) { 165 | String separator = Config.ARRAYS_SEPARATOR; 166 | return value.split(Config.ARRAYS_SEPARATOR); 167 | } 168 | }; 169 | 170 | public static Type fromString(String typeString) { 171 | if (typeString==null || typeString.isEmpty()) return Type.STRING; 172 | try { 173 | return valueOf(typeString.toUpperCase()); 174 | } catch (Exception e) { 175 | throw new IllegalArgumentException("Unknown Type "+typeString); 176 | } 177 | } 178 | 179 | public abstract Object convert(String value); 180 | 181 | public boolean isProperty() { return true; } 182 | } 183 | -------------------------------------------------------------------------------- /src/main/java/org/neo4j/batchimport/index/LongIterableIndexHits.java: -------------------------------------------------------------------------------- 1 | package 
org.neo4j.batchimport.index; 2 | 3 | import org.neo4j.graphdb.ResourceIterator; 4 | import org.neo4j.graphdb.index.IndexHits; 5 | import org.neo4j.helpers.collection.Iterables; 6 | 7 | import java.util.Iterator; 8 | 9 | /** 10 | * @author mh 11 | * @since 11.06.13 12 | */ 13 | public class LongIterableIndexHits implements IndexHits, ResourceIterator { 14 | 15 | private final Iterable values; 16 | private Iterator iterator; 17 | 18 | public LongIterableIndexHits(Iterable values) { 19 | this.values = values; 20 | iterator = iterator(); 21 | } 22 | 23 | @Override 24 | public int size() { 25 | return (int) Iterables.count(values); 26 | } 27 | 28 | @Override 29 | public void close() { 30 | if (iterator instanceof ResourceIterator) { 31 | ((ResourceIterator)iterator).close(); 32 | } 33 | } 34 | 35 | @Override 36 | public Long getSingle() { 37 | return Iterables.singleOrNull(values); 38 | } 39 | 40 | @Override 41 | public float currentScore() { 42 | return 0; 43 | } 44 | 45 | @Override 46 | public ResourceIterator iterator() { 47 | iterator = values.iterator(); 48 | return this; 49 | } 50 | 51 | 52 | @Override 53 | public boolean hasNext() { 54 | return iterator.hasNext(); 55 | } 56 | 57 | @Override 58 | public Long next() { 59 | return iterator.next(); 60 | } 61 | 62 | @Override 63 | public void remove() { 64 | iterator.remove(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/org/neo4j/batchimport/index/MapDbCachingIndexProvider.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.index; 2 | 3 | import org.mapdb.BTreeKeySerializer; 4 | import org.mapdb.Bind; 5 | import org.mapdb.DB; 6 | import org.mapdb.DBMaker; 7 | import org.mapdb.Fun; 8 | import org.neo4j.graphdb.index.IndexHits; 9 | import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider; 10 | import org.neo4j.unsafe.batchinsert.BatchInserter; 11 | import 
org.neo4j.unsafe.batchinsert.BatchInserterIndex; 12 | import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider; 13 | 14 | import java.util.HashMap; 15 | import java.util.Map; 16 | import java.util.NavigableSet; 17 | 18 | public class MapDbCachingIndexProvider implements BatchInserterIndexProvider { 19 | LuceneBatchInserterIndexProvider luceneIndex; 20 | private DB db; 21 | 22 | public MapDbCachingIndexProvider(BatchInserter inserter) { 23 | this(new LuceneBatchInserterIndexProvider(inserter)); 24 | } 25 | 26 | public MapDbCachingIndexProvider(LuceneBatchInserterIndexProvider luceneIndex) { 27 | this.luceneIndex = luceneIndex; 28 | db = DBMaker.newTempFileDB(). 29 | asyncFlushDelay(1000). 30 | cacheSize(1024 * 1024). 31 | closeOnJvmShutdown(). 32 | deleteFilesAfterClose(). 33 | syncOnCommitDisable(). 34 | writeAheadLogDisable(). 35 | make(); 36 | } 37 | 38 | @Override 39 | public BatchInserterIndex nodeIndex(String indexName, Map config) { 40 | return new CachingBatchInserterIndex(db,indexName,luceneIndex.nodeIndex(indexName,config)); 41 | } 42 | 43 | @Override 44 | public BatchInserterIndex relationshipIndex(String indexName, Map config) { 45 | return new CachingBatchInserterIndex(db,indexName,luceneIndex.relationshipIndex(indexName, config)); 46 | } 47 | 48 | @Override 49 | public void shutdown() { 50 | luceneIndex.shutdown(); 51 | db.close(); 52 | } 53 | 54 | private static class CachingBatchInserterIndex implements BatchInserterIndex { 55 | Map>> caches = new HashMap>>(); 56 | private final DB db; 57 | private final String indexName; 58 | private final BatchInserterIndex index; 59 | 60 | public CachingBatchInserterIndex(DB db, String indexName, BatchInserterIndex index) { 61 | this.db = db; 62 | this.indexName = indexName; 63 | this.index = index; 64 | } 65 | private NavigableSet> getSet(String property) { 66 | NavigableSet> set = caches.get(property); 67 | if (set != null) return set; 68 | set=db.>createTreeSet(indexName+"."+property,32,false, 
BTreeKeySerializer.TUPLE2,null); 69 | caches.put(property,set); 70 | return set; 71 | } 72 | 73 | @Override 74 | public void add(long entityId, Map properties) { 75 | for (Map.Entry entry : properties.entrySet()) { 76 | getSet(entry.getKey()).add(Fun.t2(entry.getValue(), entityId)); 77 | } 78 | index.add(entityId,properties); 79 | } 80 | 81 | @Override 82 | public void updateOrAdd(long entityId, Map properties) { 83 | throw new UnsupportedOperationException(); 84 | } 85 | 86 | @Override 87 | public IndexHits get(String key, Object value) { 88 | final Iterable values = Bind.findSecondaryKeys(getSet(key), value); 89 | return new LongIterableIndexHits(values); 90 | } 91 | 92 | @Override 93 | public IndexHits query(String key, Object queryOrQueryObject) { 94 | throw new UnsupportedOperationException(); 95 | } 96 | 97 | @Override 98 | public IndexHits query(Object queryOrQueryObject) { 99 | throw new UnsupportedOperationException(); 100 | } 101 | 102 | @Override 103 | public void flush() { 104 | index.flush(); 105 | } 106 | 107 | @Override 108 | public void setCacheCapacity(String key, int size) { 109 | throw new UnsupportedOperationException(); 110 | } 111 | 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/org/neo4j/batchimport/utils/Chunker.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.utils; 2 | 3 | import java.io.IOException; 4 | import java.io.Reader; 5 | 6 | /** 7 | * @author mh 8 | * @since 13.11.12 9 | */ 10 | public class Chunker { 11 | public static final String EOF = null; 12 | public static final String EOL = "\n".intern(); 13 | public static final String NO_VALUE = "".intern(); 14 | public static final char EOL_CHAR2 = '\r'; 15 | public static final char EOL_CHAR = '\n'; 16 | public static final char EOF_CHAR = (char)-1; 17 | public static final int PREV_EOL_CHAR = -2; 18 | private static final int BUFSIZE = 
/**
 * Splits a character stream into tokens separated by a delimiter character or a line feed.
 *
 * {@link #nextWord()} returns, in stream order: the text of each field, {@link #NO_VALUE} for
 * an empty field, {@link #EOL} after the last field of a line, and {@link #EOF} once the input
 * is exhausted. {@code EOL} and {@code NO_VALUE} are returned as those exact instances, so
 * callers may compare them by identity.
 *
 * The character {@code \uFFFF} is used internally as end-of-input sentinel and is therefore
 * treated as end of input if it occurs in the data.
 *
 * @author mh
 * @since 13.11.12
 */
public class Chunker {
    public static final String EOF = null;
    public static final String EOL = "\n".intern();
    public static final String NO_VALUE = "".intern();
    public static final char EOL_CHAR2 = '\r';
    public static final char EOL_CHAR = '\n';
    public static final char EOF_CHAR = (char)-1;
    public static final int PREV_EOL_CHAR = -2;
    private static final int BUFSIZE = 32*1024;
    private final Reader reader;
    private final char delim;
    // grows only when a single token is longer than the current buffer
    private char[] buffer=new char[BUFSIZE];
    // terminator of the previous token; PREV_EOL_CHAR at start of input and after an EOL was reported
    private int lastChar = PREV_EOL_CHAR;
    // next unread position; buffer.length means "buffer consumed, refill before reading"
    private int pos=BUFSIZE;

    /**
     * @param reader source of characters; not closed by this class
     * @param delim  field delimiter
     */
    public Chunker(Reader reader, char delim) {
        this.reader = reader;
        this.delim = delim;
    }

    /**
     * @return the token, null for EOF, empty string for no value read (just delim) or "\n" for EOL
     * @throws IOException if the underlying reader fails
     */
    public String nextWord() throws IOException {
        if (lastChar == EOF_CHAR) return EOF;
        if (lastChar == EOL_CHAR) {
            // the previous token ended the line: report that before reading on
            lastChar = PREV_EOL_CHAR;
            return EOL;
        }

        if (pos == buffer.length) {
            fill(0);
            pos = 0;
        }
        int count = 0;
        int ch;
        int start = pos;
        while ((ch = buffer[pos++])!=delim && ch!= EOL_CHAR && ch!= EOF_CHAR) {
            count++;
            if (pos == buffer.length) {
                if (start == 0) {
                    // the token alone fills the buffer: enlarge it instead of overrunning
                    char[] larger = new char[buffer.length * 2];
                    System.arraycopy(buffer, 0, larger, 0, count);
                    buffer = larger;
                } else {
                    // move the partial token to the front to make room for more input
                    System.arraycopy(buffer, start, buffer, 0, count);
                    start = 0;
                }
                fill(count);
                pos = count;
            }
        }
        if (count == 0) {
            // nothing between a line end (or start of input) and end of input: no further field
            if (lastChar==PREV_EOL_CHAR && ch== EOF_CHAR) { lastChar=EOF_CHAR;return EOF; }
            lastChar=ch;
            return NO_VALUE;
        }
        lastChar=ch;
        // drop the carriage return of a CRLF line ending
        if (buffer[start + count-1]==EOL_CHAR2) count--;
        // a field that held only the carriage return is empty: hand out the shared instance
        if (count == 0) return NO_VALUE;
        return String.valueOf(buffer, start, count);
    }

    /**
     * Reads into the buffer from {@code offset} until it is full or the reader is exhausted.
     * A single {@code read} may deliver fewer characters than requested without being at end
     * of input, so reading continues until -1 is returned. If the buffer could not be filled,
     * the end-of-input sentinel is written after the last character read.
     */
    private void fill(int offset) throws IOException {
        int filled = offset;
        while (filled < buffer.length) {
            int read = reader.read(buffer, filled, buffer.length - filled);
            if (read == -1) break;
            filled += read;
        }
        if (filled < buffer.length) {
            buffer[filled] = EOF_CHAR;
        }
    }

}
2 | 3 | import org.neo4j.batchimport.Importer; 4 | import org.neo4j.batchimport.IndexInfo; 5 | import org.neo4j.helpers.collection.MapUtil; 6 | 7 | import java.io.File; 8 | import java.io.FileWriter; 9 | import java.util.ArrayList; 10 | import java.util.Collection; 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | import java.util.Stack; 14 | 15 | public class Config { 16 | public static final String BATCH_IMPORT_RELS_FILES = "batch_import.rels_files"; 17 | public static final String BATCH_IMPORT_GRAPH_DB = "batch_import.graph_db"; 18 | public static final String BATCH_IMPORT_KEEP_DB = "batch_import.keep_db"; 19 | public static final String CONFIG_FILE_NAME = "batch.properties"; 20 | public static final String BATCH_IMPORT_NODES_FILES = "batch_import.nodes_files"; 21 | public static final String BATCH_IMPORT_MAPDB_CACHE_DISABLE = "batch_import.mapdb_cache.disable"; 22 | public static final String BATCH_IMPORT_CSV_QUOTES = "batch_import.csv.quotes"; 23 | public static final String BATCH_IMPORT_CSV_DELIM = "batch_import.csv.delim"; 24 | public static final String ARRAY_SEPARATOR_CONFIG = "batch_array_separator"; 25 | public static String ARRAYS_SEPARATOR = ","; 26 | 27 | private final Map configData; 28 | 29 | public Config(Map configData) { 30 | this.configData = configData; 31 | if (this.configData.containsKey(ARRAY_SEPARATOR_CONFIG)){ 32 | Config.ARRAYS_SEPARATOR = configData.get(ARRAY_SEPARATOR_CONFIG); 33 | } 34 | } 35 | 36 | public static Config convertArgumentsToConfig(String[] args) { 37 | final Stack argumentList = toStack(args); 38 | 39 | String configFileName = findConfigFileName(argumentList); 40 | 41 | final Map config = config(configFileName); 42 | 43 | convertParamsToConfig(argumentList, config); 44 | 45 | validateConfig(config); 46 | return new Config(config); 47 | } 48 | 49 | private static Stack toStack(String[] args) { 50 | final Stack argumentList = new Stack(); 51 | for (int i = args.length - 1; i >= 0; i--) { 52 | 
argumentList.push(args[i]); 53 | } 54 | return argumentList; 55 | } 56 | 57 | private static String findConfigFileName(Stack argumentList) { 58 | String firstParam = argumentList.isEmpty() ? "" : argumentList.peek(); 59 | String configFileName = CONFIG_FILE_NAME; 60 | if (firstParam.endsWith(".properties")) { 61 | configFileName = firstParam; 62 | popOrNull(argumentList); 63 | } 64 | return configFileName; 65 | } 66 | 67 | // todo more checks ? 68 | private static void validateConfig(Map config) { 69 | if (!config.containsKey(BATCH_IMPORT_GRAPH_DB)) throw new IllegalArgumentException("Missing parameter for graphdb directory"); 70 | } 71 | 72 | private static Collection convertParamsToConfig(Stack args, Map config) { 73 | addConfigParamIfArgument(args, config, BATCH_IMPORT_GRAPH_DB); 74 | addConfigParamIfArgument(args, config, BATCH_IMPORT_NODES_FILES); 75 | addConfigParamIfArgument(args, config, BATCH_IMPORT_RELS_FILES); 76 | Collection indexes = createIndexInfos(args); 77 | for (IndexInfo index : indexes) { 78 | index.addToConfig(config); 79 | } 80 | return indexes; 81 | } 82 | 83 | private static void addConfigParamIfArgument(Stack args, Map config, String param) { 84 | final String arg = popOrNull(args); 85 | if (arg==null || arg.trim().isEmpty()) return; 86 | if (!config.containsKey(param)) config.put(param, arg); 87 | } 88 | 89 | private static String popOrNull(Stack args) { 90 | if (args.isEmpty()) return null; 91 | return args.pop(); 92 | } 93 | 94 | private static Collection createIndexInfos(Stack args) { 95 | Collection indexes=new ArrayList(); 96 | while (!args.isEmpty()) { 97 | indexes.add(new IndexInfo(popOrNull(args), popOrNull(args), popOrNull(args), popOrNull(args))); 98 | } 99 | return indexes; 100 | } 101 | 102 | public static Map config(String fileName) { 103 | Map config = new HashMap(); 104 | try { 105 | if (new File(fileName).exists()) { 106 | System.out.println("Using Existing Configuration File"); 107 | } else { 108 | 
System.out.println("Writing Configuration File to batch.properties"); 109 | FileWriter fw = new FileWriter(fileName); 110 | fw.append("use_memory_mapped_buffers=true\n" 111 | + "neostore.nodestore.db.mapped_memory=100M\n" 112 | + "neostore.relationshipstore.db.mapped_memory=500M\n" 113 | + "neostore.propertystore.db.mapped_memory=1G\n" 114 | + "neostore.propertystore.db.strings.mapped_memory=200M\n" 115 | + "neostore.propertystore.db.arrays.mapped_memory=0M\n" 116 | + "neostore.propertystore.db.index.keys.mapped_memory=15M\n" 117 | + "neostore.propertystore.db.index.mapped_memory=15M"); 118 | fw.close(); 119 | } 120 | 121 | config = MapUtil.load(new File(fileName)); 122 | 123 | } catch (Exception e) { 124 | System.out.println(e.getMessage()); 125 | } 126 | return config; 127 | } 128 | 129 | public static Collection extractIndexInfos(Map config) { 130 | Collection result=new ArrayList(); 131 | for (Map.Entry entry : config.entrySet()) { 132 | final IndexInfo info = IndexInfo.fromConfigEntry(entry); 133 | if (info!=null) result.add(info); 134 | } 135 | return result; 136 | } 137 | 138 | public static boolean configOptionEnabled(Config config, String option) { 139 | return "true".equalsIgnoreCase(config.get(option)); 140 | } 141 | public static boolean configOptionDisabled(Config config, String option) { 142 | return "false".equalsIgnoreCase(config.get(option)); 143 | } 144 | 145 | public static Collection toFiles(String commaSeparatedFileList) { 146 | Collection files=new ArrayList(); 147 | if (commaSeparatedFileList==null || commaSeparatedFileList.isEmpty()) return files; 148 | for (String part : commaSeparatedFileList.split(",")) { 149 | final File file = new File(part); 150 | if (file.exists() && file.canRead() && file.isFile()) files.add(file); 151 | else System.err.println("File "+file+" does not exist, can not be read or is not a file."); 152 | } 153 | return files; 154 | } 155 | 156 | public static String NODE_INDEX(String indexName) { 157 | return 
"batch_import.node_index." + indexName; 158 | } 159 | public static String RELATIONSHIP_INDEX(String indexName) { 160 | return "batch_import.relationship_index." + indexName; 161 | } 162 | 163 | public boolean isCachedIndexDisabled() { 164 | return configOptionEnabled(this, BATCH_IMPORT_MAPDB_CACHE_DISABLE); 165 | } 166 | 167 | public Collection getIndexInfos() { 168 | return extractIndexInfos(configData); 169 | } 170 | 171 | public Collection getRelsFiles() { 172 | return toFiles(get(BATCH_IMPORT_RELS_FILES)); 173 | } 174 | 175 | public Collection getNodesFiles() { 176 | return toFiles(get(BATCH_IMPORT_NODES_FILES)); 177 | } 178 | 179 | public char getDelimChar(Importer importer) { 180 | final String delim = get(BATCH_IMPORT_CSV_DELIM); 181 | if (delim==null || delim.isEmpty()) return '\t'; 182 | return delim.trim().charAt(0); 183 | } 184 | 185 | public boolean quotesEnabled() { 186 | return !configOptionDisabled(this, BATCH_IMPORT_CSV_QUOTES); 187 | } 188 | 189 | public String getGraphDbDirectory() { 190 | return get(BATCH_IMPORT_GRAPH_DB); 191 | } 192 | 193 | String get(String option) { 194 | return configData.get(option); 195 | } 196 | 197 | public boolean keepDatabase() { 198 | return configOptionEnabled(this, BATCH_IMPORT_KEEP_DB); 199 | } 200 | 201 | public Map getConfigData() { 202 | return configData; 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/main/java/org/neo4j/batchimport/utils/FileIterator.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.utils; 2 | 3 | import org.mapdb.Serializer; 4 | 5 | import java.io.*; 6 | import java.util.Comparator; 7 | import java.util.Iterator; 8 | 9 | class FileIterator implements Iterator { 10 | public static final char DELIM = '\t'; 11 | private final BufferedReader reader; 12 | private final String file; 13 | Line line; 14 | long lineNo; 15 | 16 | public FileIterator(String file) throws 
FileNotFoundException { 17 | reader = new BufferedReader(new FileReader(file), RelationshipSorter.BUFFER); 18 | this.file = file; 19 | line = readLine(); 20 | } 21 | 22 | public void close() throws IOException { 23 | reader.close(); 24 | } 25 | 26 | private Line readLine() { 27 | try { 28 | String line = reader.readLine(); 29 | if (line==null) return null; 30 | return Line.from(lineNo++, line); 31 | } catch (IOException e) { 32 | throw new RuntimeException("Error reading file "+ file,e); 33 | } 34 | } 35 | 36 | 37 | public boolean hasNext() { 38 | return line != null; 39 | } 40 | 41 | public Line next() { 42 | Line result=line; 43 | line = readLine(); 44 | return result; 45 | } 46 | 47 | public void remove() { 48 | } 49 | 50 | public static class LineSerializer implements Serializer { 51 | @Override 52 | public void serialize(DataOutput dataOutput, Line line) throws IOException { 53 | dataOutput.writeLong(line.lineNo); 54 | // dataOutput.writeLong(line.min); 55 | // dataOutput.writeLong(line.max); 56 | dataOutput.writeUTF(line.line); 57 | } 58 | 59 | @Override 60 | public Line deserialize(DataInput dataInput, int i) throws IOException { 61 | // return Line.from(dataInput.readLong(),dataInput.readLong(),dataInput.readLong(),dataInput.readUTF()); 62 | return Line.from(dataInput.readLong(),dataInput.readUTF()); 63 | } 64 | } 65 | public static class Line { 66 | String line; 67 | long lineNo, min, max; 68 | public static Line from(long lineNo, long min, long max, String line) { 69 | Line result = new Line(); 70 | result.lineNo = lineNo; 71 | result.min = min; 72 | result.max = max; 73 | result.line = line; 74 | return result; 75 | } 76 | public static Line from(long lineNo, String line) { 77 | if (lineNo > 0) { 78 | int idx = line.indexOf(DELIM); 79 | long start = Long.parseLong(line.substring(0, idx++)); 80 | long end = Long.parseLong(line.substring(idx, line.indexOf(DELIM, idx))); 81 | return from(lineNo,Math.min(start,end), Math.max(start, end),line); 82 | } else { 
/**
 * Maps positional command line arguments to names, e.g.
 * {@code new Params("nodes rels", args).file("nodes")}.
 *
 * @author mh
 * @since 02.11.12
 */
public class Params {

    private final String names;
    private final String[] args;
    private final String[] params;

    /**
     * @param names parameter names separated by one or more spaces, in argument order
     * @param args  the argument values, matched to the names by position
     */
    public Params(String names, String... args) {
        this.names = names;
        this.params = names.split(" +");
        this.args = args;
    }

    /** @return true if the number of arguments differs from the number of names */
    public boolean invalid() {
        return args.length != params.length;
    }

    /** @return the number of declared parameter names */
    public int length() {
        return params.length;
    }

    @Override
    public String toString() {
        return names;
    }

    public File file(String name) {
        return new File(string(name));
    }

    /**
     * Looks up the argument for the given parameter name (case insensitive).
     *
     * @throws IllegalArgumentException if the name is unknown or no argument was supplied for it
     */
    public String string(String name) {
        for (int i = 0; i < params.length; i++) {
            if (params[i].equalsIgnoreCase(name)) {
                // fewer arguments than names: report it instead of running past the array end
                if (i >= args.length) {
                    throw new IllegalArgumentException("Missing value for " + name + ", expected " + names);
                }
                return args[i];
            }
        }
        throw new IllegalArgumentException("Invalid name " + name + " only know " + names);
    }

    /** @throws NumberFormatException if the argument is not a valid long */
    public long longValue(String name) {
        return Long.parseLong(string(name));
    }

    /** @throws NumberFormatException if the argument is not a valid int */
    public int intValue(String name) {
        return Integer.parseInt(string(name));
    }
}
/src/main/java/org/neo4j/batchimport/utils/RelationshipSorter.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.utils; 2 | 3 | import org.mapdb.Pump; 4 | import org.neo4j.helpers.collection.IteratorWrapper; 5 | 6 | import java.io.BufferedWriter; 7 | import java.io.FileWriter; 8 | import java.io.IOException; 9 | import java.util.Iterator; 10 | 11 | /** 12 | * @author Michael Hunger @since 04.11.13 13 | */ 14 | public class RelationshipSorter { 15 | 16 | public static final int BUFFER = 1024 * 1024; 17 | 18 | public static void main(String... args) throws IOException { 19 | System.err.println("Usage mvn exec:java -Dexec.mainClass=org.neo4j.batchimport.utils.RelationshipSorter -Dexec.args='rels.csv rels_sorted.csv'"); 20 | final String file = args[0]; 21 | String file2 = args[1]; 22 | long time = System.currentTimeMillis(); 23 | FileIterator reader0 = new FileIterator(file); 24 | Iterator reader = wrapStatistics(reader0); 25 | FileIterator.Line header = reader.next(); 26 | Iterator result = Pump.sort(reader, 1_000_000, new FileIterator.RelStartEndComparator(), new FileIterator.LineSerializer()); 27 | BufferedWriter writer = new BufferedWriter(new FileWriter(file2), BUFFER); 28 | writer.write(header.line); 29 | writer.write("\n"); 30 | long count = 0; 31 | while (result.hasNext()) { 32 | writer.write(result.next().line); 33 | writer.write('\n'); 34 | count++; 35 | } 36 | writer.close(); 37 | reader0.close(); 38 | System.out.println("sorting " + count + " lines took " + (System.currentTimeMillis()-time)/1000+" seconds"); 39 | } 40 | 41 | private static Iterator wrapStatistics(final FileIterator reader0) { 42 | return new IteratorWrapper(reader0) { 43 | long time = System.currentTimeMillis(); 44 | @Override 45 | protected FileIterator.Line underlyingObjectToObject(FileIterator.Line line) { 46 | if (line.lineNo % 10000 == 0) { 47 | System.out.print("."); 48 | if (line.lineNo % 1000000 == 0) { 49 | long 
now = System.currentTimeMillis(); 50 | System.out.println(" "+line.lineNo+ " " +(now - time)+" ms"); 51 | time = now; 52 | } 53 | } 54 | 55 | return line; 56 | } 57 | }; 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/org/neo4j/batchimport/utils/RelationshipSorter2.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.utils; 2 | 3 | import org.neo4j.helpers.collection.ArrayIterator; 4 | import org.neo4j.helpers.collection.IteratorWrapper; 5 | 6 | import java.io.BufferedWriter; 7 | import java.io.FileWriter; 8 | import java.io.IOException; 9 | import java.util.Arrays; 10 | import java.util.Iterator; 11 | 12 | /** 13 | * @author Michael Hunger @since 04.11.13 14 | */ 15 | public class RelationshipSorter2 { 16 | 17 | public static final char DELIM = '\t'; 18 | public static final int BUFFER = 1024 * 1024; 19 | public static final FileIterator.RelStartEndComparator COMPARATOR = new FileIterator.RelStartEndComparator(); 20 | public static final int ARRAY_BUFFER = 10_000_000; 21 | 22 | public static void main(String... 
args) throws IOException { 23 | System.err.println("Usage mvn exec:java -Dexec.mainClass=org.neo4j.batchimport.utils.RelationshipSorter2 -Dexec.args='rels.csv rels_sorted.csv'"); 24 | final String file = args[0]; 25 | String file2 = args[1]; 26 | long time = System.currentTimeMillis(); 27 | FileIterator reader0 = new FileIterator(file); 28 | Iterator reader = wrapStatistics(reader0); 29 | FileIterator.Line header = reader.next(); 30 | FileIterator.Line[] lines = new FileIterator.Line[ARRAY_BUFFER]; 31 | int read = readArray(reader, lines); 32 | Arrays.sort(lines, COMPARATOR); 33 | long count = writeFile(file2, lines, read); 34 | // Iterator result = new ArrayIterator<>(lines); 35 | // sort array 36 | // long count = writeFile(file2, header, result); 37 | reader0.close(); 38 | System.out.println("sorting " + count + " lines took " + (System.currentTimeMillis()-time)/1000+" seconds"); 39 | } 40 | 41 | private static long writeFile(String file, FileIterator.Line header, Iterator lines) throws IOException { 42 | BufferedWriter writer = new BufferedWriter(new FileWriter(file), BUFFER); 43 | if (header!=null) { 44 | writer.write(header.line); writer.write("\n"); 45 | } 46 | long count = 0; 47 | while (lines.hasNext()) { 48 | writer.write(lines.next().line); writer.write('\n'); 49 | count++; 50 | } 51 | writer.close(); 52 | return count; 53 | } 54 | 55 | private static long writeFile(String file, FileIterator.Line[] lines, int count) throws IOException { 56 | BufferedWriter writer = new BufferedWriter(new FileWriter(file), BUFFER); 57 | for (int i = 0; i < count; i++) { 58 | writer.write(lines[i].line); writer.write('\n'); 59 | } 60 | writer.close(); 61 | return count; 62 | } 63 | 64 | private static int readArray(Iterator reader, FileIterator.Line[] lines) { 65 | int i=0; 66 | int length = lines.length; 67 | while (i < length && reader.hasNext()) { 68 | lines[i++] = reader.next(); 69 | } 70 | return i; 71 | } 72 | 73 | private static Iterator wrapStatistics(final 
FileIterator reader0) { 74 | return new IteratorWrapper(reader0) { 75 | long time = System.currentTimeMillis(); 76 | @Override 77 | protected FileIterator.Line underlyingObjectToObject(FileIterator.Line line) { 78 | if (line.lineNo % 10000 == 0) { 79 | System.out.print("."); 80 | if (line.lineNo % 1000000 == 0) { 81 | long now = System.currentTimeMillis(); 82 | System.out.println(" "+line.lineNo+ " " +(now - time)+" ms"); 83 | time = now; 84 | } 85 | } 86 | 87 | return line; 88 | } 89 | }; 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # 3 | # Logging Configuration 4 | # 5 | # ------------------------------------------------------------------------ 6 | # 7 | log4j.rootLogger=INFO, Console 8 | 9 | ######################################################################## 10 | # 11 | # Logfile definitions 12 | # 13 | ######################################################################## 14 | #Console Log 15 | log4j.appender.Console=org.apache.log4j.ConsoleAppender 16 | log4j.appender.Console.Threshold=DEBUG 17 | log4j.appender.Console.layout=org.apache.log4j.PatternLayout 18 | log4j.appender.Console.layout.ConversionPattern=%-5p %C{1} - %m\n 19 | log4j.appender.Console.Target=System.err 20 | 21 | #LOGTXT Log 22 | log4j.appender.LOGTXT=org.apache.log4j.FileAppender 23 | log4j.appender.LOGTXT.File=log.txt 24 | log4j.appender.LOGTXT.Append=false 25 | log4j.appender.LOGTXT.Threshold=DEBUG 26 | log4j.appender.LOGTXT.layout=org.apache.log4j.PatternLayout 27 | log4j.appender.LOGTXT.layout.ConversionPattern=%-5p %C{1} - %m\n 28 | 29 | -------------------------------------------------------------------------------- /src/test/java/DataTest.java: 
-------------------------------------------------------------------------------- 1 | import org.junit.Assert; 2 | import org.junit.Test; 3 | import org.neo4j.batchimport.importer.RowData; 4 | 5 | import java.util.Arrays; 6 | import java.util.Map; 7 | 8 | import static org.junit.Assert.assertArrayEquals; 9 | import static org.junit.Assert.assertEquals; 10 | 11 | public class DataTest { 12 | @Test 13 | public void testConvertType() throws Exception { 14 | RowData data = new RowData("a:int\tb:float\tc:float", "\t", 0); 15 | data.processLine("100\t100.0\t1E+10"); 16 | Map row = data.getProperties(); 17 | assertEquals(100, row.get("a")); 18 | assertEquals(true,row.get("b") instanceof Float); 19 | assertEquals(100.0F, row.get("b")); 20 | assertEquals(true,row.get("b") instanceof Float); 21 | assertEquals(100.0F, row.get("b")); 22 | assertEquals(true,row.get("c") instanceof Float); 23 | assertEquals(1E+10F, row.get("c")); 24 | } 25 | 26 | @Test 27 | public void testRelationship() throws Exception { 28 | RowData data = new RowData("start\tend\ttype\tproperty", "\t", 3); 29 | data.processLine("1\t2\tTYPE\tPROPERTY"); 30 | Map row = data.getProperties(); 31 | assertEquals("1", data.getValue(0)); 32 | assertEquals("2", data.getValue(1)); 33 | assertEquals("TYPE", data.getTypeLabels()[0]); 34 | assertEquals("PROPERTY", row.get("property")); 35 | } 36 | 37 | @Test 38 | public void testRelationshipWithNoProperty() throws Exception { 39 | RowData data = new RowData("start\tend\ttype", "\t", 3); 40 | data.processLine("1\t2\tTYPE"); 41 | assertEquals("1", data.getValue(0)); 42 | assertEquals("2", data.getValue(1)); 43 | assertEquals("TYPE", data.getTypeLabels()[0]); 44 | } 45 | 46 | @Test 47 | public void testNodeLabels() throws Exception { 48 | RowData data = new RowData("labels", "\t", 3); 49 | data.processLine("TYPE1,TYPE2"); 50 | assertEquals("TYPE1", data.getTypeLabels()[0]); 51 | assertEquals("TYPE2", data.getTypeLabels()[1]); 52 | } 53 | @Test 54 | public void 
testNodeLabelsWithLabelType() throws Exception { 55 | RowData data = new RowData("foo:label", "\t", 3); 56 | data.processLine("TYPE1,TYPE2"); 57 | assertEquals("TYPE1", data.getTypeLabels()[0]); 58 | assertEquals("TYPE2", data.getTypeLabels()[1]); 59 | } 60 | @Test 61 | public void testArrayType() throws Exception { 62 | RowData data = new RowData("a:int\tb:float\tc:string_array", "\t", 0); 63 | data.processLine("100\t100.0\tbagels,coffee,tea"); 64 | Map row = data.getProperties(); 65 | assertEquals(100, row.get("a")); 66 | assertEquals(true,row.get("b") instanceof Float); 67 | assertEquals(100.0F, row.get("b")); 68 | assertEquals(true,row.get("b") instanceof Float); 69 | assertEquals(100.0F, row.get("b")); 70 | assertEquals(true,row.get("c") instanceof String[]); 71 | assertArrayEquals(new String[]{"bagels", "coffee", "tea"}, (String[]) row.get("c")); 72 | } 73 | 74 | @Test 75 | public void testBooleanArrayType() throws Exception { 76 | RowData data = new RowData("a:int\tb:float\tc:boolean_array", "\t", 0); 77 | data.processLine("100\t100.0\ttrue,false,true"); 78 | Map row = data.getProperties(); 79 | assertEquals(100, row.get("a")); 80 | assertEquals(true,row.get("b") instanceof Float); 81 | assertEquals(100.0F, row.get("b")); 82 | assertEquals(true,row.get("b") instanceof Float); 83 | assertEquals(100.0F, row.get("b")); 84 | assertEquals(true,row.get("c") instanceof boolean[]); 85 | Assert.assertTrue(Arrays.equals(new boolean[]{true, false, true}, (boolean[]) row.get("c"))); 86 | } 87 | @Test 88 | public void testIntArrayType() throws Exception { 89 | RowData data = new RowData("a:int\tb:float\tc:int_array", "\t", 0); 90 | data.processLine("100\t100.0\t1,2,3"); 91 | Map row = data.getProperties(); 92 | assertEquals(100, row.get("a")); 93 | assertEquals(true,row.get("b") instanceof Float); 94 | assertEquals(100.0F, row.get("b")); 95 | assertEquals(true,row.get("b") instanceof Float); 96 | assertEquals(100.0F, row.get("b")); 97 | assertEquals(true,row.get("c") 
instanceof int[]); 98 | assertArrayEquals(new int[] {1,2,3}, (int[])row.get("c")); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/ImporterIntegrationTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport; 2 | 3 | import org.junit.Test; 4 | import org.neo4j.consistency.ConsistencyCheckTool; 5 | import org.neo4j.graphdb.GraphDatabaseService; 6 | import org.neo4j.graphdb.Node; 7 | import org.neo4j.graphdb.Transaction; 8 | import org.neo4j.graphdb.factory.GraphDatabaseFactory; 9 | import org.neo4j.io.fs.FileUtils; 10 | 11 | import java.io.File; 12 | import java.io.FileWriter; 13 | 14 | import static org.junit.Assert.assertTrue; 15 | 16 | /** 17 | * @author Michael Hunger @since 05.11.13 18 | */ 19 | public class ImporterIntegrationTest { 20 | 21 | public static final String DB_DIRECTORY = "target/index-reuse.db"; 22 | 23 | @Test 24 | public void testMain() throws Exception { 25 | FileUtils.deleteRecursively(new File(DB_DIRECTORY)); 26 | TestDataGenerator.main("1000","10","A,B,C","sorted"); 27 | Importer.main(DB_DIRECTORY,"nodes.csv","rels.csv"); 28 | ConsistencyCheckTool.main(new String[]{DB_DIRECTORY}); 29 | } 30 | 31 | @Test 32 | public void testImportHashes() throws Exception { 33 | FileUtils.deleteRecursively(new File(DB_DIRECTORY)); 34 | FileWriter writer = new FileWriter("target/hashes.csv"); 35 | writer.write("a\n000000F8BE951D6DE6480F4AFDFB670C553E47C0\r\n0000021449360C1A398ED9A18800B2B13AA098A4\r\n00000DABDE4C555FC82F7D534835247B94873C2C\r\n00001BE4128DB41729365A41D3AC1D019E5ED8A6\r\n"); 36 | writer.close(); 37 | Importer.main(DB_DIRECTORY,"target/hashes.csv"); 38 | ConsistencyCheckTool.main(new String[]{DB_DIRECTORY}); 39 | GraphDatabaseService db = new GraphDatabaseFactory().newEmbeddedDatabase(new File(DB_DIRECTORY)); 40 | try (Transaction tx = db.beginTx()) { 41 | for (Node node : 
db.getAllNodes()) { 42 | Object value = node.getProperty("a", null); 43 | System.out.println("value = " + value); 44 | assertTrue(value != null); 45 | } 46 | tx.success(); 47 | } 48 | db.shutdown(); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/ImporterTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport; 2 | 3 | import org.junit.Before; 4 | import org.junit.Test; 5 | import org.mockito.Matchers; 6 | import org.neo4j.batchimport.index.LongIterableIndexHits; 7 | import org.neo4j.batchimport.utils.Config; 8 | import org.neo4j.graphdb.DynamicLabel; 9 | import org.neo4j.graphdb.Label; 10 | import org.neo4j.graphdb.RelationshipType; 11 | import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider; 12 | import org.neo4j.unsafe.batchinsert.BatchInserter; 13 | import org.neo4j.unsafe.batchinsert.BatchInserterIndex; 14 | import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider; 15 | 16 | import java.io.File; 17 | import java.io.StringReader; 18 | import java.util.Arrays; 19 | import java.util.Map; 20 | 21 | import static java.util.Arrays.*; 22 | import org.junit.Assert; 23 | import org.mockito.ArgumentCaptor; 24 | import static org.mockito.Matchers.*; 25 | import static org.mockito.Mockito.*; 26 | import static org.neo4j.helpers.collection.MapUtil.map; 27 | 28 | public class ImporterTest { 29 | 30 | private BatchInserter inserter; 31 | private LuceneBatchInserterIndexProvider provider; 32 | private Importer importer; 33 | private BatchInserterIndex index; 34 | 35 | @Before 36 | public void setUp() throws Exception { 37 | inserter = mock(BatchInserter.class); 38 | provider = mock(LuceneBatchInserterIndexProvider.class); 39 | index = mock(BatchInserterIndex.class); 40 | when(provider.nodeIndex(eq("index-a"),anyMap())).thenReturn(index); 41 | 42 | final Map configData = 
Config.config("batch.properties"); 43 | new IndexInfo("node_index", "index-a", "exact", null).addToConfig(configData); 44 | importer = new Importer(File.createTempFile("test", "db"), new Config(configData)) { 45 | @Override 46 | protected BatchInserter createBatchInserter(File graphDb, Config config) { 47 | return inserter; 48 | } 49 | 50 | @Override 51 | protected BatchInserterIndexProvider createIndexProvider(boolean luceneOnlyIndex) { 52 | return provider; 53 | } 54 | }; 55 | } 56 | 57 | @Test 58 | public void testImportSimpleNode() throws Exception { 59 | importer.importNodes(new StringReader("a\nfoo")); 60 | importer.finish(); 61 | verify(inserter, times(1)).createNode(eq(map("a", "foo"))); 62 | } 63 | 64 | @Test 65 | public void testImportHashes() throws Exception { 66 | importer.importNodes(new StringReader("a\n000000F8BE951D6DE6480F4AFDFB670C553E47C0\n0000021449360C1A398ED9A18800B2B13AA098A4\n00000DABDE4C555FC82F7D534835247B94873C2C\n00001BE4128DB41729365A41D3AC1D019E5ED8A6\n")); 67 | importer.finish(); 68 | verify(inserter, times(1)).createNode(eq(map("a", "000000F8BE951D6DE6480F4AFDFB670C553E47C0"))); 69 | verify(inserter, times(1)).createNode(eq(map("a", "0000021449360C1A398ED9A18800B2B13AA098A4"))); 70 | verify(inserter, times(1)).createNode(eq(map("a", "00000DABDE4C555FC82F7D534835247B94873C2C"))); 71 | verify(inserter, times(1)).createNode(eq(map("a", "00001BE4128DB41729365A41D3AC1D019E5ED8A6"))); 72 | } 73 | 74 | @Test 75 | public void testImportSimpleNodeWithId() throws Exception { 76 | importer.importNodes(new StringReader("i:id\ta\n123\tfoo")); 77 | importer.finish(); 78 | verify(inserter, times(1)).createNode(eq(123L),eq(map("a", "foo"))); 79 | } 80 | 81 | @Test 82 | public void testImportNodeWithNoLabel() throws Exception { 83 | importer.importNodes(new StringReader("a\t:label\nfoo\t")); 84 | importer.finish(); 85 | verify(inserter, times(1)).createNode(eq(map("a", "foo"))); 86 | } 87 | @Test 88 | public void testImportNodeWithLabel() throws 
Exception { 89 | importer.importNodes(new StringReader("a\t:label\nfoo\tbar")); 90 | importer.finish(); 91 | verify(inserter, times(1)).createNode(eq(map("a", "foo")),eq(DynamicLabel.label("bar"))); 92 | } 93 | 94 | @Test 95 | public void testImportNodeWithTwoLabels() throws Exception { 96 | importer.importNodes(new StringReader("a\t:label\nfoo\tbar,bor")); 97 | importer.finish(); 98 | verify(inserter, times(1)).createNode(eq(map("a", "foo")),eq(DynamicLabel.label("bar")),eq(DynamicLabel.label("bor"))); 99 | } 100 | 101 | @Test 102 | public void testImportSimpleNodeWithNewlineAtEnd() throws Exception { 103 | importer.importNodes(new StringReader("a\nfoo\n")); 104 | importer.finish(); 105 | verify(inserter, times(1)).createNode(eq(map("a", "foo"))); 106 | } 107 | @Test 108 | public void testImportSimpleNodeWithUmlauts() throws Exception { 109 | importer.importNodes(new StringReader("ö\näáß")); 110 | importer.finish(); 111 | verify(inserter, times(1)).createNode(eq(map("ö", "äáß"))); 112 | } 113 | @Test 114 | public void testImportNodeWithMultipleProps() throws Exception { 115 | importer.importNodes(new StringReader("a\tb\nfoo\tbar")); 116 | importer.finish(); 117 | verify(inserter, times(1)).createNode(eq(map("a", "foo","b","bar"))); 118 | } 119 | @Test 120 | public void testImportNodeWithIndex() throws Exception { 121 | importer.importNodes(new StringReader("a:string:index-a\tb\nfoo\tbar")); 122 | importer.finish(); 123 | verify(inserter, times(1)).createNode(eq(map("a", "foo", "b", "bar"))); 124 | verify(index, times(1)).add(eq(0L), eq(map("a", "foo"))); 125 | } 126 | 127 | @Test 128 | public void testImportRelWithIndexLookup() throws Exception { 129 | when(index.get("a","foo")).thenReturn(new LongIterableIndexHits(asList(42L))); 130 | importer.importRelationships(new StringReader("a:string:index-a\tb\tTYPE\nfoo\t123\tFOOBAR")); 131 | importer.finish(); 132 | verify(index, times(1)).get(eq("a"), eq("foo")); 133 | verify(inserter, 
times(1)).createRelationship(eq(42L), eq(123L), Matchers.any(RelationshipType.class),eq(map())); 134 | } 135 | 136 | @Test 137 | public void testImportRelationshipsWithNonIndexedNodes() throws Exception { 138 | when(index.get("node","a")).thenReturn(new LongIterableIndexHits(asList(1L))); 139 | when(index.get("node","b")).thenReturn(new LongIterableIndexHits(Arrays.asList())); 140 | importer.importRelationships(new StringReader("node:string:index-a\tnode:string:index-a\ttype\na\ta\tTYPE\na\tb\tTYPE\nb\ta\tTYPE")); 141 | importer.finish(); 142 | verify(inserter, times(1)).createRelationship(eq(1L), eq(1L), argThat(new RelationshipMatcher("TYPE")),eq(map())); 143 | verify(inserter, never()).createRelationship(eq(1L), eq(-1L), argThat(new RelationshipMatcher("TYPE")),eq(map())); 144 | verify(inserter, never()).createRelationship(eq(-1L), eq(1L), argThat(new RelationshipMatcher("TYPE")),eq(map())); 145 | } 146 | 147 | @Test 148 | public void testImportNodeWithIndividualTypes() throws Exception { 149 | importer.importNodes(new StringReader("a:int\tb:float\tc:float\n10\t10.0\t1E+10")); 150 | importer.finish(); 151 | verify(inserter, times(1)).createNode(eq(map("a", 10,"b",10.0F,"c",1E+10F))); 152 | } 153 | 154 | @Test 155 | public void testImportNodeWithArrayTypes() throws Exception { 156 | importer.importNodes(new StringReader("a:STRING_ARRAY\tb:float\tc:float\n10,11,12\t10.0\t1E+10")); 157 | importer.finish(); 158 | String[] expectedArray = {"10","11","12"}; 159 | ArgumentCaptor argument = ArgumentCaptor.forClass(Map.class); 160 | verify(inserter, times(1)).createNode(argument.capture()); 161 | Map inputMap = argument.getValue(); 162 | Assert.assertArrayEquals((String[])inputMap.get("a"),expectedArray); 163 | } 164 | 165 | @Test 166 | public void testImportNodeWithArrayTypesCustomSeparator() throws Exception { 167 | Config.ARRAYS_SEPARATOR = "%"; 168 | importer.importNodes(new StringReader("a:STRING_ARRAY\tb:float\tc:float\n10%11%12\t10.0\t1E+10")); 169 | 
importer.finish(); 170 | String[] expectedArray = {"10","11","12"}; 171 | ArgumentCaptor argument = ArgumentCaptor.forClass(Map.class); 172 | verify(inserter, times(1)).createNode(argument.capture()); 173 | Map inputMap = argument.getValue(); 174 | Assert.assertArrayEquals((String[])inputMap.get("a"),expectedArray); 175 | } 176 | 177 | @Test 178 | public void testImportSimpleRelationship() throws Exception { 179 | importer.importRelationships(new StringReader("start\tend\ttype\ta\n1\t2\tTYPE\tfoo")); 180 | importer.finish(); 181 | verify(inserter, times(1)).createRelationship(eq(1L), eq(2L), argThat(new RelationshipMatcher("TYPE")), eq(map("a", "foo"))); 182 | } 183 | 184 | @Test 185 | public void testImportSimpleRelationshipWithTypeType() throws Exception { 186 | importer.importRelationships(new StringReader("start\tend\t:label\ta\n1\t2\tTYPE\tfoo")); 187 | importer.finish(); 188 | verify(inserter, times(1)).createRelationship(eq(1L), eq(2L), argThat(new RelationshipMatcher("TYPE")), eq(map("a", "foo"))); 189 | } 190 | 191 | @Test 192 | public void testImportSimpleRelationshipWithNewlineOnce() throws Exception { 193 | importer.importRelationships(new StringReader("start\tend\ttype\ta\n1\t2\tTYPE\tfoo\n")); 194 | importer.finish(); 195 | verify(inserter, times(1)).createRelationship(eq(1L), eq(2L), argThat(new RelationshipMatcher("TYPE")), eq(map("a", "foo"))); 196 | } 197 | 198 | @Test 199 | public void testImportRelationshipWithIndividualTypes() throws Exception { 200 | importer.importRelationships(new StringReader("start\tend\ttype\ta:int\tb:float\tc:float\n1\t2\tTYPE\t10\t10.0\t1E+10")); 201 | importer.finish(); 202 | verify(inserter, times(1)).createRelationship(eq(1L), eq(2L), argThat(new RelationshipMatcher("TYPE")), eq(map("a", 10, "b", 10.0F, "c", 1E+10F))); 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/IndexInfoTest.java: 
-------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport; 2 | 3 | import org.junit.Test; 4 | 5 | import java.io.File; 6 | import java.io.FileOutputStream; 7 | 8 | import static org.junit.Assert.assertEquals; 9 | import static org.neo4j.helpers.collection.MapUtil.stringMap; 10 | 11 | /** 12 | * @author mh 13 | * @since 11.06.13 14 | */ 15 | public class IndexInfoTest { 16 | 17 | private static final String INDEX_FILE = "target/node_index.csv"; 18 | 19 | @Test 20 | public void testCreateConfigEntry() throws Exception { 21 | assertEquals(stringMap("batch_import.node_index.foo", "exact"), new IndexInfo("node_index", "foo", "exact", null).addToConfig(stringMap())); 22 | } 23 | 24 | @Test 25 | public void testReadFromConfigEntry() throws Exception { 26 | final IndexInfo info = IndexInfo.fromConfigEntry(stringMap("batch_import.node_index.foo", "exact:file").entrySet().iterator().next()); 27 | assertEquals("node_index",info.elementType); 28 | assertEquals("foo",info.indexName); 29 | assertEquals("exact",info.indexType); 30 | assertEquals("file",info.indexFileName); 31 | } 32 | 33 | @Test 34 | public void testCreateFromParams() throws Exception { 35 | final IndexInfo info = new IndexInfo(new String[]{"relationship_index", "bar", "fulltext", "file"},0); 36 | assertEquals("relationship_index",info.elementType); 37 | assertEquals("bar",info.indexName); 38 | assertEquals("fulltext",info.indexType); 39 | assertEquals("file",info.indexFileName); 40 | } 41 | @Test 42 | public void testCreateFromParamsWithOffset() throws Exception { 43 | final IndexInfo info = new IndexInfo(new String[]{"a","b","relationship_index", "bar", "fulltext", "file"},2); 44 | assertEquals("relationship_index",info.elementType); 45 | assertEquals("bar",info.indexName); 46 | assertEquals("fulltext",info.indexType); 47 | assertEquals("file",info.indexFileName); 48 | } 49 | 50 | @Test(expected = IllegalArgumentException.class) 51 | public void 
testInvalidIndexType() throws Exception { 52 | new IndexInfo("node_index","foo","bar",null); 53 | } 54 | 55 | @Test(expected = IllegalArgumentException.class) 56 | public void testInvalidElementType() throws Exception { 57 | new IndexInfo("foo","exact","bar",null); 58 | } 59 | 60 | @Test 61 | public void testShouldImportFile() throws Exception { 62 | assertEquals(false, new IndexInfo("node_index","name","exact",null).shouldImportFile()); 63 | assertEquals(false, new IndexInfo("node_index","name","exact", "target").shouldImportFile()); 64 | assertEquals(false, new IndexInfo("node_index","name","exact", INDEX_FILE).shouldImportFile()); 65 | final FileOutputStream fos = new FileOutputStream(INDEX_FILE); 66 | fos.write(0); 67 | fos.close(); 68 | assertEquals(true, new IndexInfo("node_index","name", "exact", INDEX_FILE).shouldImportFile()); 69 | new File(INDEX_FILE).delete(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/RelationshipMatcher.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport; 2 | 3 | import org.hamcrest.BaseMatcher; 4 | import org.hamcrest.Description; 5 | import org.neo4j.graphdb.DynamicRelationshipType; 6 | import org.neo4j.graphdb.RelationshipType; 7 | 8 | class RelationshipMatcher extends BaseMatcher { 9 | public RelationshipType type; 10 | private Object other; 11 | 12 | RelationshipMatcher(RelationshipType type) { 13 | this.type = type; 14 | } 15 | RelationshipMatcher(String name) { 16 | this(DynamicRelationshipType.withName(name)); 17 | } 18 | 19 | public boolean matches(Object other) { 20 | this.other = other; 21 | return ((RelationshipType)other).name().equals(type.name()); 22 | } 23 | 24 | @Override 25 | public void describeTo(Description description) { 26 | description.appendText("Expected "+ type +" but got "+other); 27 | } 28 | } 29 | 
-------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/TestDataGenerator.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport; 2 | 3 | import org.junit.Ignore; 4 | 5 | import java.io.BufferedWriter; 6 | import java.io.FileWriter; 7 | import java.io.IOException; 8 | import java.util.Random; 9 | 10 | /** 11 | * @author mh 12 | * @since 13.01.12 13 | */ 14 | @Ignore 15 | public class TestDataGenerator { 16 | 17 | private static final int NODES = 1 * 1000 * 1000; 18 | private static final int RELS_PER_NODE = 50; 19 | private static final String[] TYPES = {"ONE","TWO","THREE","FOUR","FIVE","SIX","SEVEN","EIGHT","NINE","TEN"}; 20 | 21 | public static void main(String...args) throws IOException { 22 | System.out.println("Usage: TestDataGenerator NODES RELS_PER_NODE TYPE1,TYPE2,TYPE3 sorted"); 23 | long relCount=0, time = System.currentTimeMillis(); 24 | 25 | int nodes = args.length > 0 ? Integer.parseInt(args[0]) : NODES; 26 | int relsPerNode = args.length > 1 ? Integer.parseInt(args[1]) : RELS_PER_NODE; 27 | String[] types = args.length > 2 ? 
args[2].split(",") : TYPES; 28 | final boolean sorted = args.length > 0 && args[args.length-1].equalsIgnoreCase("sorted"); 29 | System.out.println("Using: TestDataGenerator "+nodes+" "+relsPerNode+" "+ Utils.join(types, ",")+" "+(sorted?"sorted":"")); 30 | 31 | BufferedWriter nodeFile = new BufferedWriter(new FileWriter("nodes.csv")); 32 | nodeFile.write("Node\tRels\tProperty\tLabel:label\tCounter:int\n"); 33 | BufferedWriter relFile = new BufferedWriter(new FileWriter("rels.csv")); 34 | relFile.write("Start\tEnde\tType\tProperty\tCounter:long\n"); 35 | 36 | if (sorted) { 37 | relCount = generateSortedRels(relCount, nodeFile, relFile, nodes, relsPerNode, types); 38 | } else { 39 | relCount = generateRandomRels(relCount, nodeFile, relFile, nodes, relsPerNode, types); 40 | } 41 | nodeFile.close(); 42 | relFile.close(); 43 | long seconds = (System.currentTimeMillis() - time) / 1000; 44 | System.out.println("Creating " + nodes + " Nodes and " + relCount + (sorted? " sorted " : "") + " Relationships took " + seconds + " seconds."); 45 | } 46 | 47 | private static long generateRandomRels(long relCount, BufferedWriter nodeFile, BufferedWriter relFile, int nodes, int relsPerNode, String[] types) throws IOException { 48 | Random rnd = new Random(); 49 | int numTypes = types.length; 50 | for (int node = 0; node < nodes; node++) { 51 | final int rels = rnd.nextInt(relsPerNode); 52 | nodeFile.write(node+"\t"+rels+"\tTEST\t"+types[node % numTypes]+"\t"+node+"\n"); 53 | for (int rel = rels; rel >= 0; rel--) { 54 | relCount++; 55 | final int node1 = rnd.nextInt(nodes); 56 | final int node2 = rnd.nextInt(nodes); 57 | relFile.write(node1 + "\t" + node2 + "\t" + types[rel % numTypes] + "\t" + "Property"+"\t" + relCount+ "\n"); 58 | } 59 | } 60 | return relCount; 61 | } 62 | private static long generateSortedRels(long relCount, BufferedWriter nodeFile, BufferedWriter relFile, int nodes, int relsPerNode, String[] types) throws IOException { 63 | Random rnd = new Random(); 64 | int 
numTypes = types.length; 65 | for (int node = 0; node < nodes; node++) { 66 | final int rels = rnd.nextInt(relsPerNode); 67 | nodeFile.write(node+"\t"+rels+"\tTEST\t"+node+"\n"); 68 | for (int rel = rels; rel >= 0; rel--) { 69 | relCount++; 70 | final int target = node + rnd.nextInt(nodes -node); 71 | final boolean outgoing = rnd.nextBoolean(); 72 | if (outgoing) { 73 | relFile.write(node + "\t" + target + "\t" + types[rel % numTypes] + "\t" + "Property"+"\t" + relCount+ "\n"); 74 | } else { 75 | relFile.write(target + "\t" + node + "\t" + types[rel % numTypes] + "\t" + "Property"+"\t" + relCount+ "\n"); 76 | } 77 | } 78 | } 79 | return relCount; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/TestImporter.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport; 2 | 3 | import org.junit.Ignore; 4 | import org.neo4j.graphdb.RelationshipType; 5 | import org.neo4j.helpers.collection.MapUtil; 6 | import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider; 7 | import org.neo4j.io.fs.FileUtils; 8 | import org.neo4j.unsafe.batchinsert.BatchInserter; 9 | import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider; 10 | import org.neo4j.unsafe.batchinsert.BatchInserters; 11 | 12 | import java.io.File; 13 | import java.io.IOException; 14 | import java.util.Map; 15 | import java.util.Random; 16 | 17 | import static org.neo4j.helpers.collection.MapUtil.map; 18 | 19 | @Ignore 20 | public class TestImporter { 21 | public static final int NUM_TYPES = 10; 22 | enum RelTypes implements RelationshipType { 23 | ONE,TWO,THREE,FOUR,FIVE,SIX,SEVEN,EIGHT,NINE,TEN 24 | } 25 | 26 | private static Report report; 27 | private BatchInserter db; 28 | private BatchInserterIndexProvider lucene; 29 | 30 | public TestImporter(File graphDb) throws IOException { 31 | if (!new File("batch.properties").exists()) { 32 | 
System.out.println("Need a Configuration File"); 33 | return; 34 | } 35 | System.out.println("Using Existing Configuration File"); 36 | 37 | Map config = MapUtil.load(new File("batch.properties")); 38 | 39 | db = createBatchInserter(graphDb, config); 40 | lucene = createIndexProvider(); 41 | report = createReport(); 42 | } 43 | 44 | protected StdOutReport createReport() { 45 | return new StdOutReport(10 * 1000 * 1000, 100); 46 | } 47 | 48 | protected LuceneBatchInserterIndexProvider createIndexProvider() { 49 | return new LuceneBatchInserterIndexProvider(db); 50 | } 51 | 52 | protected BatchInserter createBatchInserter(File graphDb, Map config) throws IOException { 53 | return BatchInserters.inserter(new File(graphDb.getAbsolutePath()), config); 54 | } 55 | 56 | public static void main(String[] args) throws IOException { 57 | if (args.length != 3) { 58 | System.err.println("Usage java -jar batchperformance.jar db-dir #nodes #rels/node"); 59 | } 60 | File graphDb = new File(args[0]); 61 | int nodesCount = Integer.parseInt(args[1]); // 40M 62 | int relsPerNode = Integer.parseInt(args[2]); // 10 63 | 64 | if (graphDb.exists()) { 65 | FileUtils.deleteRecursively(graphDb); 66 | } 67 | // int[] targetNodeIds = createTargetNodeIds(nodesCount); 68 | int[] targetNodeOffsets = createTargetNodeIds(nodesCount); 69 | long time=System.currentTimeMillis(); 70 | TestImporter importer = new TestImporter(graphDb); 71 | try { 72 | importer.createNodes(nodesCount,map("blocked",Boolean.TRUE,"age",42L)); 73 | importer.createRels(nodesCount, relsPerNode, targetNodeOffsets,map("weight",10F)); 74 | } finally { 75 | importer.finish(); 76 | } 77 | System.out.println("Import of "+nodesCount+" nodes took "+(System.currentTimeMillis()-time)+" ms."); 78 | } 79 | 80 | private static int[] createTargetNodeIds(int nodesCount) { 81 | int[] targetNodes = new int[nodesCount]; 82 | Random rnd=new Random(); 83 | for (int i = 0; i < nodesCount; i++) { 84 | targetNodes[i]=Math.abs(rnd.nextInt() % 
nodesCount); 85 | } 86 | return targetNodes; 87 | } 88 | 89 | private static int[] createTargetNodeOffsets(int relsPerNode) { 90 | int[] targetNodeOffsets = new int[relsPerNode]; 91 | for (int i = 0; i < relsPerNode; i++) { 92 | targetNodeOffsets[i]=1 << 2 * i; 93 | } 94 | return targetNodeOffsets; 95 | } 96 | 97 | public void createRels(int nodesCount, int relsPerNode, int[] targetNodeOffsets, Map props) { 98 | Random rnd = new Random(); 99 | RelTypes[] values = RelTypes.values(); 100 | 101 | report.reset(); 102 | for (int node = 0; node < nodesCount; node++) { 103 | final int rels = relsPerNode; // rnd.nextInt(relsPerNode); 104 | 105 | for (int rel = rels; rel >= 0; rel--) { 106 | // final long node1 = Math.abs(rnd.nextLong() % nodesCount); 107 | // final long node2 = Math.abs(rnd.nextLong() % nodesCount); 108 | // final long node2 = (node + rels +1) % nodesCount; 109 | long node2 = (node + targetNodeOffsets[rel]) % nodesCount; 110 | db.createRelationship(node, node2, RelTypes.ONE, props); // values[rel % NUM_TYPES] 111 | report.dots(); 112 | } 113 | } 114 | report.finishImport("Relationships"); 115 | } 116 | 117 | private void createNodes(long nodesCount, Map props) { 118 | report.reset(); 119 | for (int node = 0; node < nodesCount; node++) { 120 | db.createNode(props); 121 | report.dots(); 122 | } 123 | report.finishImport("Nodes"); 124 | } 125 | 126 | void finish() { 127 | lucene.shutdown(); 128 | db.shutdown(); 129 | report.finish(); 130 | } 131 | 132 | static class StdOutReport implements Report { 133 | private final long batch; 134 | private final long dots; 135 | private long count; 136 | private long total = System.currentTimeMillis(), time, batchTime; 137 | 138 | public StdOutReport(long batch, int dots) { 139 | this.batch = batch; 140 | this.dots = batch / dots; 141 | } 142 | 143 | @Override 144 | public void reset() { 145 | count = 0; 146 | batchTime = time = System.currentTimeMillis(); 147 | } 148 | 149 | @Override 150 | public void finish() { 151 | 
System.out.println("\nTotal import time: "+ (System.currentTimeMillis() - total) / 1000 + " seconds "); 152 | } 153 | 154 | @Override 155 | public void dots() { 156 | if ((++count % dots) != 0) return; 157 | System.out.print("."); 158 | if ((count % batch) != 0) return; 159 | long now = System.currentTimeMillis(); 160 | System.out.println(" "+ (now - batchTime) + " ms for "+batch); 161 | batchTime = now; 162 | } 163 | 164 | @Override 165 | public void finishImport(String type) { 166 | System.out.println("\nImporting " + count + " " + type + " took " + (System.currentTimeMillis() - time) / 1000 + " seconds "); 167 | } 168 | 169 | @Override 170 | public long getCount() { 171 | return count; 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/csv/ChunkerPerformanceTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.csv; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Before; 5 | import org.junit.Ignore; 6 | import org.junit.Test; 7 | import org.neo4j.batchimport.importer.RowData; 8 | import org.neo4j.batchimport.utils.Chunker; 9 | 10 | import java.io.*; 11 | 12 | /** 13 | * @author mh 14 | * @since 11.06.13 15 | */ 16 | @Ignore("Performance") 17 | public class ChunkerPerformanceTest { 18 | 19 | @Before 20 | public void setUp() throws Exception { 21 | PerformanceTestFile.createTestFileIfNeeded(); 22 | } 23 | 24 | @Test 25 | public void testPerformance() throws Exception { 26 | final BufferedReader reader = new BufferedReader(new FileReader(PerformanceTestFile.TEST_CSV)); 27 | final Chunker chunker = new Chunker(reader, '\t'); 28 | 29 | int res = 0; 30 | long time = System.currentTimeMillis(); 31 | String token; 32 | while ( (token = chunker.nextWord()) != Chunker.EOF) { 33 | if (token!=Chunker.NO_VALUE && token != Chunker.EOL) res++; 34 | } 35 | time = System.currentTimeMillis() - time; 36 | 
System.out.println("time = " + time + " ms."); 37 | Assert.assertEquals((PerformanceTestFile.ROWS) * PerformanceTestFile.COLS, res); 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/csv/ChunkerRowDataTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.csv; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.neo4j.batchimport.importer.ChunkerLineData; 6 | 7 | import java.io.StringReader; 8 | import java.util.Map; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | 12 | /** 13 | * @author mh 14 | * @since 29.11.12 15 | */ 16 | public class ChunkerRowDataTest { 17 | 18 | @Test 19 | public void testTrailingEmptyCells() throws Exception { 20 | ChunkerLineData rowData = new ChunkerLineData(new StringReader("a\tb\tc\n\t2\t3"), '\t', 0); 21 | final Map map = rowData.updateMap(); 22 | assertEquals(null,map.get("a")); 23 | assertEquals("2",map.get("b")); 24 | assertEquals("3",map.get("c")); 25 | } 26 | @Test 27 | public void testLeadingAndTrailingEmptyCells() throws Exception { 28 | ChunkerLineData rowData = new ChunkerLineData(new StringReader("a\tb\tc\n\t2\t"), '\t', 0); 29 | final Map map = rowData.updateMap(); 30 | assertEquals(null,map.get("a")); 31 | assertEquals("2",map.get("b")); 32 | assertEquals(null,map.get("c")); 33 | } 34 | @Test 35 | public void testLeadingEmptyCells() throws Exception { 36 | ChunkerLineData rowData = new ChunkerLineData(new StringReader("a\tb\tc\n1\t\t"), '\t', 0); 37 | final Map map = rowData.updateMap(); 38 | assertEquals("1",map.get("a")); 39 | assertEquals(null,map.get("b")); 40 | assertEquals(null,map.get("c")); 41 | } 42 | @Test 43 | public void testEmptyRow() throws Exception { 44 | ChunkerLineData rowData = new ChunkerLineData(new StringReader("a\tb\tc\n"), '\t', 0); 45 | final Map map = rowData.updateMap(); 46 | 
assertEquals(null,map.get("a")); 47 | assertEquals(null,map.get("b")); 48 | assertEquals(null,map.get("c")); 49 | } 50 | 51 | @Test 52 | public void testLeadOneRow() throws Exception { 53 | ChunkerLineData rowData = new ChunkerLineData(new StringReader("a\tb\tc\n1\t"), '\t', 0); 54 | final Map map = rowData.updateMap(); 55 | assertEquals("1",map.get("a")); 56 | assertEquals(null,map.get("b")); 57 | assertEquals(null,map.get("c")); 58 | } 59 | @Test 60 | public void testLeadTwoRow() throws Exception { 61 | ChunkerLineData rowData = new ChunkerLineData(new StringReader("a\tb\tc\n1\t2"), '\t', 0); 62 | 63 | final Map map = rowData.updateMap(); 64 | assertEquals("1",map.get("a")); 65 | assertEquals("2",map.get("b")); 66 | assertEquals(null,map.get("c")); 67 | } 68 | @Test 69 | public void testNormalCells() throws Exception { 70 | ChunkerLineData rowData = new ChunkerLineData(new StringReader("a\tb\tc\n1\t2\t3"), '\t', 0); 71 | final Map map = rowData.updateMap(); 72 | assertEquals("1",map.get("a")); 73 | assertEquals("2",map.get("b")); 74 | assertEquals("3",map.get("c")); 75 | } 76 | @Test 77 | public void testHandleNewLines() throws Exception { 78 | ChunkerLineData rowData = new ChunkerLineData(new StringReader("a,b,c:int\r\n1,2,3\r\n4,5,6"), ',', 0); 79 | Map map = rowData.updateMap(); 80 | assertEquals("1",map.get("a")); 81 | assertEquals("2",map.get("b")); 82 | assertEquals(3,map.get("c")); 83 | map = rowData.updateMap(); 84 | assertEquals("4",map.get("a")); 85 | assertEquals("5",map.get("b")); 86 | assertEquals(6,map.get("c")); 87 | } 88 | 89 | @Test 90 | public void testNormalWithCommas() throws Exception { 91 | ChunkerLineData rowData = new ChunkerLineData(new StringReader("a,b,c\n1,2,3"), ',', 0); 92 | final Map map = rowData.updateMap(); 93 | assertEquals("1",map.get("a")); 94 | assertEquals("2",map.get("b")); 95 | assertEquals("3",map.get("c")); 96 | } 97 | @Test 98 | public void testNormalCellsTwoRows() throws Exception { 99 | ChunkerLineData rowData = new 
ChunkerLineData(new StringReader("a\tb\tc\n1\t2\t3\n4\t5\t6"), '\t', 0); 100 | final Map row1 = rowData.updateMap(); 101 | assertEquals("1", row1.get("a")); 102 | assertEquals("2", row1.get("b")); 103 | assertEquals("3",row1.get("c")); 104 | final Map row2 = rowData.updateMap(); 105 | assertEquals("4", row2.get("a")); 106 | assertEquals("5", row2.get("b")); 107 | assertEquals("6",row2.get("c")); 108 | } 109 | 110 | @Test 111 | public void testConvert() throws Exception { 112 | ChunkerLineData rowData = new ChunkerLineData(new StringReader("a:int\tb:float\tc:boolean"+"\n"+"1\t2.1\ttrue"), '\t', 0); 113 | final Map map = rowData.updateMap(); 114 | assertEquals(1,map.get("a")); 115 | assertEquals(2.1F,map.get("b")); 116 | assertEquals(true,map.get("c")); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/csv/CsvLineDataTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.csv; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | import org.neo4j.batchimport.importer.CsvLineData; 6 | 7 | import java.io.StringReader; 8 | import java.util.Map; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | 12 | /** 13 | * @author mh 14 | * @since 29.11.12 15 | */ 16 | public class CsvLineDataTest { 17 | 18 | @Test 19 | public void testInvalidConversion() throws Exception { 20 | try { 21 | CsvLineData rowData = new CsvLineData(new StringReader("a\tb:int\tc\n2\tfoo\t3"), '\t', 0); 22 | rowData.updateMap(); 23 | Assert.fail("Expected conversion exception"); 24 | } catch(RuntimeException e) { 25 | assertEquals(true,e.getMessage().contains("row 1")); 26 | assertEquals(true,e.getMessage().contains("foo")); 27 | assertEquals(true,e.getMessage().contains("1. 
b")); 28 | assertEquals(true,e.getMessage().contains("type: INT")); 29 | assertEquals(true,e.getMessage().contains("NumberFormatException")); 30 | } 31 | } 32 | 33 | @Test 34 | public void testTrailingEmptyCells() throws Exception { 35 | CsvLineData rowData = new CsvLineData(new StringReader("a\tb\tc\n\t2\t3"), '\t', 0); 36 | final Map map = rowData.updateMap(); 37 | assertEquals(null,map.get("a")); 38 | assertEquals("2",map.get("b")); 39 | assertEquals("3",map.get("c")); 40 | } 41 | @Test 42 | public void testLeadingAndTrailingEmptyCells() throws Exception { 43 | CsvLineData rowData = new CsvLineData(new StringReader("a\tb\tc\n\t2\t"), '\t', 0); 44 | final Map map = rowData.updateMap(); 45 | assertEquals(null,map.get("a")); 46 | assertEquals("2",map.get("b")); 47 | assertEquals(null,map.get("c")); 48 | } 49 | @Test 50 | public void testLeadingEmptyCells() throws Exception { 51 | CsvLineData rowData = new CsvLineData(new StringReader("a\tb\tc\n1\t\t"), '\t', 0); 52 | final Map map = rowData.updateMap(); 53 | assertEquals("1",map.get("a")); 54 | assertEquals(null,map.get("b")); 55 | assertEquals(null,map.get("c")); 56 | } 57 | @Test 58 | public void testEmptyRow() throws Exception { 59 | CsvLineData rowData = new CsvLineData(new StringReader("a\tb\tc\n"), '\t', 0); 60 | final Map map = rowData.updateMap(); 61 | assertEquals(null,map.get("a")); 62 | assertEquals(null,map.get("b")); 63 | assertEquals(null,map.get("c")); 64 | } 65 | 66 | @Test 67 | public void testLeadOneRow() throws Exception { 68 | CsvLineData rowData = new CsvLineData(new StringReader("a\tb\tc\n1\t"), '\t', 0); 69 | final Map map = rowData.updateMap(); 70 | assertEquals("1",map.get("a")); 71 | assertEquals(null,map.get("b")); 72 | assertEquals(null,map.get("c")); 73 | } 74 | @Test 75 | public void testLeadTwoRow() throws Exception { 76 | CsvLineData rowData = new CsvLineData(new StringReader("a\tb\tc\n1\t2"), '\t', 0); 77 | 78 | final Map map = rowData.updateMap(); 79 | 
assertEquals("1",map.get("a")); 80 | assertEquals("2",map.get("b")); 81 | assertEquals(null,map.get("c")); 82 | } 83 | @Test 84 | public void testNormalCells() throws Exception { 85 | CsvLineData rowData = new CsvLineData(new StringReader("a\tb\tc\n1\t2\t3"), '\t', 0); 86 | final Map map = rowData.updateMap(); 87 | assertEquals("1",map.get("a")); 88 | assertEquals("2",map.get("b")); 89 | assertEquals("3",map.get("c")); 90 | } 91 | 92 | @Test 93 | public void testQuotedHeader() throws Exception { 94 | CsvLineData rowData = new CsvLineData(new StringReader("\"a\"\tb\tc\n1\t2\t3"), '\t', 0); 95 | final Map map = rowData.updateMap(); 96 | assertEquals("1",map.get("a")); 97 | assertEquals("2",map.get("b")); 98 | assertEquals("3",map.get("c")); 99 | } 100 | @Test 101 | public void testQuotedValue() throws Exception { 102 | CsvLineData rowData = new CsvLineData(new StringReader("\"a\"\tb\tc\n\"1\"\t2\t3"), '\t', 0); 103 | final Map map = rowData.updateMap(); 104 | assertEquals("1",map.get("a")); 105 | assertEquals("2",map.get("b")); 106 | assertEquals("3",map.get("c")); 107 | } 108 | 109 | @Test 110 | public void testQuotedValueWithNewline() throws Exception { 111 | CsvLineData rowData = new CsvLineData(new StringReader("\"a\"\tb\tc\n\"1\n2\"\t2\t3"), '\t', 0); 112 | final Map map = rowData.updateMap(); 113 | assertEquals("1\n2",map.get("a")); 114 | assertEquals("2",map.get("b")); 115 | assertEquals("3",map.get("c")); 116 | } 117 | 118 | @Test 119 | public void testQuotedValueWithNewlineAndCommas() throws Exception { 120 | CsvLineData rowData = new CsvLineData(new StringReader("\"a\",b,c\n\"1\n2\",2,3"), ',', 0); 121 | final Map map = rowData.updateMap(); 122 | assertEquals("1\n2",map.get("a")); 123 | assertEquals("2",map.get("b")); 124 | assertEquals("3",map.get("c")); 125 | } 126 | 127 | @Test 128 | public void testConvert() throws Exception { 129 | CsvLineData rowData = new CsvLineData(new StringReader("a:int\tb:float\tc:boolean"+"\n"+"1\t2.1\ttrue"), '\t', 0); 130 | 
final Map map = rowData.updateMap(); 131 | assertEquals(1,map.get("a")); 132 | assertEquals(2.1F,map.get("b")); 133 | assertEquals(true,map.get("c")); 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/csv/OpenCSVPerformanceTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.csv; 2 | 3 | import au.com.bytecode.opencsv.CSVReader; 4 | import org.junit.Assert; 5 | import org.junit.Before; 6 | import org.junit.Ignore; 7 | import org.junit.Test; 8 | 9 | import java.io.*; 10 | 11 | import static org.neo4j.batchimport.csv.PerformanceTestFile.*; 12 | 13 | /** 14 | * @author mh 15 | * @since 11.06.13 16 | */ 17 | @Ignore("Performance") 18 | public class OpenCSVPerformanceTest { 19 | 20 | @Before 21 | public void setUp() throws Exception { 22 | createTestFileIfNeeded(); 23 | } 24 | 25 | @Test 26 | public void testReadLineWithCommaSeparator() throws Exception { 27 | final BufferedReader reader = new BufferedReader(new FileReader(TEST_CSV)); 28 | final CSVReader csvReader = new CSVReader(reader,'\t','"'); 29 | 30 | int res = 0; 31 | long time = System.currentTimeMillis(); 32 | String[] line = null; 33 | while ((line = csvReader.readNext()) != null) { 34 | res += line.length; 35 | } 36 | time = System.currentTimeMillis() - time; 37 | System.out.println("time = " + time + " ms."); 38 | Assert.assertEquals(ROWS * COLS, res); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/csv/OpenCSVTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.csv; 2 | 3 | import au.com.bytecode.opencsv.CSVReader; 4 | import org.junit.Assert; 5 | import org.junit.Test; 6 | 7 | import java.io.IOException; 8 | import java.io.StringReader; 9 | 10 | /** 11 | * @author mh 12 | * @since 
11.06.13 13 | */ 14 | public class OpenCSVTest { 15 | 16 | @Test 17 | public void testReadLineWithCommaSeparator() throws Exception { 18 | final StringReader headerWithLine = new StringReader("a,b\n1,42"); 19 | assertReadFile(new CSVReader(headerWithLine), "42"); 20 | } 21 | @Test 22 | public void testReadLineWithTabSeparator() throws Exception { 23 | final StringReader headerWithLine = new StringReader("a\tb\n1\t42"); 24 | assertReadFile(new CSVReader(headerWithLine,'\t'), "42"); 25 | } 26 | @Test 27 | public void testReadLineWithTabSeparatorAndDoubleQuotes() throws Exception { 28 | final StringReader headerWithLine = new StringReader("a\t\"b\"\n1\t\"42\""); 29 | assertReadFile(new CSVReader(headerWithLine,'\t','"'), "42"); 30 | } 31 | 32 | @Test 33 | public void testReadLineWithTabSeparatorAndDoubleQuotesWithNewlineInValue() throws Exception { 34 | final StringReader headerWithLine = new StringReader("a\t\"b\"\n1\t\"4\n2\""); 35 | assertReadFile(new CSVReader(headerWithLine,'\t','"'), "4\n2"); 36 | } 37 | 38 | private void assertReadFile(CSVReader reader, final String value2) throws IOException { 39 | final String[] header = reader.readNext(); 40 | Assert.assertArrayEquals(new String[]{"a", "b"}, header); 41 | final String[] line = reader.readNext(); 42 | Assert.assertArrayEquals(new String[]{"1", value2}, line); 43 | Assert.assertNull("EOF", reader.readNext()); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/csv/PerformanceTestFile.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.csv; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | 8 | /** 9 | * @author mh 10 | * @since 11.06.13 11 | */ 12 | public class PerformanceTestFile { 13 | public final static int ROWS = 1 * 1000 * 1000; 14 | public final static int COLS = 30; 15 | 
static final String TEST_CSV = "target/test.csv"; 16 | 17 | /* Writes ROWS lines to TEST_CSV; each line has COLS double-quoted cells holding row*col, separated by tabs and terminated by a newline character. NOTE(review): the writer is closed only on the success path, so an IOException during writing leaves it open. */ public static void createTestFile() throws IOException { 18 | final BufferedWriter writer = new BufferedWriter(new FileWriter(TEST_CSV)); 19 | for (int row = 0; row < ROWS; row++) { 20 | for (int col = 0; col < COLS; col++) { 21 | if (col > 0) writer.write('\t'); 22 | writer.write("\"" + String.valueOf(row * col) + "\""); 23 | } 24 | writer.write('\n'); 25 | } 26 | writer.close(); 27 | } 28 | 29 | /* Generates the file only when TEST_CSV does not exist yet. */ static void createTestFileIfNeeded() throws IOException { 30 | if (new File(TEST_CSV).exists()) return; 31 | createTestFile(); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/csv/RowDataPerformanceTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.csv; 2 | 3 | import au.com.bytecode.opencsv.CSVReader; 4 | import org.junit.Assert; 5 | import org.junit.Before; 6 | import org.junit.Ignore; 7 | import org.junit.Test; 8 | import org.neo4j.batchimport.importer.RowData; 9 | import static org.neo4j.batchimport.csv.PerformanceTestFile.*; 10 | 11 | import java.io.*; 12 | 13 | /** 14 | * @author mh 15 | * @since 11.06.13 16 | */ 17 | /* Timing run, disabled via the Ignore annotation: feeds every line of TEST_CSV after the first through RowData.processLine and sums getColumnCount(). */ @Ignore("Performance") 18 | public class RowDataPerformanceTest { 19 | @Before 20 | public void setUp() throws Exception { 21 | createTestFileIfNeeded(); 22 | } 23 | 24 | /* The first line of the file is consumed as the RowData header, hence the expected total of (ROWS-1) * COLS. NOTE(review): the BufferedReader opened here is not closed in this method. */ @Test 25 | public void testPerformance() throws Exception { 26 | final BufferedReader reader = new BufferedReader(new FileReader(TEST_CSV)); 27 | final RowData rowData = new RowData(reader.readLine(), "\t", 0); 28 | 29 | int res = 0; 30 | long time = System.currentTimeMillis(); 31 | String line; 32 | while ((line = reader.readLine()) != null) { 33 | rowData.processLine(line); 34 | res += rowData.getColumnCount(); 35 | } 36 | time = System.currentTimeMillis() - time; 37 | System.out.println("time = " + time + " ms."); 38 | Assert.assertEquals((ROWS-1) * COLS, res); 
39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/csv/RowDataTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.csv; 2 | 3 | import org.junit.Test; 4 | import org.neo4j.batchimport.importer.RowData; 5 | 6 | import java.util.Map; 7 | 8 | import static org.junit.Assert.assertEquals; 9 | 10 | /** 11 | * @author mh 12 | * @since 29.11.12 13 | */ 14 | /* Verifies how RowData.updateMap maps tab-separated cells onto the header a, b, c; empty or missing cells are expected to come back as null. */ public class RowDataTest { 15 | 16 | private final RowData rowData = new RowData("a\tb\tc", "\t", 0); 17 | 18 | /* NOTE(review): despite the method name, the input used here has an empty FIRST cell and filled cells after it. */ @Test 19 | public void testTrailingEmptyCells() throws Exception { 20 | final Map map = rowData.updateMap("\t2\t3"); 21 | assertEquals(null,map.get("a")); 22 | assertEquals("2",map.get("b")); 23 | assertEquals("3",map.get("c")); 24 | } 25 | @Test 26 | public void testLeadingAndTrailingEmptyCells() throws Exception { 27 | final Map map = rowData.updateMap("\t2\t"); 28 | assertEquals(null,map.get("a")); 29 | assertEquals("2",map.get("b")); 30 | assertEquals(null,map.get("c")); 31 | } 32 | /* NOTE(review): despite the method name, the input used here has a filled first cell followed by empty cells. */ @Test 33 | public void testLeadingEmptyCells() throws Exception { 34 | final Map map = rowData.updateMap("1\t\t"); 35 | assertEquals("1",map.get("a")); 36 | assertEquals(null,map.get("b")); 37 | assertEquals(null,map.get("c")); 38 | } 39 | /* An empty input line is expected to leave all three columns null. */ @Test 40 | public void testEmptyRow() throws Exception { 41 | final Map map = rowData.updateMap(""); 42 | assertEquals(null,map.get("a")); 43 | assertEquals(null,map.get("b")); 44 | assertEquals(null,map.get("c")); 45 | } 46 | 47 | /* Rows shorter than the header: columns without a cell are expected to be null. */ @Test 48 | public void testLeadOneRow() throws Exception { 49 | final Map map = rowData.updateMap("1\t"); 50 | assertEquals("1",map.get("a")); 51 | assertEquals(null,map.get("b")); 52 | assertEquals(null,map.get("c")); 53 | } 54 | @Test 55 | public void testLeadTwoRow() throws Exception { 56 | final Map map = rowData.updateMap("1\t2"); 57 | assertEquals("1",map.get("a")); 58 | assertEquals("2",map.get("b")); 
59 | assertEquals(null,map.get("c")); 60 | } 61 | @Test 62 | public void testNormalCells() throws Exception { 63 | final Map map = rowData.updateMap("1\t2\t3"); 64 | assertEquals("1",map.get("a")); 65 | assertEquals("2",map.get("b")); 66 | assertEquals("3",map.get("c")); 67 | } 68 | 69 | /* Uses its own RowData whose header carries the suffixes :int, :float and :boolean; the map is expected to hold the values 1, 2.1F and true under the bare names a, b, c. */ @Test 70 | public void testConvert() throws Exception { 71 | final RowData rowData = new RowData("a:int\tb:float\tc:boolean", "\t", 0); 72 | final Map map = rowData.updateMap("1\t2.1\ttrue"); 73 | assertEquals(1,map.get("a")); 74 | assertEquals(2.1F,map.get("b")); 75 | assertEquals(true,map.get("c")); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/csv/StreamTokenizerTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.csv; 2 | 3 | import org.junit.Test; 4 | import org.neo4j.batchimport.utils.Chunker; 5 | 6 | import java.io.*; 7 | 8 | import static org.junit.Assert.assertEquals; 9 | import static org.junit.Assert.assertTrue; 10 | 11 | /** 12 | * @author mh 13 | * @since 13.11.12 14 | */ 15 | /* Reads a small tab-separated relationship fixture (header plus two rows, the second with an empty NAME cell) token by token with Chunker.nextWord(). */ public class StreamTokenizerTest { 16 | 17 | String file = "FROM\tTO\tTYPE\tNAME\tAGE:INT\n" 18 | +"1\t2\tKNOWS\tFOO\t42\n" 19 | +"1\t2\tKNOWS\t\t42" 20 | ; 21 | 22 | /* The header line is read and split directly from the BufferedReader; the Chunker is created on the same reader afterwards and reads the two data rows. */ @Test 23 | public void testReadHeader() throws Exception { 24 | final BufferedReader reader = new BufferedReader(new StringReader(file)); 25 | final String[] header = reader.readLine().split("\t"); 26 | final Chunker chunker = new Chunker(reader, '\t'); 27 | readLine(header, chunker, "FOO", "42"); 28 | readLine(header, chunker, "", "42"); 29 | assertEquals(Chunker.EOF,chunker.nextWord()); 30 | } 31 | 32 | /* Consumes one row: expects the tokens 1, 2 and KNOWS, then one token per remaining header column (compared with values), then an end-of-line or end-of-file marker. */ private void readLine(String[] header, Chunker st, Object...values) throws IOException { 33 | long from = Long.parseLong(st.nextWord()); 34 | assertEquals(1,from); 35 | long to = Long.parseLong(st.nextWord()); 36 | assertEquals(2,to); 37 | String type = st.nextWord(); 38 
| assertEquals("KNOWS", type); 39 | 40 | /* header[i] is passed as the assertion message, so a failure names the column. */ for (int i = 3; i < header.length; i++) { 41 | assertEquals(header[i], values[i - 3], st.nextWord()); 42 | } 43 | String token = st.nextWord(); 44 | /* Reference comparison (==) against the Chunker.EOL and Chunker.EOF constants; presumably nextWord() returns those exact instances - TODO confirm in Chunker. */ assertTrue(Chunker.EOL==token || Chunker.EOF==token); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/importer/AbstractLineDataTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.importer; 2 | 3 | import org.junit.Test; 4 | 5 | import java.io.StringReader; 6 | import java.util.Collections; 7 | 8 | import static org.junit.Assert.*; 9 | import static org.junit.Assert.assertArrayEquals; 10 | import static org.neo4j.helpers.collection.MapUtil.map; 11 | 12 | /** 13 | * Created by mh on 26.08.13. 14 | */ 15 | /* Covers the header typing of CsvLineData for an untyped column, an :id column and a :label column. */ public class AbstractLineDataTest { 16 | 17 | /* A column that is merely NAMED label, without a type suffix, is expected to be a plain STRING column. */ @Test 18 | public void testLabelNamedHeaderIsNotTreatedDifferently() throws Exception { 19 | StringReader reader = new StringReader("label\nfoo"); 20 | CsvLineData data = new CsvLineData(reader, '\t', 0); 21 | assertTrue(data.readLine()); 22 | assertEquals(Type.STRING,data.getHeader()[0].type); 23 | assertEquals("foo",data.getValue(0)); 24 | } 25 | 26 | /* The id column is expected to be parsed as the long 123 and to be absent from getProperties(). NOTE(review): this test passes an empty string to processLine while the sibling tests pass null or call readLine(); the role of that argument is not visible here. */ @Test 27 | public void testIdIsHandledCorrectly() throws Exception { 28 | StringReader reader = new StringReader("id:id\n123"); 29 | CsvLineData data = new CsvLineData(reader, '\t', 0); 30 | assertTrue(data.processLine("")); 31 | assertEquals(Type.ID,data.getHeader()[0].type); 32 | assertEquals(123L,data.getValue(0)); 33 | assertEquals(Collections.emptyMap(),data.getProperties()); 34 | } 35 | 36 | /* A column typed :label is expected to yield a String array, both from getValue and from getTypeLabels(). */ @Test 37 | public void testLabelTypedHeaderHandledAsLabel() throws Exception { 38 | StringReader reader = new StringReader("label:label\nfoo"); 39 | CsvLineData data = new CsvLineData(reader, '\t', 0); 40 | assertTrue(data.processLine(null)); 41 | assertEquals(Type.LABEL, data.getHeader()[0].type); 42 | assertArrayEquals(new 
String[]{"foo"}, (Object[]) data.getValue(0)); 43 | assertArrayEquals(new String[]{"foo"}, data.getTypeLabels()); 44 | } 45 | /* With a property column next to the label column, getProperties() is expected to contain only prop=bar. */ @Test 46 | public void testFileWithLabelHasCorrectProperties() throws Exception { 47 | StringReader reader = new StringReader("prop\tlabel:label\nbar\tfoo"); 48 | CsvLineData data = new CsvLineData(reader, '\t', 0); 49 | assertTrue(data.processLine(null)); 50 | assertEquals(Type.LABEL, data.getHeader()[1].type); 51 | assertArrayEquals(new String[]{"foo"}, (Object[]) data.getValue(1)); 52 | assertArrayEquals(new String[]{"foo"}, data.getTypeLabels()); 53 | assertEquals(map("prop", "bar"), data.getProperties()); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/utils/ChunkerTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.utils; 2 | 3 | import org.junit.Test; 4 | 5 | import java.io.StringReader; 6 | 7 | import static org.junit.Assert.assertEquals; 8 | import static org.junit.Assert.assertSame; 9 | 10 | /* Token-level tests for Chunker.nextWord() on tab-delimited input; each test lists the exact token sequence expected, using the markers NO_VALUE, EOL and EOF. */ public class ChunkerTest { 11 | @Test 12 | public void testEmptyFile() throws Exception { 13 | Chunker chunker = newChunker(""); 14 | assertEquals(Chunker.EOF, chunker.nextWord()); 15 | } 16 | /* A lone tab is expected to produce two NO_VALUE tokens (one per side of the delimiter) before EOF. */ @Test 17 | public void testEmptyField() throws Exception { 18 | Chunker chunker = newChunker("\t"); 19 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 20 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 21 | assertEquals(Chunker.EOF, chunker.nextWord()); 22 | } 23 | 24 | @Test 25 | public void testEmptyFieldWithNewline() throws Exception { 26 | Chunker chunker = newChunker("\t\n"); 27 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 28 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 29 | assertEquals(Chunker.EOL, chunker.nextWord()); 30 | assertEquals(Chunker.EOF, chunker.nextWord()); 31 | } 32 | 33 | /* A line consisting only of a line break is expected to yield NO_VALUE, then EOL, then EOF. */ @Test 34 | public void testEmptyLine() throws Exception { 35 | Chunker 
chunker = newChunker("\n"); 36 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 37 | assertEquals(Chunker.EOL, chunker.nextWord()); 38 | assertEquals(Chunker.EOF, chunker.nextWord()); 39 | } 40 | /* From here on the final EOF check uses assertSame, i.e. it requires the very Chunker.EOF instance. */ @Test 41 | public void testLineWithFields() throws Exception { 42 | Chunker chunker = newChunker("a\tb\n"); 43 | assertEquals("a", chunker.nextWord()); 44 | assertEquals("b", chunker.nextWord()); 45 | assertEquals(Chunker.EOL, chunker.nextWord()); 46 | assertSame(Chunker.EOF, chunker.nextWord()); 47 | } 48 | /* Same shape as the previous test but with non-ASCII letters as field values. */ @Test 49 | public void testUtf8() throws Exception { 50 | Chunker chunker = newChunker("ä\tá\n"); 51 | assertEquals("ä", chunker.nextWord()); 52 | assertEquals("á", chunker.nextWord()); 53 | assertEquals(Chunker.EOL, chunker.nextWord()); 54 | assertSame(Chunker.EOF, chunker.nextWord()); 55 | } 56 | 57 | @Test 58 | public void testLineWithEmptyField() throws Exception { 59 | Chunker chunker = newChunker("a\t\tb\n"); 60 | assertEquals("a", chunker.nextWord()); 61 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 62 | assertEquals("b", chunker.nextWord()); 63 | assertEquals(Chunker.EOL, chunker.nextWord()); 64 | assertSame(Chunker.EOF, chunker.nextWord()); 65 | } 66 | /* Three tabs delimit four empty fields, hence four NO_VALUE tokens before EOL. */ @Test 67 | public void testLineWithOnlyEmptyFields() throws Exception { 68 | Chunker chunker = newChunker("\t\t\t\n"); 69 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 70 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 71 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 72 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 73 | assertEquals(Chunker.EOL, chunker.nextWord()); 74 | assertSame(Chunker.EOF, chunker.nextWord()); 75 | } 76 | 77 | /* Three lines, the second one blank: the blank line is expected to yield NO_VALUE followed by EOL. */ @Test 78 | public void testLineWithEmptyLines() throws Exception { 79 | Chunker chunker = newChunker("a\t\n\nb\n"); 80 | assertEquals("a", chunker.nextWord()); 81 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 82 | assertEquals(Chunker.EOL, chunker.nextWord()); 83 | assertEquals(Chunker.NO_VALUE, chunker.nextWord()); 84 | 
assertEquals(Chunker.EOL, chunker.nextWord()); 85 | assertEquals("b", chunker.nextWord()); 86 | assertEquals(Chunker.EOL, chunker.nextWord()); 87 | assertSame(Chunker.EOF, chunker.nextWord()); 88 | } 89 | 90 | 91 | 92 | /* Builds a Chunker over the given text via a StringReader, with tab as the delimiter. */ private Chunker newChunker(String lines) { 93 | return new Chunker(new StringReader(lines), '\t'); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/utils/ConfigTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.utils; 2 | 3 | import org.junit.Before; 4 | import org.junit.Test; 5 | 6 | import java.io.File; 7 | import java.io.FileWriter; 8 | import java.io.IOException; 9 | import java.io.StringWriter; 10 | import java.util.Collection; 11 | 12 | import static org.junit.Assert.assertEquals; 13 | 14 | /* Tests Config.convertArgumentsToConfig and Config.toFiles, using temp files that are created in the target directory. */ public class ConfigTest { 15 | 16 | private final File testConfigFile; 17 | private final File nodesFile = createTempFile("nodes", "csv"); 18 | private final File relsFile = createTempFile("rels", "csv"); 19 | 20 | /* Creates a delete-on-exit temp file named prefix...suffix inside the target directory; an IOException is rethrown wrapped in a RuntimeException. */ private File createTempFile(String prefix, String suffix) { 21 | final File tempFile; 22 | try { 23 | tempFile = File.createTempFile(prefix, "." 
+ suffix, new File("target")); 24 | tempFile.deleteOnExit(); 25 | return tempFile; 26 | } catch (IOException e) { 27 | throw new RuntimeException(e); 28 | } 29 | } 30 | 31 | /* Writes the properties file used by every test: a single entry that sets Config.ARRAY_SEPARATOR_CONFIG to the pipe character. */ public ConfigTest() throws IOException { 32 | testConfigFile = createTempFile("test", "properties"); 33 | FileWriter fileWriter = new FileWriter(testConfigFile); 34 | fileWriter.write(Config.ARRAY_SEPARATOR_CONFIG+"=|"); 35 | fileWriter.close(); 36 | } 37 | 38 | /* Intentionally empty at present. */ @Before 39 | public void setUp() throws Exception { 40 | 41 | } 42 | 43 | // final String[] args = "data/dir nodes.csv relationships.csv [node_index node-index-name fulltext|exact nodes_index.csv rel_index rel-index-name fulltext|exact rels_index.csv ....]".split(" "); 44 | 45 | 46 | 47 | @Test 48 | public void testExtractDatabaseDir() throws Exception { 49 | assertCommandLine("data/dir", 50 | "data/dir", Config.BATCH_IMPORT_GRAPH_DB); 51 | } 52 | 53 | /* Of the four comma-separated entries only the existing nodes file is expected to be returned. */ @Test 54 | public void testToFiles() throws Exception { 55 | final Collection files = Config.toFiles("null,,foo," + nodesFile.getAbsolutePath()); 56 | assertEquals(1,files.size()); 57 | assertEquals(nodesFile.getAbsolutePath(),files.iterator().next().getAbsolutePath()); 58 | 59 | } 60 | 61 | /* Second positional argument is expected under Config.BATCH_IMPORT_NODES_FILES. */ @Test 62 | public void testExtractNodesFiles() throws Exception { 63 | assertCommandLine("data/dir "+nodesFile.getAbsolutePath(), 64 | nodesFile.getAbsolutePath(), Config.BATCH_IMPORT_NODES_FILES); 65 | } 66 | 67 | /* Third positional argument is expected under Config.BATCH_IMPORT_RELS_FILES. */ @Test 68 | public void testExtractRelsFiles() throws Exception { 69 | assertCommandLine("data/dir "+nodesFile.getAbsolutePath()+" "+relsFile.getAbsolutePath(), 70 | relsFile.getAbsolutePath(), Config.BATCH_IMPORT_RELS_FILES); 71 | } 72 | 73 | /* Index arguments with a file are expected to be stored as type:file under Config.NODE_INDEX(name). */ @Test 74 | public void testExtractExactNodeIndexFile() throws Exception { 75 | assertCommandLine("data/dir nodes.csv rels.csv node_index index-name exact node_index.csv", 76 | "exact:node_index.csv", Config.NODE_INDEX("index-name")); 77 | } 78 | @Test 79 | public void testExtractFulltextNodeIndexFile() throws Exception { 80 | 
assertCommandLine("data/dir nodes.csv rels.csv node_index index-name fulltext node_index.csv", 81 | "fulltext:node_index.csv", Config.NODE_INDEX("index-name")); 82 | } 83 | /* Without an index file only the index type is expected as the stored value. */ @Test 84 | public void testExtractExactNodeIndex() throws Exception { 85 | assertCommandLine("data/dir nodes.csv rels.csv node_index index-name exact", 86 | "exact", Config.NODE_INDEX("index-name")); 87 | } 88 | @Test 89 | public void testExtractFulltextNodeIndex() throws Exception { 90 | assertCommandLine("data/dir nodes.csv rels.csv node_index index-name fulltext", 91 | "fulltext", Config.NODE_INDEX("index-name")); 92 | } 93 | 94 | /* Expects the separator written to the test properties file by the constructor. Afterwards Config.ARRAYS_SEPARATOR is set to a comma (presumably to reset shared static state - TODO confirm); NOTE(review): that assignment does not run if the assertion fails. */ @Test 95 | public void testCustomArraySeparator() throws Exception { 96 | assertCommandLine("data/dir nodes.csv rels.csv node_index index-name fulltext", 97 | "|", Config.ARRAY_SEPARATOR_CONFIG); 98 | Config.ARRAYS_SEPARATOR =","; 99 | } 100 | 101 | @Test 102 | public void testExtractExactRelsIndexFile() throws Exception { 103 | assertCommandLine("data/dir nodes.csv rels.csv relationship_index index-name exact rels_index.csv", 104 | "exact:rels_index.csv", Config.RELATIONSHIP_INDEX("index-name")); 105 | } 106 | 107 | @Test 108 | public void testExtractExactRelsIndex() throws Exception { 109 | assertCommandLine("data/dir nodes.csv rels.csv relationship_index index-name exact", 110 | "exact", Config.RELATIONSHIP_INDEX("index-name")); 111 | } 112 | 113 | /* With an empty argument string only the config file path is passed; an IllegalArgumentException is expected. */ @Test(expected = IllegalArgumentException.class) 114 | public void testFailsOnNoArguments() throws Exception { 115 | assertCommandLine("",null,null); 116 | } 117 | 118 | /* Prepends the absolute path of the test properties file to the arguments, splits on single spaces, converts the result to a Config and compares config.get(optionName) with expected. */ private void assertCommandLine(String arguments, String expected, String optionName) { 119 | final String configFileName = testConfigFile.getAbsolutePath(); 120 | final String[] args = (configFileName + " " +arguments).split(" "); 121 | final Config config = Config.convertArgumentsToConfig(args); 122 | assertEquals(expected, config.get(optionName)); 123 | } 124 | 125 | } -------------------------------------------------------------------------------- 
/src/test/java/org/neo4j/batchimport/utils/FileIteratorTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.utils; 2 | 3 | import org.junit.Test; 4 | import org.neo4j.helpers.collection.Iterators; 5 | 6 | import java.io.FileWriter; 7 | import java.io.IOException; 8 | 9 | import static org.junit.Assert.assertEquals; 10 | import static org.junit.Assert.assertTrue; 11 | import static org.neo4j.batchimport.utils.FileIterator.DELIM; 12 | 13 | /** 14 | * @author Michael Hunger @since 04.11.13 15 | */ 16 | /* Tests FileIterator against a generated file of start/end pairs. */ public class FileIteratorTest { 17 | 18 | public static final int LINES = 10; 19 | public static final int LINES_1M = 1_000_000; 20 | 21 | /* Expects lineNo to count from 0 and each line text to be start, DELIM, end, DELIM without a line terminator. NOTE(review): unlike testPerformance, this test never calls close() on the FileIterator. */ @Test 22 | public void testReadLines() throws Exception { 23 | String fileName = writeFile(LINES); 24 | 25 | int count = 0; 26 | FileIterator it = new FileIterator(fileName); 27 | while (it.hasNext()) { 28 | FileIterator.Line line = it.next(); 29 | assertEquals(count, line.lineNo); 30 | assertEquals(""+start(count)+ DELIM+end(count)+ DELIM, line.line); 31 | count++; 32 | } 33 | assertEquals(false,it.hasNext()); 34 | assertEquals(LINES, count); 35 | } 36 | 37 | /* Writes the requested number of lines to a fixed path under target and returns that path; each line ends with the platform line separator (the percent-n format specifier). */ private String writeFile(int lines) throws IOException { 38 | String fileName = "target/FileIteratorTest.txt"; 39 | FileWriter writer = new FileWriter(fileName); 40 | for (int i=0;i< lines;i++) { 41 | writer.write(String.format("%d%s%d%s%n", start(i),DELIM, end(i),DELIM)); 42 | } 43 | writer.close(); 44 | return fileName; 45 | } 46 | 47 | /* Start value of generated line i: i itself. */ private int start(int i) { 48 | return i; 49 | } 50 | 51 | /* End value of generated line i: i + 10 - (i mod 20). */ private int end(int i) { 52 | return i+10-i%20; 53 | } 54 | 55 | /* Counts one million generated lines via Iterators.count. NOTE(review): the test asserts a wall-clock bound of 1000 ms, so its outcome depends on the speed of the machine running it. */ @Test 56 | public void testPerformance() throws Exception { 57 | String file = writeFile(LINES_1M); 58 | FileIterator reader = new FileIterator(file); 59 | long time = System.currentTimeMillis(); 60 | int count = (int) Iterators.count(reader); 61 | long delta = System.currentTimeMillis() - time; 62 | System.out.println("delta = " + delta); 63 | 
assertTrue("timeout "+delta+" > 1000 ms", delta < 1000); 64 | assertEquals(LINES_1M, count); 65 | reader.close(); 66 | 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/utils/ParamsTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.utils; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.assertEquals; 6 | import static org.junit.Assert.assertTrue; 7 | 8 | /** 9 | * @author mh 10 | * @since 02.11.12 11 | */ 12 | /* Tests Params built from the space-separated names string foo bar and the two values file and 42. */ public class ParamsTest { 13 | Params params = new Params("foo bar","file","42"); 14 | 15 | /* Two names with two values is expected to be valid; two names with only one value is expected to be invalid. */ @Test 16 | public void testInvalid() throws Exception { 17 | assertEquals(false,params.invalid()); 18 | assertEquals(true,new Params("foo bar", "file").invalid()); 19 | } 20 | 21 | @Test 22 | public void testLength() throws Exception { 23 | assertEquals(2,params.length()); 24 | } 25 | 26 | /* toString() is expected to return the names string unchanged. */ @Test 27 | public void testToString() throws Exception { 28 | assertEquals("foo bar",params.toString()); 29 | } 30 | 31 | /* Values are looked up by name: foo maps to the first value, bar to the second. */ @Test 32 | public void testFile() throws Exception { 33 | assertEquals("file",params.file("foo").getPath()); 34 | } 35 | 36 | @Test 37 | public void testLongValue() throws Exception { 38 | assertEquals(42L,params.longValue("bar")); 39 | } 40 | @Test 41 | public void testIntValue() throws Exception { 42 | assertEquals(42,params.intValue("bar")); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/utils/RelStartEndComparatorTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.utils; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | import static org.junit.Assert.assertEquals; 7 | import static org.neo4j.batchimport.utils.FileIterator.Line.from; 8 | 9 | /** 10 | * @author Michael Hunger @since 04.11.13 11 | */ 12 | 
public class RelStartEndComparatorTest { /* Tests FileIterator.RelStartEndComparator on Line objects created via Line.from(lineNo, text). */ 13 | 14 | private final FileIterator.RelStartEndComparator comparator = new FileIterator.RelStartEndComparator(); 15 | 16 | /* The first two assertions compare identical texts with different line numbers and expect a non-zero result. The remaining ones use the private three-argument helper below, which fixes both line numbers to 1. */ @Test 17 | public void testCompareMinStartEnd() throws Exception { 18 | Assert.assertEquals(-1, comparator.compare(from(1, "1\t2\t"), from(2, "1\t2\t"))); 19 | Assert.assertEquals(1, comparator.compare(from(2, "1\t2\t"), from(1, "1\t2\t"))); 20 | 21 | assertEquals(0, "1\t2\t", "1\t2\t"); 22 | assertEquals(0, "2\t1\t", "1\t2\t"); 23 | assertEquals(-1,"2\t1\t", "1\t3\t"); 24 | assertEquals(-1, "1\t2\t", "3\t4\t"); 25 | assertEquals(1, "3\t1\t", "1\t2\t"); 26 | assertEquals(1, "3\t4\t", "1\t2\t"); 27 | } 28 | 29 | /* Class-local helper that shares its name with the statically imported JUnit method; as a member of this class it shadows that import, so the unqualified calls above resolve here. It compares the two texts as lines with the same line number 1. */ private void assertEquals(int expected, String line1, String line2) { 30 | Assert.assertEquals(expected, comparator.compare(from(1, line1), from(1, line2))); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/test/java/org/neo4j/batchimport/utils/RelationshipSorterTest.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.batchimport.utils; 2 | 3 | import org.junit.Test; 4 | import org.neo4j.helpers.collection.Iterators; 5 | 6 | import java.io.BufferedReader; 7 | import java.io.FileReader; 8 | import java.io.FileWriter; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | import static org.neo4j.batchimport.utils.FileIterator.DELIM; 12 | 13 | /** 14 | * @author Michael Hunger @since 04.11.13 15 | */ /* Writes a header plus LINES*LINES start/end pairs in descending order, sorts the file with RelationshipSorter.main and checks the order and the line count of the output. */ 16 | //@Ignore("Doesn't work because 'equal' lines are squashed in mapdb, idea add line # as last criterium") 17 | public class RelationshipSorterTest { 18 | private static final int LINES = 10; 19 | 20 | @Test 21 | public void testSortRelationshipFile() throws Exception { 22 | String fileName = "target/RelationshipSorterTest.txt"; 23 | String targetFile = fileName + "_sorted"; 24 | 25 | int written=1; 26 | FileWriter writer = new FileWriter(fileName); 27 | writer.write("" +"start" + 
DELIM + "end" + DELIM + '\n'); 28 | /* Both indices run from LINES-1 down to 0, so the input file is written in descending order. */ for (int i = LINES - 1; i >= 0; i--) { 29 | for (int j = LINES - 1; j >= 0; j--) { 30 | String line = "" +i + DELIM + j + DELIM + '\n'; 31 | writer.write(line); 32 | written++; 33 | } 34 | } 35 | writer.close(); 36 | /* Sanity check on the input: FileIterator must see every written line, header included. */ assertEquals(written, Iterators.count(new FileIterator(fileName))); 37 | 38 | RelationshipSorter.main(fileName, targetFile); 39 | /* NOTE(review): this BufferedReader is never closed. */ BufferedReader reader = new BufferedReader(new FileReader(targetFile)); 40 | String line = null; 41 | String[] last = null; 42 | /* count starts at 1 for the header line, which the bare readLine() below skips. */ int count = 1; 43 | reader.readLine(); 44 | while ((line = reader.readLine()) != null) { 45 | String[] parts = line.split("\t"); 46 | if (last!=null) { 47 | String msg = String.format("last min(%s,%s) < curr min(%s,%s)", last[0], last[1], parts[0], parts[1]); 48 | /* Ordering criterion checked: min(start,end) of each line must not be smaller than that of the previous line. */ assertEquals(msg, true, Math.min(Integer.parseInt(last[0]),Integer.parseInt(last[1])) <= Math.min(Integer.parseInt(parts[0]),Integer.parseInt(parts[1]))); 49 | } 50 | last = parts; 51 | count++; 52 | } 53 | /* No line may be lost or duplicated by sorting. */ assertEquals(written,count); 54 | } 55 | } 56 | --------------------------------------------------------------------------------