├── .gitignore
├── NOTICE.txt
├── batch.properties
├── changelog.txt
├── generate.sh
├── import-mvn.sh
├── import.bat
├── import.sh
├── import_csv.sh
├── pom.xml
├── readme.md
├── run.sh
├── sample
├── batch.properties
├── import.sh
├── nodes.csv
├── nodes2.csv
└── rels.csv
├── settings.sh
├── sort.sh
└── src
├── main
├── java
│ └── org
│ │ └── neo4j
│ │ └── batchimport
│ │ ├── CSVParser.java
│ │ ├── CSVReader.java
│ │ ├── Importer.java
│ │ ├── IndexInfo.java
│ │ ├── LineData.java
│ │ ├── Report.java
│ │ ├── StdOutReport.java
│ │ ├── Utils.java
│ │ ├── importer
│ │ ├── AbstractLineData.java
│ │ ├── ChunkerLineData.java
│ │ ├── CsvLineData.java
│ │ ├── RelType.java
│ │ ├── RowData.java
│ │ └── Type.java
│ │ ├── index
│ │ ├── LongIterableIndexHits.java
│ │ └── MapDbCachingIndexProvider.java
│ │ └── utils
│ │ ├── Chunker.java
│ │ ├── Config.java
│ │ ├── FileIterator.java
│ │ ├── Params.java
│ │ ├── RelationshipSorter.java
│ │ └── RelationshipSorter2.java
└── resources
│ └── log4j.properties
└── test
└── java
├── DataTest.java
└── org
└── neo4j
└── batchimport
├── ImporterIntegrationTest.java
├── ImporterTest.java
├── IndexInfoTest.java
├── RelationshipMatcher.java
├── TestDataGenerator.java
├── TestImporter.java
├── csv
├── ChunkerPerformanceTest.java
├── ChunkerRowDataTest.java
├── CsvLineDataTest.java
├── OpenCSVPerformanceTest.java
├── OpenCSVTest.java
├── PerformanceTestFile.java
├── RowDataPerformanceTest.java
├── RowDataTest.java
└── StreamTokenizerTest.java
├── importer
└── AbstractLineDataTest.java
└── utils
├── ChunkerTest.java
├── ConfigTest.java
├── FileIteratorTest.java
├── ParamsTest.java
├── RelStartEndComparatorTest.java
└── RelationshipSorterTest.java
/.gitignore:
--------------------------------------------------------------------------------
1 | zip.sh
2 | *.db/
3 | lib/
4 | *.tsv
5 | .project
6 | .shell_history
7 | *.ipr
8 | *.iws
9 | *.iml
10 | .idea
11 | target
12 | *.csv
13 | .DS_Store
14 | .settings
15 | *.gz
16 | gc.log
17 | *.zip
18 | zip19.sh
--------------------------------------------------------------------------------
/NOTICE.txt:
--------------------------------------------------------------------------------
1 | Neo4j
2 | Copyright © 2002-2011 Network Engine for Objects in Lund AB (referred to
3 | in this notice as “Neo Technology”)
4 | [http://neotechnology.com]
5 |
6 | This product includes software ("Software") developed by Neo Technology.
7 |
8 | The copyright in the bundled Neo4j graph database (including the
9 | Software) is owned by Neo Technology. The Software developed and owned
10 | by Neo Technology is licensed under the GNU GENERAL PUBLIC LICENSE
11 | Version 3 (http://www.fsf.org/licensing/licenses/gpl-3.0.html) ("GPL")
12 | to all third parties and that license, as required by the GPL, is
13 | included in the LICENSE.txt file.
14 |
15 | However, if you have executed an End User Software License and Services
16 | Agreement or an OEM Software License and Support Services Agreement, or
17 | another commercial license agreement with Neo Technology or one of its
18 | affiliates (each, a "Commercial Agreement"), the terms of the license in
19 | such Commercial Agreement will supersede the GPL and you may use the
20 | software solely pursuant to the terms of the relevant Commercial
21 | Agreement.
22 |
23 |
24 | Third party libraries
25 | ---------------------
26 |
27 | Full license texts are found in LICENSES.txt.
28 |
29 |
30 | The bundled JAX-RS (JSR311) API is licensed under the GNU General
31 | Public License Version 2 with classpath exception. Alternatively
32 | under the Common Development and Distribution License, version 1.1.
33 |
34 | The bundled Jersey library, containing Jersey Core, Jersey Server, and
35 | Jersey Client, is licensed under the GNU General Public License
36 | Version 2 with classpath exception. Alternatively under the Common
37 | Development and Distribution License, version 1.1.
38 |
39 | The Mime streaming plugin library is licensed under the GNU General
40 | Public License Version 2 with classpath exception. Alternatively
41 | under the Common Development and Distribution License, version 1.0.
42 |
43 | Other bundled libraries are licensed according to the following listing.
44 |
45 | The Apache Software License, Version 2.0:
46 | Apache ServiceMix :: Bundles :: lucene,
47 | Apache Commons:
48 | Commons BeanUtils, Commons BeanUtils Core, Commons Collections, Commons IO,
49 | Commons Configuration, Commons Digester, Commons Lang, Commons Logging,
50 | Apache Log4j,
51 | Apache Felix: Felix FileInstall, Felix Framework, Felix Main,
52 | JSON.simple,
53 | RRD4J,
54 | Geronimo Java Transaction API,
55 | Groovy,
56 | Jackson: Jackson Core, Jackson JAX-RS, Data Mapper for Jackson,
57 | Jansi,
58 | Jetty: Jetty, Jetty Util, Jetty Servlet Specification API,
59 |
60 | MIT License:
61 | SLF4J API Module, SLF4J Log4j-12 Binding, SLF4J JDK1.4 Logging Binding,
62 | SLF4J Jakarta Commons Logging Binding,
63 | Base64.js,
64 | jTemplates,
65 | jQuery,
66 | jQuery BBQ,
67 | jQuery hashchange event,
68 | SimpleModal,
69 | jQuery flot including colorhelpers
70 |
71 | BSD licence:
72 | ASM: ASM Core, ASM Tree, ASM Commons, ASM Util, ASM Analysis,
73 | Blueprints: Data Models and their Implementations,
74 | Gremlin: A Graph-Based Programming Language,
75 | Pipes: A Data Flow Framework using Process Graphs,
76 | JLine, Scala library
77 |
78 | provided without support or warranty: JSON (JavaScript Object Notation)
79 |
80 | Public domain:
81 | Doug Lea's util.concurrent package,
82 | ANTLR 2.7.7,
83 | JSON2.js
84 |
--------------------------------------------------------------------------------
/batch.properties:
--------------------------------------------------------------------------------
1 | dump_configuration=false
2 | cache_type=none
3 | use_memory_mapped_buffers=true
4 | neostore.propertystore.db.index.keys.mapped_memory=5M
5 | neostore.propertystore.db.index.mapped_memory=5M
6 | neostore.nodestore.db.mapped_memory=200M
7 | neostore.relationshipstore.db.mapped_memory=500M
8 | neostore.propertystore.db.mapped_memory=200M
9 | neostore.propertystore.db.strings.mapped_memory=200M
10 | batch_array_separator=,
11 | #batch_import.csv.quotes=true
12 | #batch_import.csv.delim=,
13 |
--------------------------------------------------------------------------------
/changelog.txt:
--------------------------------------------------------------------------------
1 | 2013-06-27
2 | ==========
3 | * supports array types
4 |
5 | 2013-06-19
6 | ==========
7 | * import files can now be compressed as .gz or .zip
8 | * supports multiple csv files for nodes, relationships, comma separated
9 | * supports automatic indexing with headers like "name:string:users"
10 | * supports index lookups for relationships for start/end fields "name:string:users" and the literal values
11 | * supports now config file based setup
12 | * supports keeping the database instead of cleaning
13 | * supports opencsv as reader, alternative delimiters, quotes
14 | * supports caching in index lookups using MapDB in front of lucene
15 | * added faster default CSV reader
16 | * added new type LABEL that will also be used for node-labels in the future
--------------------------------------------------------------------------------
/generate.sh:
--------------------------------------------------------------------------------
1 | source ./settings.sh
2 |
3 | mvn clean test-compile exec:java -Dexec.mainClass=org.neo4j.batchimport.TestDataGenerator -Dexec.classpathScope=test \
4 | -Dexec.args="$1 $2 $3 $4" | grep -iv '\[\(INFO\|debug\)\]'
5 |
--------------------------------------------------------------------------------
/import-mvn.sh:
--------------------------------------------------------------------------------
1 | DB=${1-target/graph.db}
2 | shift
3 | NODES=${1-nodes.csv}
4 | shift
5 | RELS=${1-rels.csv}
6 | shift
7 | mvn compile exec:java -Dexec.mainClass="org.neo4j.batchimport.Importer" \
8 | -Dexec.args="batch.properties $DB $NODES $RELS $*" | grep -iv '\[\(INFO\|debug\)\]'
9 |
--------------------------------------------------------------------------------
/import.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 |
3 | set ERROR_CODE=0
4 | set HEAP=4G
5 |
6 | :init
7 | @REM Decide how to startup depending on the version of windows
8 |
9 | @REM -- Win98ME
10 | if NOT "%OS%"=="Windows_NT" goto Win9xArg
11 |
12 | @REM set local scope for the variables with windows NT shell
13 | if "%OS%"=="Windows_NT" @setlocal
14 |
15 | @REM -- 4NT shell
16 | if "%eval[2+2]" == "4" goto 4NTArgs
17 |
18 | @REM -- Regular WinNT shell
19 | set CMD_LINE_ARGS=%*
20 | goto WinNTGetScriptDir
21 |
22 | @REM The 4NT Shell from jp software
23 | :4NTArgs
24 | set CMD_LINE_ARGS=%$
25 | goto WinNTGetScriptDir
26 |
27 | :Win9xArg
28 | @REM Slurp the command line arguments. This loop allows for an unlimited number
29 | @REM of arguments (up to the command line limit, anyway).
30 | set CMD_LINE_ARGS=
31 | :Win9xApp
32 | if %1a==a goto Win9xGetScriptDir
33 | set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1
34 | shift
35 | goto Win9xApp
36 |
37 | :Win9xGetScriptDir
38 | set SAVEDIR=%CD%
39 | %0\
40 | cd %0\..\..
41 | set BASEDIR=%CD%
42 | cd %SAVEDIR%
43 | set SAVE_DIR=
44 | goto repoSetup
45 |
46 | :WinNTGetScriptDir
47 | set BASEDIR=%~dp0\.
48 |
49 | :repoSetup
50 |
51 | if "%JAVACMD%"=="" set JAVACMD=java
52 |
53 | if "%REPO%"=="" set REPO=%BASEDIR%\lib
54 |
55 | rem Setup the classpath
56 | set LIBPATH=""
57 | pushd "%REPO%"
58 | for %%G in (*.jar) do call:APPEND_TO_LIBPATH %%G
59 | popd
60 | goto LIBPATH_END
61 |
62 | : APPEND_TO_LIBPATH
63 | set filename=%~1
64 | set suffix=%filename:~-4%
65 | if %suffix% equ .jar set LIBPATH=%LIBPATH%;"%REPO%\%filename%"
66 | goto :EOF
67 |
68 | :LIBPATH_END
69 |
70 | set CLASSPATH=%LIBPATH%
71 |
72 | set EXTRA_JVM_ARGUMENTS=-Dfile.encoding=UTF-8 -Xmx%HEAP% -Xms%HEAP%
73 | goto endInit
74 |
75 | @REM Reaching here means variables are defined and arguments have been captured
76 | :endInit
77 |
78 | %JAVACMD% %JAVA_OPTS% %EXTRA_JVM_ARGUMENTS% -classpath %CLASSPATH_PREFIX%;%CLASSPATH% -Dapp.name="batch-import" -Dapp.repo="%REPO%" -Dbasedir="%BASEDIR%" org.neo4j.batchimport.Importer %CMD_LINE_ARGS%
79 | if ERRORLEVEL 1 goto error
80 | goto end
81 |
82 | :error
83 | if "%OS%"=="Windows_NT" @endlocal
84 | set ERROR_CODE=1
85 |
86 | :end
87 | @REM set local scope for the variables with windows NT shell
88 | if "%OS%"=="Windows_NT" goto endNT
89 |
90 | @REM For old DOS remove the set variables from ENV - we assume they were not set
91 | @REM before we started - at least we don't leave any baggage around
92 | set CMD_LINE_ARGS=
93 | goto postExec
94 |
95 | :endNT
96 | @endlocal
97 |
98 | :postExec
99 |
100 | if "%FORCE_EXIT_ON_ERROR%" == "on" (
101 | if %ERROR_CODE% NEQ 0 exit %ERROR_CODE%
102 | )
103 |
104 | exit /B %ERROR_CODE%
105 |
--------------------------------------------------------------------------------
/import.sh:
--------------------------------------------------------------------------------
1 | if [ ! -d lib ]; then
2 | echo lib directory of binary download missing. Please download the zip or run import-mvn.sh
3 | exit 1
4 | fi
5 |
6 | HEAP=4G
7 |
8 | # Detect Cygwin
9 | case `uname -s` in
10 | CYGWIN*)
11 | cygwin=1
12 | esac
13 |
14 | DB=${1-target/graph.db}
15 | shift
16 | NODES=${1-nodes.csv}
17 | shift
18 | RELS=${1-rels.csv}
19 | shift
20 | CP=""
21 | base=`dirname "$0"`
22 | if [ \! -z "$cygwin" ]; then
23 | wbase=`cygpath -w "$base"`
24 | fi
25 | curdir=`pwd`
26 | cd "$base"
27 | for i in lib/*.jar; do
28 | if [ -z "$cygwin" ]; then
29 | CP="$CP":"$base/$i"
30 | else
31 | i=`cygpath -w "$i"`
32 | CP="$CP;$wbase/$i"
33 | fi
34 | done
35 | cd "$curdir"
36 | #echo java -classpath $CP -Xmx$HEAP -Xms$HEAP -Dfile.encoding=UTF-8 org.neo4j.batchimport.Importer batch.properties "$DB" "$NODES" "$RELS" "$@"
37 | java -classpath "$CP" -Xmx$HEAP -Xms$HEAP -Dfile.encoding=UTF-8 org.neo4j.batchimport.Importer batch.properties "$DB" "$NODES" "$RELS" "$@"
38 |
--------------------------------------------------------------------------------
/import_csv.sh:
--------------------------------------------------------------------------------
1 | source ./settings.sh
2 |
3 | mvn clean test-compile exec:java -Dexec.mainClass=org.neo4j.batchimport.ParallelImporter -Dexec.classpathScope=test -Dexec.args="/mnt/parallel.db nodes.csv rels.csv 100000000 4 50 100 2 ONE,TWO,THREE,FOUR,FIVE,SIX,SEVEN,EIGHT,NINE,TEN"
4 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 | org.neo4j
4 | batch-import
5 | 3.0.4
6 | Neo4j Batch Importer
7 |
8 | UTF-8
9 | 3.0.4
10 | GPL-3-header.txt
11 |
12 |
13 |
14 | Neo4j Snapshots
15 | http://m2.neo4j.org/content/repositories/snapshots
16 |
17 |
18 |
19 |
20 |
21 | GNU General Public License, Version 3
22 | http://www.gnu.org/licenses/gpl-3.0-standalone.html
23 | The software ("Software") developed and owned by Network Engine for
24 | Objects in Lund AB (referred to in this notice as "Neo Technology") is
25 | licensed under the GNU GENERAL PUBLIC LICENSE Version 3 to all third
26 | parties and that license is included below.
27 |
28 | However, if you have executed an End User Software License and Services
29 | Agreement or an OEM Software License and Support Services Agreement, or
30 | another commercial license agreement with Neo Technology or one of its
31 | affiliates (each, a "Commercial Agreement"), the terms of the license in
32 | such Commercial Agreement will supersede the GNU GENERAL PUBLIC LICENSE
33 | Version 3 and you may use the Software solely pursuant to the terms of
34 | the relevant Commercial Agreement.
35 |
36 |
37 |
38 |
39 |
40 |
41 | net.sf.opencsv
42 | opencsv
43 | 2.3
44 |
45 |
46 | org.mapdb
47 | mapdb
48 | 0.9.3
49 |
50 |
51 | junit
52 | junit
53 | 4.8.1
54 | test
55 |
56 |
57 | log4j
58 | log4j
59 | 1.2.17
60 |
61 |
62 | org.mockito
63 | mockito-core
64 | 1.8.5
65 | test
66 |
67 |
68 | org.neo4j
69 | neo4j-kernel
70 | ${neo4j.version}
71 |
72 |
73 | org.neo4j
74 | neo4j-enterprise
75 | ${neo4j.version}
76 |
77 |
78 | org.neo4j
79 | neo4j-lucene-index
80 | ${neo4j.version}
81 |
82 |
83 |
84 |
85 |
86 | org.apache.maven.plugins
87 | maven-compiler-plugin
88 | 2.1
89 |
90 | 1.7
91 | 1.7
92 |
93 |
94 |
95 | maven-assembly-plugin
96 |
97 |
98 | batch-import
99 |
100 |
101 |
102 | org.neo4j.batchimport.Importer
103 |
104 |
105 |
106 | jar-with-dependencies
107 |
108 |
109 |
110 |
111 |
112 |
113 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Neo4j (CSV) Batch Importer
2 |
3 | ## Neo4j 2.2+ neo4j-import tool
4 |
5 | 此工具的具体使用步骤可参见本人博客:https://my.oschina.net/u/2538940/blog/883829
6 |
7 | 直接下载已编译好的工具:
8 | https://github.com/mo9527/batch-import-tool
9 |
10 | 与原始版本比较,本版本主要做了以下修改:\
11 | 1、修复了导入.gz关系压缩文件时,win环境会出现关系无法导入的情况。\
12 | 2、增加程序对csv文件的容错性,即使csv文件的某一行出现编码或断行问题,也不会影响接下来数据的\
13 | 导入,前提是每一行数据的字节长度不超过5000。如果超过5000,可自行修改org.neo4j.batchimport.CSVParser\
14 | 文件的第171行。
15 |
16 |
17 | Since version 2.2.0 Neo4j comes with an **high performance import tool** out of the box that takes many ideas of this one, but is way more scalable across CPUs and has little memory requirements.
18 |
19 | The only cases that are not covered are repeated imports in existing stores and population of manual indexes. Please consider the built-in and officially supported tool first, before falling back onto this one.
20 |
21 | The simplest invocation is `/path/to/neo4j/bin/neo4j-import --into graph.db --nodes nodes.csv --relationships rels.csv` with the header format being similar to this one. For a quick intro check the [developer pages](http://neo4j.com/developer/guide-import-csv/#_super_fast_batch_importer_for_huge_datasets).
22 | There is much more to it, please see the [Neo4j reference manual](http://neo4j.com/docs/stable/import-tool.html).
23 |
24 | ## Licensing
25 |
26 | This software is licensed under the [GPLv3](http://www.gnu.org/licenses/gpl-3.0.en.html) for now.
27 | You can ask [Neo Technology](http://neotechnology.com) about a different licensing agreement.
28 |
29 | __Works with Neo4j 2.x__
30 |
31 | ## Binary Download
32 |
33 | To simply use it (no source/git/maven required):
34 | * [download 2.2 zip](https://dl.dropboxusercontent.com/u/14493611/batch_importer_22.zip)
35 | * unzip
36 | * run `import.sh test.db nodes.csv rels.csv` (on Windows: `import.bat`)
37 | * after the import point your `/path/to/neo4j/conf/neo4j-server.properties` to this `test.db` directory,
38 | or copy the data over to your server `cp -r test.db/* /path/to/neo4j/data/graph.db/`
39 |
40 | You provide one **tab separated** csv file for nodes and one for relationships (optionally more for indexes)
41 |
42 | Example data for the files is a small family network
43 |
44 | ## File format
45 |
46 | * **tab separated** csv files
47 | * Property names in first row.
48 | * If only one file is initially imported, the row number corresponds to the node-id (*starting with 0*)
49 | * Property values not listed will not be set on the nodes or relationships.
50 | * Optionally property fields can have a type (defaults to String) indicated with name:type where type is one of
51 | (int, long, float, double, boolean, byte, short, char, string). The string value is then converted to that type.
52 | Conversion failure will result in abort of the import operation.
53 | * There is a separate "label" type, which should be used for relationship types and/or node labels, (`labels:label`)
54 | * Property fields may also be arrays by adding "_array" to the types above and separating the data with commas.
55 | * for non-ascii characters make sure to add `-Dfile.encoding=UTF-8` to the commandline arguments
56 | * Optionally automatic indexing of properties can be configured with a header like `name:string:users` and a configured index in `batch.properties` like `batch_import.node_index=exact`
57 | then the property `name` will be indexed in the `users` index for each row with a value there
58 | * multiple files for nodes and rels, comma separated, without spaces like "node1.csv,node2.csv"
59 | * you can specify concrete, externally provided node-id's with: `i:id`, both in the node and relationship-files
60 | * csv files can be zipped individually as *.gz or *.zip
61 |
62 | ## Examples
63 |
64 | There is also a `sample` directory, please run from the main directory `./import.sh test.db sample/nodes.csv sample/rels.csv`
65 |
66 | ### nodes.csv
67 |
68 | name l:label age works_on
69 | Michael Person,Father 37 neo4j
70 | Selina Person,Child 14
71 | Rana Person,Child 6
72 | Selma Person,Child 4
73 |
74 | ### rels.csv
75 |
76 | Note that the node-id references are numbered from 0 (since Neo4j 2.0)
77 |
78 | start end type since counter:int
79 | 0 1 FATHER_OF 1998-07-10 1
80 | 0 2 FATHER_OF 2007-09-15 2
81 | 0 3 FATHER_OF 2008-05-03 3
82 | 2 3 SISTER_OF 2008-05-03 5
83 | 1 2 SISTER_OF 2007-09-15 7
84 |
85 |
86 | ## Execution
87 |
88 | Just use the provided shell script `import.sh` or `import.bat` on Windows
89 |
90 | import.sh test.db nodes.csv rels.csv
91 |
92 |
93 | ### For Developers
94 |
95 | If you want to work on the code and run the importer after making changes:
96 |
97 | mvn clean compile exec:java -Dexec.mainClass="org.neo4j.batchimport.Importer" -Dexec.args="neo4j/data/graph.db nodes.csv rels.csv"
98 |
99 | or
100 |
101 | java -server -Dfile.encoding=UTF-8 -Xmx4G -jar target/batch-import-jar-with-dependencies.jar neo4j/data/graph.db nodes.csv rels.csv
102 |
103 |
104 | ynagzet:batchimport mh$ rm -rf target/db
105 | ynagzet:batchimport mh$ mvn clean compile assembly:single
106 | [INFO] Scanning for projects...
107 | [INFO] ------------------------------------------------------------------------
108 | [INFO] Building Simple Batch Importer
109 | [INFO] task-segment: [clean, compile, assembly:single]
110 | [INFO] ------------------------------------------------------------------------
111 | ...
112 | [INFO] Building jar: /Users/mh/java/neo/batchimport/target/batch-import-jar-with-dependencies.jar
113 | [INFO] ------------------------------------------------------------------------
114 | [INFO] BUILD SUCCESSFUL
115 | [INFO] ------------------------------------------------------------------------
116 | ynagzet:batchimport mh$ java -server -Xmx4G -jar target/batch-import-jar-with-dependencies.jar target/db nodes.csv rels.csv
117 | Physical mem: 16384MB, Heap size: 3640MB
118 |
119 | Configuration:
120 | use_memory_mapped_buffers=false
121 | neostore.nodestore.db.mapped_memory=200M
122 | neostore.relationshipstore.db.mapped_memory=1000M
123 | neostore.propertystore.db.mapped_memory=1000M
124 | neostore.propertystore.db.strings.mapped_memory=100M
125 | neostore.propertystore.db.arrays.mapped_memory=215M
126 | neo_store=/Users/mh/java/neo/batchimport/test.db
127 | dump_configuration=true
128 | cache_type=none
129 |
130 | ...........................................................................
131 | Importing 7500000 Nodes took 17 seconds
132 | ....................................................................................................35818 ms
133 | ....................................................................................................39343 ms
134 | ....................................................................................................41788 ms
135 | ....................................................................................................48897 ms
136 | ............
137 | Importing 41246740 Relationships took 170 seconds
138 | Total 212 seconds
139 | ynagzet:batchimport mh$ du -sh test.db
140 | 3,2G test.db
141 |
142 | ## Parameters
143 |
144 | *First parameter* MIGHT be the property-file name, if so it has to end with `.properties`, then this file will be used and all other parameters are consumed as usual
145 |
146 | *First parameter* - the graph database directory, a new db will be created in the directory except when `batch_import.keep_db=true` is set in `batch.properties`.
147 |
148 | *Second parameter* - a comma separated list of *node-csv-files*
149 |
150 | *Third parameter* - a comma separated list of *relationship-csv-files*
151 |
152 | It is also possible to specify those two file-lists in the config:
153 |
154 | ````
155 | batch_import.nodes_files=nodes1.csv[,nodes2.csv]
156 | batch_import.rels_files=rels1.csv[,rels2.csv]
157 | ````
158 |
159 | *Fourth parameter* - index configuration each a set of 4 values: `node_index users fulltext nodes_index.csv` or more generally: `node-or-rel-index index-name index-type index-file`
160 |
161 | This parameter set can be repeatedly used, see below. It is also possible to configure this in the config (`batch.properties`)
162 |
163 | ````
164 | batch_import.node_index.users=exact
165 | ````
166 |
167 | ## Schema indexes
168 |
169 | Currently schema indexes are not created by the batch-inserter, you could create them upfront and use `batch_import.keep_db=true` to work with the existing database.
170 | You then have the option of specifying labels for your nodes using a column header like `type:label` and a comma separated list of label values.
171 | Then on shutdown of the import Neo4j will populate the schema indexes with nodes with the appropriate labels and properties automatically.
172 | (As a rough estimate, the index creation will take time proportional to the amount of labeled nodes and indexed properties.)
173 |
174 | ## (Legacy) Indexing
175 |
176 | ### Indexing of inserted properties
177 |
178 | You can automatically index properties of nodes and relationships by adding ":indexName" to the property-column-header.
179 | Just configure the indexes in `batch.properties` like so:
180 |
181 | ````
182 | batch_import.node_index.users=exact
183 | ````
184 |
185 | ````
186 | name:string:users age works_on
187 | Michael 37 neo4j
188 | Selina 14
189 | Rana 6
190 | Selma 4
191 | ````
192 |
193 | **If you use `node_auto_index` as the index name, you can also initially populate Neo4j's automatic node index which is then
194 | later used and updated while working with the database.**
195 |
196 |
197 | In the relationships-file you can optionally specify that the start and end-node should be looked up from the index in the same way
198 |
199 | ````
200 | name:string:users name:string:users type since counter:int
201 | Michael Selina FATHER_OF 1998-07-10 1
202 | Michael Rana FATHER_OF 2007-09-15 2
203 | Michael Selma FATHER_OF 2008-05-03 3
204 | Rana Selma SISTER_OF 2008-05-03 5
205 | Selina Rana SISTER_OF 2007-09-15 7
206 | ````
207 |
208 | ### Explicit Indexing
209 |
210 | Optionally you can add nodes and relationships to indexes.
211 |
212 | Add four arguments per each index to command line:
213 |
214 | To create a full text node index called users using nodes_index.csv:
215 |
216 | ````
217 | node_index users fulltext nodes_index.csv
218 | ````
219 |
220 | To create an exact relationship index called worked using rels_index.csv:
221 |
222 | ````
223 | rel_index worked exact rels_index.csv
224 | ````
225 |
226 | Example command line:
227 |
228 | ````
229 | ./import.sh test.db nodes.csv rels.csv node_index users fulltext nodes_index.csv rel_index worked exact rels_index.csv
230 | ````
231 |
232 | ### Using Neo4j's Automatic Indexing
233 |
234 | The auto-indexing elsewhere in this file pertains to the *batch inserter's* ability to automatically index. If you want to
235 | use this cool feature from the batch inserter, there's a little gotcha. You still need to enable the batch inserter's feature
236 | with `batch_import.node_index` but then instead of specifying the name of a regular index, specify the auto index's name like so:
237 |
238 | ````
239 | batch_import.node_index.node_auto_index=exact
240 | ````
241 |
242 | And you have to make sure to also enable automatic indexing in your regular Neo4j database's (`conf/neo4j.properties`) and
243 | specify the correct node properties to be indexed.
244 |
245 | ## Examples
246 |
247 | ### nodes_index.csv
248 |
249 | ````
250 | id name language
251 | 0 Victor Richards West Frisian
252 | 1 Virginia Shaw Korean
253 | 2 Lois Simpson Belarusian
254 | 3 Randy Bishop Hiri Motu
255 | 4 Lori Mendoza Tok Pisin
256 | ````
257 |
258 | ### rels_index.csv
259 |
260 | ````
261 | id property1 property2
262 | 0 cwqbnxrv rpyqdwhk
263 | 1 qthnrret tzjmmhta
264 | 2 dtztaqpy pbmcdqyc
265 | ````
266 |
267 | ## Configuration
268 |
269 | The Importer uses a supplied `batch.properties` file to be configured:
270 |
271 | #### Memory Mapping I/O Config
272 |
273 | Most important is the memory config, you should try to have enough RAM to map as much of your store-files to memory as possible.
274 |
275 | At least the node-store and large parts of the relationship-store should be mapped. The property- and string-stores are mostly
276 | append only so don't need that much RAM. Below is an example for about 6GB RAM, to leave room for the heap and also OS and OS caches.
277 |
278 | ````
279 | cache_type=none
280 | use_memory_mapped_buffers=true
281 | # 14 bytes per node
282 | neostore.nodestore.db.mapped_memory=200M
283 | # 33 bytes per relationships
284 | neostore.relationshipstore.db.mapped_memory=3G
285 | # 38 bytes per property
286 | neostore.propertystore.db.mapped_memory=500M
287 | # 60 bytes per long-string block
288 | neostore.propertystore.db.strings.mapped_memory=500M
289 | neostore.propertystore.db.index.keys.mapped_memory=5M
290 | neostore.propertystore.db.index.mapped_memory=5M
291 | ````
292 |
293 | #### Indexes (experimental)
294 |
295 | ````
296 | batch_import.node_index.users=exact
297 | batch_import.node_index.articles=fulltext
298 | batch_import.relationship_index.friends=exact
299 | ````
300 |
301 | #### CSV (experimental)
302 |
303 | ````
304 | batch_import.csv.quotes=true // default, set to false for faster, experimental csv-reader
305 | batch_import.csv.delim=,
306 | ````
307 |
308 | ##### Index-Cache (experimental)
309 |
310 | ````
311 | batch_import.mapdb_cache.disable=true
312 | ````
313 |
314 | ##### Keep Database (experimental)
315 |
316 | ````
317 | batch_import.keep_db=true
318 | ````
319 |
320 | ## Utilities
321 |
322 | ### TestDataGenerator
323 |
324 | It is a dumb random test data generator (`org.neo4j.batchimport.TestDataGenerator`) that you can run with
325 |
326 | ./generate.sh #nodes #max-rels-per-node REL1,REL2,REL3 LABEL1,LABEL2,LABEL3
327 |
328 | Will generate nodes.csv and rels.csv for those numbers
329 |
330 |
331 | ### Relationship-Sorter
332 |
333 | Sorts a given relationship-CSV file by min(start,end) as required for the parallel sorter. Uses the data-pump sorter from mapdb
334 | for the actual sorting with a custom Comparator.
335 |
336 | `org.neo4j.batchimport.utils.RelationshipSorter` rels-input.csv rels-output.csv
337 |
338 |
339 |
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | . ./settings.sh
2 |
3 | mvn clean test-compile exec:java -Dexec.mainClass=org.neo4j.batchimport.DisruptorTest -Dexec.classpathScope=test
--------------------------------------------------------------------------------
/sample/batch.properties:
--------------------------------------------------------------------------------
1 | dump_configuration=false
2 | cache_type=none
3 | use_memory_mapped_buffers=true
4 | neostore.propertystore.db.index.keys.mapped_memory=5M
5 | neostore.propertystore.db.index.mapped_memory=5M
6 | neostore.nodestore.db.mapped_memory=200M
7 | neostore.relationshipstore.db.mapped_memory=500M
8 | neostore.propertystore.db.mapped_memory=200M
9 | neostore.propertystore.db.strings.mapped_memory=200M
10 |
11 | batch_import.node_index.users=exact
--------------------------------------------------------------------------------
/sample/import.sh:
--------------------------------------------------------------------------------
1 | echo "Run in main directory sh sample/import.sh"
2 | mvn test-compile exec:java -Dexec.mainClass="org.neo4j.batchimport.Importer" \
3 | -Dexec.args="sample/batch.properties target/graph.db sample/nodes.csv,sample/nodes2.csv sample/rels.csv"
--------------------------------------------------------------------------------
/sample/nodes.csv:
--------------------------------------------------------------------------------
1 | name:string:users age works_on
2 | Michael 37 neo4j
3 | Selina 14
--------------------------------------------------------------------------------
/sample/nodes2.csv:
--------------------------------------------------------------------------------
1 | name:string:users age works_on
2 | Rana 6
3 | Selma 4
--------------------------------------------------------------------------------
/sample/rels.csv:
--------------------------------------------------------------------------------
1 | name:string:users name:string:users type since counter:int
2 | Michael Selina FATHER_OF 1998-07-10 1
3 | Michael Rana FATHER_OF 2007-09-15 2
4 | Michael Selma FATHER_OF 2008-05-03 3
5 | Rana Selma SISTER_OF 2008-05-03 5
6 | Selina Rana SISTER_OF 2007-09-15 7
--------------------------------------------------------------------------------
/settings.sh:
--------------------------------------------------------------------------------
1 | MEMORY_OPTS="-Xmx50G -Xms50G -server -d64 -Xmn3g -XX:SurvivorRatio=2"
2 | GC_OPTS="-XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:ParallelCMSThreads=4 -XX:+CMSParallelRemarkEnabled -XX:+CMSIncrementalMode -XX:+CMSIncrementalPacing -XX:CMSIncrementalDutyCycle=10 -XX:CMSFullGCsBeforeCompaction=1 "
3 |
4 | PRINT_GC_OPTS="-XX:+PrintTenuringDistribution -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:gc.log"
5 |
6 | # PROFILE_OPTS="-agentpath:/root/yourkit/bin/linux-x86-64/libyjpagent.so=port=10001"
7 |
8 | #-XX:+PrintGCApplicationStoppedTime -XX:+PrintGCApplicationConcurrentTime -XX:+PrintHeapAtGC -XX:+PrintGCTaskTimeStamps
9 |
10 | export MAVEN_OPTS="$PROFILE_OPTS $MEMORY_OPTS $GC_OPTS $PRINT_GC_OPTS"
--------------------------------------------------------------------------------
/sort.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Sorts a relationship CSV by start/end node using RelationshipSorter.
# Usage: sort.sh [input.csv] [output.csv]  (defaults: rels.csv / rels-sorted.csv)
HEAP=4G
IN=${1-rels.csv}
# Only shift when an argument was actually supplied; a bare `shift` with no
# positional parameters is an error in POSIX sh.
if [ $# -gt 0 ]; then shift; fi
OUT=${1-rels-sorted.csv}
# Build the classpath from every jar in lib/.
CP=""
for i in lib/*.jar; do CP="$CP:$i"; done

# Echo the command first so the exact invocation appears in the log.
echo java -classpath "$CP" -Xmx$HEAP -Xms$HEAP -Dfile.encoding=UTF-8 org.neo4j.batchimport.utils.RelationshipSorter "$IN" "$OUT"
java -classpath "$CP" -Xmx$HEAP -Xms$HEAP -Dfile.encoding=UTF-8 org.neo4j.batchimport.utils.RelationshipSorter "$IN" "$OUT"
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/CSVParser.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport;
2 |
3 | /**
4 | Copyright 2005 Bytecode Pty Ltd.
5 |
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at
9 |
10 | http://www.apache.org/licenses/LICENSE-2.0
11 |
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | */
18 |
19 | import java.io.IOException;
20 | import java.util.ArrayList;
21 | import java.util.List;
22 |
23 | /**
24 | * A very simple CSV parser released under a commercial-friendly license.
25 | * This just implements splitting a single line into fields.
26 | *
27 | * @author Glen Smith
28 | * @author Rainer Pruy
29 | */
/**
 * A very simple CSV parser released under a commercial-friendly license.
 * This just implements splitting a single line into fields.
 *
 * @author Glen Smith
 * @author Rainer Pruy
 */
public class CSVParser {

    private final char separator;

    private final char quotechar;

    private final char escape;

    private final boolean strictQuotes;

    // Holds an unterminated quoted field carried over between parseLineMulti() calls.
    private String pending;
    // True while we are inside a field; used to decide whether a quote/escape is escapable.
    private boolean inField = false;

    private final boolean ignoreLeadingWhiteSpace;

    /**
     * The default separator to use if none is supplied to the constructor.
     */
    public static final char DEFAULT_SEPARATOR = ',';

    public static final int INITIAL_READ_SIZE = 128;

    /**
     * The default quote character to use if none is supplied to the
     * constructor.
     */
    public static final char DEFAULT_QUOTE_CHARACTER = '"';

    /**
     * The default escape character to use if none is supplied to the
     * constructor.
     */
    public static final char DEFAULT_ESCAPE_CHARACTER = '\\';

    /**
     * The default strict quote behavior to use if none is supplied to the
     * constructor.
     */
    public static final boolean DEFAULT_STRICT_QUOTES = false;

    /**
     * The default leading whitespace behavior to use if none is supplied to the
     * constructor.
     */
    public static final boolean DEFAULT_IGNORE_LEADING_WHITESPACE = true;

    /**
     * This is the "null" character - if a value is set to this then it is ignored.
     * I.E. if the quote character is set to null then there is no quote character.
     */
    public static final char NULL_CHARACTER = '\0';

    /**
     * Safety valve for malformed CSV: if a pending (unterminated) quoted field grows
     * beyond this many characters, the accumulated fragment is discarded so a broken
     * line break cannot cause unbounded memory growth or an apparent hang.
     * NOTE(review): discarding the fragment silently drops data - confirm this
     * trade-off is acceptable for the import use case.
     */
    public static final int MAX_PENDING_LENGTH = 5000;

    /**
     * Constructs CSVParser using a comma for the separator.
     */
    public CSVParser() {
        this(DEFAULT_SEPARATOR, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVParser with supplied separator.
     *
     * @param separator the delimiter to use for separating entries.
     */
    public CSVParser(char separator) {
        this(separator, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVParser with supplied separator and quote char.
     *
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     */
    public CSVParser(char separator, char quotechar) {
        this(separator, quotechar, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVParser with supplied separator, quote and escape char.
     *
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param escape    the character to use for escaping a separator or quote
     */
    public CSVParser(char separator, char quotechar, char escape) {
        this(separator, quotechar, escape, DEFAULT_STRICT_QUOTES);
    }

    /**
     * Constructs CSVParser with supplied separator, quote and escape char.
     * Allows setting the "strict quotes" flag.
     *
     * @param separator    the delimiter to use for separating entries
     * @param quotechar    the character to use for quoted elements
     * @param escape       the character to use for escaping a separator or quote
     * @param strictQuotes if true, characters outside the quotes are ignored
     */
    public CSVParser(char separator, char quotechar, char escape, boolean strictQuotes) {
        this(separator, quotechar, escape, strictQuotes, DEFAULT_IGNORE_LEADING_WHITESPACE);
    }

    /**
     * Constructs CSVParser with supplied separator, quote and escape char.
     * Allows setting the "strict quotes" and "ignore leading whitespace" flags.
     *
     * @param separator               the delimiter to use for separating entries
     * @param quotechar               the character to use for quoted elements
     * @param escape                  the character to use for escaping a separator or quote
     * @param strictQuotes            if true, characters outside the quotes are ignored
     * @param ignoreLeadingWhiteSpace if true, white space in front of a quote in a field is ignored
     * @throws UnsupportedOperationException if the control characters clash or the separator is undefined
     */
    public CSVParser(char separator, char quotechar, char escape, boolean strictQuotes, boolean ignoreLeadingWhiteSpace) {
        if (anyCharactersAreTheSame(separator, quotechar, escape)) {
            throw new UnsupportedOperationException("The separator, quote, and escape characters must be different!");
        }
        if (separator == NULL_CHARACTER) {
            throw new UnsupportedOperationException("The separator character must be defined!");
        }
        this.separator = separator;
        this.quotechar = quotechar;
        this.escape = escape;
        this.strictQuotes = strictQuotes;
        this.ignoreLeadingWhiteSpace = ignoreLeadingWhiteSpace;
    }

    private boolean anyCharactersAreTheSame(char separator, char quotechar, char escape) {
        return isSameCharacter(separator, quotechar) || isSameCharacter(separator, escape) || isSameCharacter(quotechar, escape);
    }

    private boolean isSameCharacter(char c1, char c2) {
        // NULL_CHARACTER means "not configured", so it never clashes.
        return c1 != NULL_CHARACTER && c1 == c2;
    }

    /**
     * @return true if something was left over from last call(s)
     */
    public boolean isPending() {
        // Guard against broken CSV line breaks: once the carried-over fragment exceeds
        // MAX_PENDING_LENGTH, assume the quoting is corrupt and drop it to avoid
        // unbounded memory use / an apparent hang (see MAX_PENDING_LENGTH javadoc).
        if (pending != null && pending.length() > MAX_PENDING_LENGTH) {
            pending = null;
            return false;
        }
        return pending != null;
    }

    /**
     * Parses a line allowing quoted fields to span multiple physical lines;
     * unterminated quotes are kept in {@link #isPending() pending} state.
     */
    public String[] parseLineMulti(String nextLine) throws IOException {
        return parseLine(nextLine, true);
    }

    /**
     * Parses a single physical line; an unterminated quote raises an IOException.
     */
    public String[] parseLine(String nextLine) throws IOException {
        return parseLine(nextLine, false);
    }

    /**
     * Parses an incoming String and returns an array of elements.
     *
     * @param nextLine the string to parse
     * @param multi    if true, an unterminated quoted field is carried over to the next call
     * @return the comma-tokenized list of elements, or null if nextLine is null
     * @throws IOException if an unterminated quoted field ends the line in single-line mode
     */
    private String[] parseLine(String nextLine, boolean multi) throws IOException {

        // A single-line parse never continues a previous multi-line fragment.
        if (!multi && pending != null) {
            pending = null;
        }

        if (nextLine == null) {
            if (pending != null) {
                String s = pending;
                pending = null;
                return new String[]{s};
            } else {
                return null;
            }
        }

        List<String> tokensOnThisLine = new ArrayList<>();
        StringBuilder sb = new StringBuilder(INITIAL_READ_SIZE);
        boolean inQuotes = false;
        if (pending != null) {
            // Resume the quoted field left open by the previous call.
            sb.append(pending);
            pending = null;
            inQuotes = true;
        }
        for (int i = 0; i < nextLine.length(); i++) {

            char c = nextLine.charAt(i);
            if (c == this.escape) {
                if (isNextCharacterEscapable(nextLine, inQuotes || inField, i)) {
                    sb.append(nextLine.charAt(i + 1));
                    i++; // consume the escaped character
                }
            } else if (c == quotechar) {
                if (isNextCharacterEscapedQuote(nextLine, inQuotes || inField, i)) {
                    sb.append(nextLine.charAt(i + 1));
                    i++; // "" inside quotes becomes a literal quote
                } else {
                    // the tricky case of an embedded quote in the middle: a,bc"d"ef,g
                    if (!strictQuotes) {
                        if (i > 2 //not on the beginning of the line
                                && nextLine.charAt(i - 1) != this.separator //not at the beginning of an escape sequence
                                && nextLine.length() > (i + 1) &&
                                nextLine.charAt(i + 1) != this.separator //not at the end of an escape sequence
                                ) {

                            if (ignoreLeadingWhiteSpace && sb.length() > 0 && isAllWhiteSpace(sb)) {
                                sb.setLength(0); //discard white space leading up to quote
                            } else {
                                sb.append(c); // keep the embedded quote literally
                            }

                        }
                    }

                    inQuotes = !inQuotes;
                }
                inField = !inField;
            } else if (c == separator && !inQuotes) {
                tokensOnThisLine.add(sb.toString());
                sb.setLength(0); // start work on next token
                inField = false;
            } else {
                if (!strictQuotes || inQuotes) {
                    sb.append(c);
                    inField = true;
                }
            }
        }
        // line is done - check status
        if (inQuotes) {
            if (multi) {
                // continuing a quoted section, re-append newline
                sb.append("\n");
                pending = sb.toString();
                sb = null; // this partial content is not to be added to field list yet
            } else {
                throw new IOException("Un-terminated quoted field at end of CSV line");
            }
        }
        if (sb != null) {
            tokensOnThisLine.add(sb.toString());
        }
        return tokensOnThisLine.toArray(new String[tokensOnThisLine.size()]);

    }

    /**
     * precondition: the current character is a quote or an escape
     *
     * @param nextLine the current line
     * @param inQuotes true if the current context is quoted
     * @param i        current index in line
     * @return true if the following character is a quote
     */
    private boolean isNextCharacterEscapedQuote(String nextLine, boolean inQuotes, int i) {
        return inQuotes // we are in quotes, therefore there can be escaped quotes in here.
                && nextLine.length() > (i + 1) // there is indeed another character to check.
                && nextLine.charAt(i + 1) == quotechar;
    }

    /**
     * precondition: the current character is an escape
     *
     * @param nextLine the current line
     * @param inQuotes true if the current context is quoted
     * @param i        current index in line
     * @return true if the following character is a quote or another escape
     */
    protected boolean isNextCharacterEscapable(String nextLine, boolean inQuotes, int i) {
        return inQuotes // we are in quotes, therefore there can be escaped quotes in here.
                && nextLine.length() > (i + 1) // there is indeed another character to check.
                && (nextLine.charAt(i + 1) == quotechar || nextLine.charAt(i + 1) == this.escape);
    }

    /**
     * precondition: sb.length() > 0
     *
     * @param sb A sequence of characters to examine
     * @return true if every character in the sequence is whitespace
     */
    protected boolean isAllWhiteSpace(CharSequence sb) {
        for (int i = 0; i < sb.length(); i++) {
            if (!Character.isWhitespace(sb.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}
329 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/CSVReader.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport;
2 |
3 | /**
4 | Copyright 2005 Bytecode Pty Ltd.
5 |
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at
9 |
10 | http://www.apache.org/licenses/LICENSE-2.0
11 |
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | */
18 |
19 | import java.io.BufferedReader;
20 | import java.io.Closeable;
21 | import java.io.IOException;
22 | import java.io.Reader;
23 | import java.util.ArrayList;
24 | import java.util.List;
25 |
26 | /**
27 | * A very simple CSV reader released under a commercial-friendly license.
28 | *
29 | * @author Glen Smith
30 | *
31 | */
32 | public class CSVReader implements Closeable {
33 |
34 | private BufferedReader br;
35 |
36 | private boolean hasNext = true;
37 |
38 | private CSVParser parser;
39 |
40 | private int skipLines;
41 |
42 | private boolean linesSkiped;
43 |
44 | /**
45 | * The default line to start reading.
46 | */
47 | public static final int DEFAULT_SKIP_LINES = 0;
48 |
49 | /**
50 | * Constructs CSVReader using a comma for the separator.
51 | *
52 | * @param reader
53 | * the reader to an underlying CSV source.
54 | */
55 | public CSVReader(Reader reader) {
56 | this(reader, CSVParser.DEFAULT_SEPARATOR, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER);
57 | }
58 |
59 | /**
60 | * Constructs CSVReader with supplied separator.
61 | *
62 | * @param reader
63 | * the reader to an underlying CSV source.
64 | * @param separator
65 | * the delimiter to use for separating entries.
66 | */
67 | public CSVReader(Reader reader, char separator) {
68 | this(reader, separator, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER);
69 | }
70 |
71 | /**
72 | * Constructs CSVReader with supplied separator and quote char.
73 | *
74 | * @param reader
75 | * the reader to an underlying CSV source.
76 | * @param separator
77 | * the delimiter to use for separating entries
78 | * @param quotechar
79 | * the character to use for quoted elements
80 | */
81 | public CSVReader(Reader reader, char separator, char quotechar) {
82 | this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES, CSVParser.DEFAULT_STRICT_QUOTES);
83 | }
84 |
85 | /**
86 | * Constructs CSVReader with supplied separator, quote char and quote handling
87 | * behavior.
88 | *
89 | * @param reader
90 | * the reader to an underlying CSV source.
91 | * @param separator
92 | * the delimiter to use for separating entries
93 | * @param quotechar
94 | * the character to use for quoted elements
95 | * @param strictQuotes
96 | * sets if characters outside the quotes are ignored
97 | */
98 | public CSVReader(Reader reader, char separator, char quotechar, boolean strictQuotes) {
99 | this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES, strictQuotes);
100 | }
101 |
102 | /**
103 | * Constructs CSVReader with supplied separator and quote char.
104 | *
105 | * @param reader
106 | * the reader to an underlying CSV source.
107 | * @param separator
108 | * the delimiter to use for separating entries
109 | * @param quotechar
110 | * the character to use for quoted elements
111 | * @param escape
112 | * the character to use for escaping a separator or quote
113 | */
114 |
115 | public CSVReader(Reader reader, char separator,
116 | char quotechar, char escape) {
117 | this(reader, separator, quotechar, escape, DEFAULT_SKIP_LINES, CSVParser.DEFAULT_STRICT_QUOTES);
118 | }
119 |
120 | /**
121 | * Constructs CSVReader with supplied separator and quote char.
122 | *
123 | * @param reader
124 | * the reader to an underlying CSV source.
125 | * @param separator
126 | * the delimiter to use for separating entries
127 | * @param quotechar
128 | * the character to use for quoted elements
129 | * @param line
130 | * the line number to skip for start reading
131 | */
132 | public CSVReader(Reader reader, char separator, char quotechar, int line) {
133 | this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, line, CSVParser.DEFAULT_STRICT_QUOTES);
134 | }
135 |
136 | /**
137 | * Constructs CSVReader with supplied separator and quote char.
138 | *
139 | * @param reader
140 | * the reader to an underlying CSV source.
141 | * @param separator
142 | * the delimiter to use for separating entries
143 | * @param quotechar
144 | * the character to use for quoted elements
145 | * @param escape
146 | * the character to use for escaping a separator or quote
147 | * @param line
148 | * the line number to skip for start reading
149 | */
150 | public CSVReader(Reader reader, char separator, char quotechar, char escape, int line) {
151 | this(reader, separator, quotechar, escape, line, CSVParser.DEFAULT_STRICT_QUOTES);
152 | }
153 |
154 | /**
155 | * Constructs CSVReader with supplied separator and quote char.
156 | *
157 | * @param reader
158 | * the reader to an underlying CSV source.
159 | * @param separator
160 | * the delimiter to use for separating entries
161 | * @param quotechar
162 | * the character to use for quoted elements
163 | * @param escape
164 | * the character to use for escaping a separator or quote
165 | * @param line
166 | * the line number to skip for start reading
167 | * @param strictQuotes
168 | * sets if characters outside the quotes are ignored
169 | */
170 | public CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes) {
171 | this(reader, separator, quotechar, escape, line, strictQuotes, CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE);
172 | }
173 |
174 | /**
175 | * Constructs CSVReader with supplied separator and quote char.
176 | *
177 | * @param reader
178 | * the reader to an underlying CSV source.
179 | * @param separator
180 | * the delimiter to use for separating entries
181 | * @param quotechar
182 | * the character to use for quoted elements
183 | * @param escape
184 | * the character to use for escaping a separator or quote
185 | * @param line
186 | * the line number to skip for start reading
187 | * @param strictQuotes
188 | * sets if characters outside the quotes are ignored
189 | * @param ignoreLeadingWhiteSpace
190 | * it true, parser should ignore white space before a quote in a field
191 | */
192 | public CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes, boolean ignoreLeadingWhiteSpace) {
193 | this.br = new BufferedReader(reader);
194 | this.parser = new CSVParser(separator, quotechar, escape, strictQuotes, ignoreLeadingWhiteSpace);
195 | this.skipLines = line;
196 | }
197 |
198 | /**
199 | * Reads the entire file into a List with each element being a String[] of
200 | * tokens.
201 | *
202 | * @return a List of String[], with each String[] representing a line of the
203 | * file.
204 | *
205 | * @throws IOException
206 | * if bad things happen during the read
207 | */
208 | public List readAll() throws IOException {
209 |
210 | List allElements = new ArrayList();
211 | while (hasNext) {
212 | String[] nextLineAsTokens = readNext();
213 | if (nextLineAsTokens != null)
214 | allElements.add(nextLineAsTokens);
215 | }
216 | return allElements;
217 |
218 | }
219 |
220 | /**
221 | * Reads the next line from the buffer and converts to a string array.
222 | *
223 | * @return a string array with each comma-separated element as a separate
224 | * entry.
225 | *
226 | * @throws IOException
227 | * if bad things happen during the read
228 | */
229 | public String[] readNext() throws IOException {
230 |
231 | String[] result = null;
232 | do {
233 | String nextLine = getNextLine();
234 | if (!hasNext) {
235 | return result; // should throw if still pending?
236 | }
237 | String[] r = parser.parseLineMulti(nextLine);
238 | if (r.length > 0) {
239 | if (result == null) {
240 | result = r;
241 | } else {
242 | String[] t = new String[result.length+r.length];
243 | System.arraycopy(result, 0, t, 0, result.length);
244 | System.arraycopy(r, 0, t, result.length, r.length);
245 | result = t;
246 | }
247 | }
248 | } while (parser.isPending());
249 | return result;
250 | }
251 |
252 | /**
253 | * Reads the next line from the file.
254 | *
255 | * @return the next line from the file without trailing newline
256 | * @throws IOException
257 | * if bad things happen during the read
258 | */
259 | private String getNextLine() throws IOException {
260 | if (!this.linesSkiped) {
261 | for (int i = 0; i < skipLines; i++) {
262 | br.readLine();
263 | }
264 | this.linesSkiped = true;
265 | }
266 | String nextLine = br.readLine();
267 | if (nextLine == null) {
268 | hasNext = false;
269 | }
270 | return hasNext ? nextLine : null;
271 | }
272 |
273 | /**
274 | * Closes the underlying reader.
275 | *
276 | * @throws IOException if the close fails
277 | */
278 | public void close() throws IOException{
279 | br.close();
280 | }
281 |
282 | }
283 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/Importer.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport;
2 |
3 | import org.neo4j.batchimport.importer.ChunkerLineData;
4 | import org.neo4j.batchimport.importer.CsvLineData;
5 | import org.neo4j.batchimport.importer.RelType;
6 | import org.neo4j.batchimport.importer.Type;
7 | import org.neo4j.batchimport.index.MapDbCachingIndexProvider;
8 | import org.neo4j.batchimport.utils.Config;
9 | import org.neo4j.graphdb.DynamicLabel;
10 | import org.neo4j.graphdb.Label;
11 | import org.neo4j.graphdb.index.IndexManager;
12 | import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
13 | import org.neo4j.io.fs.FileUtils;
14 | import org.neo4j.unsafe.batchinsert.BatchInserter;
15 | import org.neo4j.unsafe.batchinsert.BatchInserters;
16 | import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider;
17 | import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
18 |
19 | import java.io.*;
20 | import java.util.*;
21 | import java.util.zip.GZIPInputStream;
22 |
23 | import static org.neo4j.batchimport.Utils.join;
24 | import static org.neo4j.index.impl.lucene.legacy.LuceneIndexImplementation.EXACT_CONFIG;
25 | import static org.neo4j.index.impl.lucene.legacy.LuceneIndexImplementation.FULLTEXT_CONFIG;
26 |
27 | public class Importer {
28 | private static final Map SPATIAL_CONFIG = Collections.singletonMap(IndexManager.PROVIDER,"spatial");
29 | private static final Label[] NO_LABELS = new Label[0];
30 | public static final int BATCH = 10 * 1000 * 1000;
31 | private static Report report;
32 | private final Config config;
33 | private BatchInserter db;
34 | private BatchInserterIndexProvider indexProvider;
35 | Map indexes=new HashMap();
36 | private Label[] labelsArray = NO_LABELS;
37 |
38 | public Importer(File graphDb, final Config config) throws IOException {
39 | this.config = config;
40 | db = createBatchInserter(graphDb, config);
41 |
42 | final boolean luceneOnlyIndex = config.isCachedIndexDisabled();
43 | indexProvider = createIndexProvider(luceneOnlyIndex);
44 | Collection indexInfos = config.getIndexInfos();
45 | if (indexInfos!=null) {
46 | for (IndexInfo indexInfo : indexInfos) {
47 | BatchInserterIndex index = indexInfo.isNodeIndex() ? nodeIndexFor(indexInfo.indexName, indexInfo.indexType) : relationshipIndexFor(indexInfo.indexName, indexInfo.indexType);
48 | indexes.put(indexInfo.indexName, index);
49 | }
50 | }
51 |
52 | report = createReport();
53 | }
54 |
55 | protected StdOutReport createReport() {
56 | return new StdOutReport(BATCH, 100);
57 | }
58 |
59 | protected BatchInserterIndexProvider createIndexProvider(boolean luceneOnlyIndex) {
60 | return luceneOnlyIndex ? new LuceneBatchInserterIndexProvider(db) : new MapDbCachingIndexProvider(db);
61 | }
62 |
63 | protected BatchInserter createBatchInserter(File graphDb, Config config) throws IOException {
64 | return BatchInserters.inserter(new File(graphDb.getAbsolutePath()), config.getConfigData());
65 | }
66 |
67 | // todo multiple nodes and rels files
68 | // todo nodes and rels-files in config
69 | // todo graphdb in config
70 | public static void main(String... args) throws IOException {
71 | System.err.println("Usage: Importer data/dir nodes.csv relationships.csv [node_index node-index-name fulltext|exact nodes_index.csv rel_index rel-index-name fulltext|exact rels_index.csv ....]");
72 | System.err.println("Using: Importer "+join(args," "));
73 | System.err.println();
74 |
75 | final Config config = Config.convertArgumentsToConfig(args);
76 |
77 | File graphDb = new File(config.getGraphDbDirectory());
78 | if (graphDb.exists() && !config.keepDatabase()) {
79 | FileUtils.deleteRecursively(graphDb);
80 | }
81 |
82 | Importer importer = new Importer(graphDb, config);
83 | importer.doImport();
84 | }
85 |
86 | void finish() {
87 | indexProvider.shutdown();
88 | db.shutdown();
89 | report.finish();
90 | }
91 |
92 | void importNodes(Reader reader) throws IOException {
93 | final LineData data = createLineData(reader, 0);
94 | report.reset();
95 | boolean hasId = data.hasId();
96 | //重复的id不会再次建立节点 含泪删掉,因为速度太慢了
97 | // List allIds = new LinkedList();
98 | // allIds.clear();
99 | while (data.processLine(null)) {
100 | Map properties = data.getProperties();
101 | // if (properties.get("id") == null || properties.get("id") == "" || allIds.contains(properties.get("id"))){
102 | // continue;
103 | // }
104 | // allIds.add(properties.get("id"));
105 |
106 | String[] labels = data.getTypeLabels();
107 | long id;
108 | if (hasId) {
109 | id = data.getId();
110 | db.createNode(id, properties, labelsFor(labels));
111 | } else {
112 | id = db.createNode(properties, labelsFor(labels));
113 | }
114 | for (Map.Entry> entry : data.getIndexData().entrySet()) {
115 | final BatchInserterIndex index = indexFor(entry.getKey());
116 | if (index==null)
117 | throw new IllegalStateException("Index "+entry.getKey()+" not configured.");
118 | index.add(id, entry.getValue());
119 | }
120 | report.dots();
121 | if (report.getCount() % BATCH == 0) flushIndexes();
122 | }
123 | flushIndexes();
124 | report.finishImport("Nodes");
125 | }
126 |
127 | private Map trimDataValue(LineData data) {
128 | Map properties = data.getProperties();
129 | for (Map.Entry entry : properties.entrySet()){
130 | String key = entry.getKey();
131 | Object value = entry.getValue();
132 | if (value != null ){
133 | properties.put(key, value.toString().trim());
134 | }
135 | }
136 | return properties;
137 | }
138 |
139 | private Label[] labelsFor(String[] labels) {
140 | if (labels == null || labels.length == 0) return NO_LABELS;
141 | if (labels.length != labelsArray.length) labelsArray = new Label[labels.length];
142 | for (int i = labels.length - 1; i >= 0; i--) {
143 | if (labelsArray[i] == null || !labelsArray[i].name().equals(labels[i]))
144 | labelsArray[i] = DynamicLabel.label(labels[i]);
145 | }
146 | return labelsArray;
147 | }
148 |
149 | private long lookup(String index,String property,Object value) {
150 | Long id = null;
151 | try{
152 | id = indexFor(index).get(property, value).getSingle();
153 | }catch (Exception e){
154 | e.printStackTrace();
155 | id = null;
156 | }
157 |
158 | return id==null ? -1 : id;
159 | }
160 |
161 | private BatchInserterIndex indexFor(String index) {
162 | return indexes.get(index);
163 | }
164 |
165 | void importRelationships(Reader reader) throws IOException {
166 | final int offset = 3;
167 | final LineData data = createLineData(reader, offset);
168 | final RelType relType = new RelType();
169 | long skipped=0;
170 | report.reset();
171 |
172 | while (data.processLine(null)) {
173 | final Map properties = data.getProperties();
174 | final long start = id(data, 0);
175 | final long end = id(data, 1);
176 | if (start==-1 || end==-1) {
177 | skipped++;
178 | continue;
179 | }
180 | RelType type = null;
181 | try {
182 | type = relType.update(data.getRelationshipTypeLabel());
183 | }catch (Exception e){
184 | skipped++;
185 | continue;
186 | }
187 |
188 | final long id = db.createRelationship(start, end, type, properties);
189 | for (Map.Entry> entry : data.getIndexData().entrySet()) {
190 | indexFor(entry.getKey()).add(id, entry.getValue());
191 | }
192 | report.dots();
193 | }
194 | String msg = "Relationships";
195 | if (skipped > 0) msg += " skipped (" + skipped + ")";
196 | report.finishImport(msg);
197 | }
198 |
199 | private void flushIndexes() {
200 | for (BatchInserterIndex index : indexes.values()) {
201 | index.flush();
202 | }
203 | }
204 |
205 | private LineData createLineData(Reader reader, int offset) {
206 | final boolean useQuotes = config.quotesEnabled();
207 | if (useQuotes) return new CsvLineData(reader, config.getDelimChar(this),offset);
208 | return new ChunkerLineData(reader, config.getDelimChar(this), offset);
209 | }
210 |
211 | private long id(LineData data, int column) {
212 | final LineData.Header header = data.getHeader()[column];
213 | final Object value = data.getValue(column);
214 | if (header.indexName == null || header.type == Type.ID) {
215 | return id(value);
216 | }
217 | // System.out.println("indexName: " + header.indexName);
218 | // System.out.println("name: " + header.name);
219 | // System.out.println("value: " + value);
220 | return lookup(header.indexName, header.name, value);
221 | }
222 |
223 | void importIndex(String indexName, BatchInserterIndex index, Reader reader) throws IOException {
224 | final LineData data = createLineData(reader, 1);
225 | report.reset();
226 | while (data.processLine(null)) {
227 | final Map properties = data.getProperties();
228 | index.add(id(data.getValue(0)), properties);
229 | report.dots();
230 | }
231 |
232 | report.finishImport("Done inserting into " + indexName + " Index");
233 | }
234 |
235 | private BatchInserterIndex nodeIndexFor(String indexName, String indexType) {
236 | return indexProvider.nodeIndex(indexName, configFor(indexType));
237 | }
238 |
239 | private BatchInserterIndex relationshipIndexFor(String indexName, String indexType) {
240 | return indexProvider.relationshipIndex(indexName, configFor(indexType));
241 | }
242 |
243 | private Map configFor(String indexType) {
244 | if (indexType.equalsIgnoreCase("fulltext")) return FULLTEXT_CONFIG;
245 | if (indexType.equalsIgnoreCase("spatial")) return SPATIAL_CONFIG;
246 | return EXACT_CONFIG;
247 | }
248 |
249 | private long id(Object id) {
250 | return Long.parseLong(id.toString());
251 | }
252 |
253 | private void importIndex(IndexInfo indexInfo) throws IOException {
254 | File indexFile = new File(indexInfo.indexFileName);
255 | if (!indexFile.exists()) {
256 | System.err.println("Index file "+indexFile+" does not exist");
257 | return;
258 | }
259 | importIndex(indexInfo.indexName, indexes.get(indexInfo.indexName), createFileReader(indexFile));
260 | }
261 |
262 | private void doImport() throws IOException {
263 | try {
264 | for (File file : config.getNodesFiles()) {
265 | System.out.println("importing node file name : " + file.getName());
266 | importNodes(createFileReader(file));
267 | }
268 |
269 | for (File file : config.getRelsFiles()) {
270 | System.out.println("importing rel file name : " + file.getName());
271 | importRelationships(createFileReader(file));
272 | }
273 |
274 | for (IndexInfo indexInfo : config.getIndexInfos()) {
275 | if (indexInfo.shouldImportFile()) importIndex(indexInfo);
276 | }
277 | } finally {
278 | finish();
279 | }
280 | }
281 |
// 2 MB buffer (4096*512) shared by the plain and compressed reader paths.
final static int BUFFERED_READER_BUFFER = 4096*512;

/**
 * Opens a buffered character reader over an import file; names ending in
 * ".gz" or ".zip" are decompressed with GZIPInputStream.
 * NOTE(review): a real ".zip" archive is NOT gzip format and GZIPInputStream
 * will fail on it - confirm whether ".zip" inputs are actually gzip streams.
 * NOTE(review): both paths decode with the platform default charset; verify
 * UTF-8 input is handled correctly on non-UTF-8 platforms.
 */
private Reader createFileReader(File file) {
    try {
        final String fileName = file.getName();
        if (fileName.endsWith(".gz") || fileName.endsWith(".zip")) {
            return new InputStreamReader(new GZIPInputStream(new BufferedInputStream(new FileInputStream(file)),BUFFERED_READER_BUFFER));
        }
        final FileReader fileReader = new FileReader(file);
        return new BufferedReader(fileReader,BUFFERED_READER_BUFFER);
    } catch(Exception e) {
        throw new IllegalArgumentException("Error reading file "+file+" "+e.getMessage(),e);
    }
}
296 |
297 | }
298 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/IndexInfo.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport;
2 |
3 | import java.io.File;
4 | import java.util.Map;
5 |
6 | /**
7 | * @author mh
8 | * @since 11.06.13
9 | */
/**
 * Describes one index to create/populate: element type (node_index or
 * relationship_index), index name, index type (exact/fulltext/spatial) and an
 * optional backing file to bulk-load from.
 */
public class IndexInfo {
    /**
     * Builds an IndexInfo from four consecutive positional arguments:
     * elementType indexName indexType indexFileName. (No validation, matching
     * the historical behaviour of this constructor.)
     */
    public IndexInfo(String[] args, int offset) {
        this.elementType = args[offset];
        this.indexName = args[offset+1];
        this.indexType = args[offset+2];
        this.indexFileName = args[offset+3];
    }

    public IndexInfo(String elementType, String indexName, String indexType, String indexFileName) {
        if (!(elementType.equals("node_index") || elementType.equals("relationship_index"))) throw new IllegalArgumentException("ElementType has to be node_index or relationship_index, but is "+elementType);
        // Fix: "spatial" is a supported index type (Importer.configFor maps it to
        // SPATIAL_CONFIG) but was rejected here.
        if (!(indexType.equals("exact") || indexType.equals("fulltext") || indexType.equals("spatial"))) throw new IllegalArgumentException("IndexType has to be exact, fulltext or spatial, but is "+indexType);
        this.elementType = elementType;
        this.indexName = indexName;
        this.indexType = indexType;
        this.indexFileName = indexFileName;
    }

    public final String elementType, indexName, indexType, indexFileName;

    /**
     * Parses a config entry of the form
     * batch_import.(node|relationship)_index.NAME = indexType[:fileName].
     * Returns null for keys that are not index definitions.
     */
    public static IndexInfo fromConfigEntry(Map.Entry<String, String> entry) {
        if (!entry.getKey().matches("^batch_import\\.(node|relationship)_index\\..+")) return null;
        final String[] keyParts = entry.getKey().split("\\.", 3);
        final String elementType = keyParts[1];
        final String indexName = keyParts[2];
        // Fix: split with limit 2 so file names that themselves contain ":"
        // (e.g. Windows drive letters) are preserved intact.
        final String[] valueParts = entry.getValue().split(":", 2);
        final String indexType = valueParts[0];
        final String indexFileName = valueParts.length > 1 ? valueParts[1] : null;
        return new IndexInfo(elementType, indexName, indexType, indexFileName);
    }

    public boolean isNodeIndex() {
        return elementType.equals("node_index");
    }

    /** Config key this definition round-trips to, e.g. batch_import.node_index.users. */
    public String getConfigKey() {
        return "batch_import."+elementType+"."+indexName;
    }

    /** Config value: "indexType" or "indexType:fileName" when a file is set. */
    public String getConfigValue() {
        if (indexFileName==null) return indexType;
        return indexType+":"+indexFileName;
    }

    /** Adds this definition to the given config map and returns the map. */
    public Map addToConfig(Map config) {
        config.put(getConfigKey(), getConfigValue());
        return config;
    }

    /** True when a backing file is configured and is a readable regular file. */
    public boolean shouldImportFile() {
        if (indexFileName == null) return false;
        final File file = new File(indexFileName);
        return file.exists() && file.isFile() && file.canRead();
    }
}
64 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/LineData.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport;
2 |
3 | import org.neo4j.batchimport.importer.Type;
4 |
5 | import java.util.Map;
6 |
7 | public interface LineData {
8 |
9 | class Header {
10 | public Header(int column, String name, Type type, String indexName) {
11 | this.column = column;
12 | this.name = name;
13 | this.type = type;
14 | this.indexName = indexName;
15 | }
16 |
17 | public final int column;
18 | public final String name;
19 | public final Type type;
20 | public final String indexName; // todo index config in config
21 |
22 | @Override
23 | public String toString() {
24 | return column + ". " + name +
25 | (type!=null ? " type: " + type : "")+
26 | (indexName!=null? " index: " + indexName : "");
27 | }
28 | }
29 | boolean processLine(String line);
30 | Header[] getHeader();
31 | long getId();
32 | Map getProperties();
33 | Map> getIndexData();
34 | String[] getTypeLabels();
35 | String getRelationshipTypeLabel();
36 | Object getValue(int column);
37 | boolean hasId();
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/Report.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport;
2 |
3 | /**
4 | * @author mh
5 | * @since 21.08.12
6 | */
public interface Report {
    // Restarts the record counter and per-phase timers.
    void reset();

    // Prints/records the total import time; called once at the very end.
    void finish();

    // Counts one imported record; implementations may emit progress markers.
    void dots();

    // Summarizes the phase that just completed (nodes, relationships, index).
    void finishImport(String type);

    // Records counted since the last reset().
    long getCount();
}
18 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/StdOutReport.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport;
2 |
3 | public class StdOutReport implements Report {
4 | private final long batch;
5 | private final long dots;
6 | private long count;
7 | private long total = System.currentTimeMillis(), time, batchTime;
8 |
9 | public StdOutReport(long batch, int dots) {
10 | this.batch = batch;
11 | this.dots = batch / dots;
12 | }
13 |
14 | @Override
15 | public void reset() {
16 | count = 0;
17 | batchTime = time = System.currentTimeMillis();
18 | }
19 |
20 | @Override
21 | public void finish() {
22 | System.out.println("\nTotal import time: "+ (System.currentTimeMillis() - total) / 1000 + " seconds ");
23 | }
24 |
25 | @Override
26 | public void dots() {
27 | if ((++count % dots) != 0) return;
28 | System.out.print(".");
29 | if ((count % batch) != 0) return;
30 | long now = System.currentTimeMillis();
31 | System.out.println(" "+ (now - batchTime) + " ms for "+batch);
32 | batchTime = now;
33 | }
34 |
35 | public long getCount() {
36 | return count;
37 | }
38 |
39 | @Override
40 | public void finishImport(String type) {
41 | System.out.println("\nImporting " + count + " " + type + " took " + (System.currentTimeMillis() - time) / 1000 + " seconds ");
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/Utils.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport;
2 |
3 | import org.apache.log4j.Logger;
4 |
5 | /**
6 | * @author mh
7 | * @since 27.10.12
8 | */
public class Utils {
    // NOTE: the unused log4j Logger field was removed; nothing in this class logged.

    /**
     * Logical size of an id array padded with -1 at the end: index of the last
     * entry != -1, plus one. Returns 0 for null input.
     * Fix: the original fell through to `return ids.length` when EVERY entry
     * was -1, reporting an all-padding array as full; such an array is empty.
     */
    public static int size(int[] ids) {
        if (ids == null) return 0;
        for (int i = ids.length - 1; i >= 0; i--) {
            if (ids[i] != -1) return i + 1;
        }
        return 0;
    }

    /** Same as {@link #size(int[])} for long id arrays. */
    public static int size(long[] ids) {
        if (ids == null) return 0;
        for (int i = ids.length - 1; i >= 0; i--) {
            if (ids[i] != -1) return i + 1;
        }
        return 0;
    }

    /**
     * Joins the strings with the delimiter between them.
     * Fix: the original threw StringIndexOutOfBoundsException for an empty
     * array (substring with a negative end); now returns "".
     */
    static String join(String[] types, String delim) {
        if (types.length == 0) return "";
        StringBuilder sb = new StringBuilder();
        for (String type : types) {
            if (sb.length() > 0) sb.append(delim);
            sb.append(type);
        }
        return sb.toString();
    }
}
38 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/importer/AbstractLineData.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.importer;
2 |
3 | import org.neo4j.batchimport.LineData;
4 |
5 | import java.util.Arrays;
6 | import java.util.Collections;
7 | import java.util.HashMap;
8 | import java.util.Map;
9 |
10 | import static org.neo4j.helpers.collection.MapUtil.map;
11 |
12 | public abstract class AbstractLineData implements LineData {
    protected final int offset;        // number of leading non-property columns
    protected Object[] lineData;       // converted cell values of the current row
    protected int lineSize;            // number of header columns
    protected Header[] headers;
    int labelId = 2;                   // column holding the relationship type; NOTE(review): default 2 presumably assumes a start,end,type layout - confirm
    int explicitLabelId = -1;          // column explicitly typed LABEL, -1 if none
    private Object[] properties;       // flat [name, value, ...] scratch array
    protected int rows;                // rows read so far; doubles as implicit id
    private int propertyCount;         // filled slots in `properties` for the current row
    private boolean hasIndex=false;    // true when any column declares an index
    private boolean done;              // set once the input is exhausted
    private boolean hasId;             // true when the first column is typed ID

    public AbstractLineData(int offset) {
        this.offset = offset;
    }

    // Stores the parsed header row and sizes the per-row value buffer.
    protected void initHeaders(Header[] headers) {
        this.headers = headers;
        lineSize=headers.length;
        lineData = new Object[lineSize];
    }
    // Reads the raw header row as string fields.
    protected abstract String[] readRawRow();

    // Reads the next data row into lineData; false at end of input.
    protected abstract boolean readLine();
38 |
39 | protected Header[] createHeaders(String[] fields) {
40 | if (fields[0].indexOf(".csv") != -1){
41 | String firstLine = fields[0];
42 | firstLine = firstLine.substring(firstLine.length() - 50, firstLine.length()).trim();
43 | fields[0] = firstLine;
44 | }
45 |
46 | Header[] headers = new Header[fields.length];
47 | int i=0;
48 | for (String field : fields) {
49 | String[] parts=field.split(":");
50 | final String name = parts[0];
51 | final String indexName = parts.length > 2 ? parts[2] : null;
52 | Type type = Type.fromString(parts.length > 1 ? parts[1] : null);
53 | if (type==Type.LABEL) { // || name.toLowerCase().matches("^(type|types|label|labels)$")) {
54 | labelId=i;
55 | type=Type.LABEL;
56 | explicitLabelId = i;
57 | }
58 | headers[i]=new Header(i, name, type, indexName);
59 | i++;
60 | hasIndex |= indexName != null;
61 | }
62 | hasId = headers[0].type == Type.ID;
63 | return headers;
64 | }
65 |
66 | protected Object[] createMapData(int lineSize, int offset) {
67 | int dataSize = Math.max(0,lineSize - offset);
68 | properties = new Object[dataSize*2];
69 | for (int i = offset; i < dataSize; i++) {
70 | properties[(i - offset) * 2 ] = headers[i].name;
71 | }
72 | return properties;
73 | }
74 |
    // Advances to the next row; `line` is ignored (subclasses pull from their reader).
    // NOTE(review): generic type parameters below appear stripped in this copy
    // of the file (e.g. "Map>"); the code is kept byte-identical.
    @Override
    public boolean processLine(String line) {
        if (done) return false;
        return parse() > 0;
    }

    @Override
    public Header[] getHeader() {
        return headers;
    }

    // Explicit id from the ID column, otherwise the running row count.
    @Override
    public long getId() {
        if (hasId) return (Long)getValue(0);
        return rows;
    }

    @Override
    public Map getProperties() {
        return properties();
    }

    // Index entries of the current row, keyed by index name; empty when no
    // column declares an index. Null cells are skipped.
    @Override
    public Map> getIndexData() {
        if (!hasIndex) return Collections.EMPTY_MAP;
        Map> indexData = new HashMap>();
        for (int column = offset; column < headers.length; column++) {
            Header header = headers[column];
            if (header.indexName == null) continue;
            Object val = getValue(column);
            if (val == null) continue;

            if (!indexData.containsKey(header.indexName)) {
                indexData.put(header.indexName, new HashMap());
            }
            indexData.get(header.indexName).put(header.name,val);
        }
        return indexData;
    }

    // Labels of the current row, or null when no LABEL column was declared.
    // A single String becomes a one-element array.
    @Override
    public String[] getTypeLabels() {
        if (explicitLabelId==-1) return null;
        Object labels = getValue(explicitLabelId);
        return labels instanceof String ? new String[]{ labels.toString() } : (String[]) labels;
    }

    // Relationship type from the label column; for an array only the first entry is used.
    @Override
    public String getRelationshipTypeLabel() {
        Object labels = getValue(labelId);
        return labels instanceof String[] ? ((String[])labels)[0] : (String)labels;
    }

    // Already-converted value; may be null for empty cells.
    @Override
    public Object getValue(int column) {
        return lineData[column];
    }

    @Override
    public boolean hasId() {
        return hasId;
    }

    private Header getHeader(int column) {
        return headers[column];
    }
141 |
    // Advances one row: clears the previous row's values, reads the next line
    // (setting `done` at end of input) and returns the number of non-null cells.
    private int parse() {
        rows++;
        Arrays.fill(lineData,null);
        done = !readLine();
        return collectNonNullInData();
    }
148 |
149 | private int collectNonNullInData() {
150 | propertyCount=0;
151 | int notnull = 0;
152 | for (int i = 0; i < lineSize; i++) {
153 | if (lineData[i] == null) continue;
154 | notnull++;
155 | if (i updateMap(Object... header) {
165 | processLine(null);
166 |
167 | // todo deprecate
168 | if (header.length > 0) {
169 | System.arraycopy(lineData, 0, header, 0, header.length);
170 | }
171 |
172 | return properties();
173 | }
174 |
    // Builds the property map from the flat [name, value, ...] scratch array,
    // copying to a right-sized array when the current row filled fewer slots.
    private Map properties() {
        if (propertyCount == properties.length) {
            return map(properties);
        }
        Object[] newData=new Object[propertyCount];
        System.arraycopy(properties,0,newData,0, propertyCount);
        return map(newData);
    }

    // Number of name/value pairs collected for the current row.
    public int getColumnCount() {
        return this.propertyCount/2;
    }

    // Converts a raw cell to the column's declared type (STRING passes through)
    // and wraps any failure with row/column context for diagnostics.
    protected Object convert(int column, String value) {
        try {
            return headers[column].type == Type.STRING ? value : headers[column].type.convert(value);
        } catch(Exception e) {
            // todo potentially skip?
            throw new RuntimeException("Error converting value row "+rows+" column "+headers[column]+" value "+value+" error: "+e.getClass().getSimpleName()+": "+e.getMessage(),e);
        }
    }
196 | }
197 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/importer/ChunkerLineData.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.importer;
2 |
3 | import org.neo4j.batchimport.utils.Chunker;
4 |
5 | import java.io.IOException;
6 | import java.io.Reader;
7 | import java.util.*;
8 |
9 | public class ChunkerLineData extends AbstractLineData {
    // Tokenizer that yields one cell (or EOL/EOF marker) per call.
    private final Chunker chunker;

    // Reads the header row immediately and sizes the property scratch array.
    public ChunkerLineData(Reader reader, char delim, int offset) {
        super(offset);
        chunker = new Chunker(reader, delim);
        initHeaders(createHeaders(readRawRow()));
        createMapData(lineSize, offset);
    }
18 |
    // Collects one full line of tokens (used for the header row); empty cells
    // (Chunker.NO_VALUE) and the terminating EOL/EOF markers are dropped.
    protected String[] readRawRow() {
        String value;
        Collection result=new ArrayList();
        do {
            value = nextWord();
            if (Chunker.NO_VALUE != value && !isEndOfLineOrFile(value)) {
                result.add(value);
            }
        } while (!isEndOfLineOrFile(value));
        return result.toArray(new String[result.size()]);
    }

    // Wraps Chunker.nextWord(), converting the checked IOException into an
    // unchecked one (LineData.processLine has no throws clause).
    private String nextWord() {
        try {
            return chunker.nextWord();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
38 |
39 | protected boolean readLine() {
40 | String value = null;
41 | int i=0;
42 | do {
43 | value = nextWord();
44 | if (isEndOfLineOrFile(value)) break;
45 | if (i==lineSize) {
46 | do {
47 | value = nextWord();
48 | } while (!isEndOfLineOrFile(value)); // consume until EOL
49 | break;
50 | }
51 | lineData[i] = Chunker.NO_VALUE == value ? null : convert(i, value);
52 | i++;
53 | } while (!isEndOfLineOrFile(value));
54 | if (i 2 ? parts[2] : null;
42 | Type type = Type.fromString(parts.length > 1 ? parts[1] : null);
43 | if (type==Type.LABEL || name.toLowerCase().matches("^(type|types|label|labels)$")) {
44 | labelId=i;
45 | type=Type.LABEL;
46 | explicitLabelId=i;
47 | }
48 | headers[i]=new Header(i, name, type, indexName);
49 | hasIndex |= indexName != null;
50 | }
51 | hasId = headers[0].type == Type.ID;
52 | return headers;
53 | }
54 |
55 | private Object[] createMapData(int lineSize, int offset) {
56 | int dataSize = Math.max(0,lineSize - offset);
57 | properties = new Object[dataSize*2];
58 | for (int i = offset; i < dataSize; i++) {
59 | properties[(i - offset) * 2 ] = headers[i].name;
60 | }
61 | return properties;
62 | }
63 |
    // Parses the passed line eagerly; always reports success (no EOF tracking here).
    // NOTE(review): generic type parameters below appear stripped in this copy
    // of the file (e.g. "Map>"); the code is kept byte-identical.
    @Override
    public boolean processLine(String line) {
        this.propertyCount = parse(line);
        return true;
    }

    @Override
    public Header[] getHeader() {
        return headers;
    }

    // Explicit id from the ID column, otherwise the running row count.
    @Override
    public long getId() {
        if (hasId) return (Long)getValue(0);
        return rows;
    }

    @Override
    public boolean hasId() {
        return hasId;
    }

    @Override
    public Map getProperties() {
        return properties();
    }

    // Index entries of the current row, keyed by index name; empty when no
    // column declares an index.
    @Override
    public Map> getIndexData() {
        if (!hasIndex) return Collections.EMPTY_MAP;
        Map> indexData = new HashMap>();
        for (int column = 0; column < headers.length; column++) {
            Header header = headers[column];
            if (header.indexName == null) continue;

            if (!indexData.containsKey(header.indexName)) {
                indexData.put(header.indexName, new HashMap());
            }
            indexData.get(header.indexName).put(header.name,getValue(column));
        }
        return indexData;
    }

    // Labels of the current row, or null when no LABEL column was declared.
    @Override
    public String[] getTypeLabels() {
        if (explicitLabelId==-1) return null;
        Object labels = getValue(explicitLabelId);
        return labels instanceof String ? new String[]{ labels.toString() } : (String[]) labels;
    }

    // Relationship type from the label column; for an array only the first entry is used.
    @Override
    public String getRelationshipTypeLabel() {
        Object labels = getValue(labelId);
        return labels instanceof String[] ? ((String[])labels)[0] : (String)labels;
    }

    // Unlike AbstractLineData, this converts the raw cell on EVERY access.
    @Override
    public Object getValue(int column) {
        return getHeader(column).type.convert(lineData[column]);
    }

    private Header getHeader(int column) {
        return headers[column];
    }
128 |
    // Splits one delimited line into lineData using StringTokenizer with the
    // delimiter returned as its own token; missing, empty and all-blank cells
    // become null. NOTE(review): no quote handling - delimiters inside quoted
    // values will split the cell anyway.
    private int parse(String line) {
        rows++;
        final StringTokenizer st = new StringTokenizer(line, delim,true);
        for (int i = 0; i < lineSize; i++) {
            String value = st.hasMoreTokens() ? st.nextToken() : delim;
            if (value.equals(delim)) {
                lineData[i] = null;
            } else {
                // consume the delimiter that follows a real value (except after the last column)
                lineData[i] = value.trim().isEmpty() ? null : value;
                if (i< lineSize -1 && st.hasMoreTokens()) st.nextToken();
            }
        }
        return collectNonNullInData();
    }
143 |
    // Copies the non-null property cells (from `offset` on) into the flat
    // [name, value, ...] scratch array; returns the number of slots filled.
    private int collectNonNullInData() {
        int count = 0;
        for (int i = offset; i < lineSize; i++) {
            if (lineData[i] == null) continue;
            final Header header = getHeader(i);
            properties[count++]= header.name;
            properties[count++]= getValue(i);
        }
        return count;
    }
154 |
    // Parses `line` and returns its property map; optionally copies the first
    // raw cells into `header` (legacy calling convention).
    public Map updateMap(String line, Object... header) {
        processLine(line);

        // todo deprecate
        if (header.length > 0) {
            System.arraycopy(lineData, 0, header, 0, header.length);
        }

        return properties();
    }

    // Builds the property map, trimming the scratch array if partially filled.
    private Map properties() {
        if (propertyCount == properties.length) {
            return map(properties);
        }
        Object[] newData=new Object[propertyCount];
        System.arraycopy(properties,0,newData,0, propertyCount);
        return map(newData);
    }

    // Number of name/value pairs collected for the current row.
    public int getColumnCount() {
        return this.propertyCount/2;
    }
178 | }
179 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/importer/Type.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.importer;
2 |
3 | import org.neo4j.batchimport.utils.Config;
4 |
5 | public enum Type {
6 | ID {
7 | @Override
8 | public Object convert(String value) {
9 | return Long.parseLong(value);
10 | }
11 | public boolean isProperty() { return false; }
12 | },
13 | LABEL {
14 | @Override
15 | public Object convert(String value) {
16 | return value.trim().split("\\s*,\\s*");
17 | }
18 | public boolean isProperty() { return false; }
19 | },
20 | BOOLEAN {
21 | @Override
22 | public Object convert(String value) {
23 | return Boolean.valueOf(value);
24 | }
25 | },
26 | INT {
27 | @Override
28 | public Object convert(String value) {
29 | return Integer.valueOf(value);
30 | }
31 | },
32 | LONG {
33 | @Override
34 | public Object convert(String value) {
35 | return Long.valueOf(value);
36 | }
37 | },
38 | DOUBLE {
39 | @Override
40 | public Object convert(String value) {
41 | return Double.valueOf(value);
42 | }
43 | },
44 | FLOAT {
45 | @Override
46 | public Object convert(String value) {
47 | return Float.valueOf(value);
48 | }
49 | },
50 | BYTE {
51 | @Override
52 | public Object convert(String value) {
53 | return Byte.valueOf(value);
54 | }
55 | },
56 | SHORT {
57 | @Override
58 | public Object convert(String value) {
59 | return Short.valueOf(value);
60 | }
61 | },
62 | CHAR {
63 | @Override
64 | public Object convert(String value) {
65 | return value.charAt(0);
66 | }
67 | },
68 | STRING {
69 | @Override
70 | public Object convert(String value) {
71 | return value;
72 | }
73 | },
74 | BOOLEAN_ARRAY {
75 | @Override
76 | public Object convert(String value) {
77 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR);
78 | boolean[] booleanArray = new boolean[strArray.length];
79 | for(int i = 0; i < strArray.length; i++) {
80 | booleanArray[i] = Boolean.valueOf(strArray[i]);
81 | }
82 | return booleanArray;
83 | }
84 | },
85 | INT_ARRAY {
86 | @Override
87 | public Object convert(String value) {
88 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR);
89 | int[] intArray = new int[strArray.length];
90 | for(int i = 0; i < strArray.length; i++) {
91 | intArray[i] = Integer.parseInt(strArray[i]);
92 | }
93 | return intArray;
94 | }
95 | },
96 | LONG_ARRAY {
97 | @Override
98 | public Object convert(String value) {
99 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR);
100 | long[] longArray = new long[strArray.length];
101 | for(int i = 0; i < strArray.length; i++) {
102 | longArray[i] = Long.parseLong(strArray[i]);
103 | }
104 | return longArray;
105 | }
106 | },
107 | DOUBLE_ARRAY {
108 | @Override
109 | public Object convert(String value) {
110 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR);
111 | double[] doubleArray = new double[strArray.length];
112 | for(int i = 0; i < strArray.length; i++) {
113 | doubleArray[i] = Double.parseDouble(strArray[i]);
114 | }
115 | return doubleArray;
116 | }
117 | },
118 | FLOAT_ARRAY {
119 | @Override
120 | public Object convert(String value) {
121 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR);
122 | float[] floatArray = new float[strArray.length];
123 | for(int i = 0; i < strArray.length; i++) {
124 | floatArray[i] = Float.parseFloat(strArray[i]);
125 | }
126 | return floatArray;
127 | }
128 | },
129 | BYTE_ARRAY {
130 | @Override
131 | public Object convert(String value) {
132 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR);
133 | byte[] byteArray = new byte[strArray.length];
134 | for(int i = 0; i < strArray.length; i++) {
135 | byteArray[i] = Byte.parseByte(strArray[i]);
136 | }
137 | return byteArray;
138 | }
139 | },
140 | SHORT_ARRAY {
141 | @Override
142 | public Object convert(String value) {
143 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR);
144 | short[] shortArray = new short[strArray.length];
145 | for(int i = 0; i < strArray.length; i++) {
146 | shortArray[i] = Short.parseShort(strArray[i]);
147 | }
148 | return shortArray;
149 | }
150 | },
151 | CHAR_ARRAY {
152 | @Override
153 | public Object convert(String value) {
154 | String[] strArray = value.split(Config.ARRAYS_SEPARATOR);
155 | char[] charArray = new char[strArray.length];
156 | for(int i = 0; i < strArray.length; i++) {
157 | charArray[i] = strArray[i].charAt(0);
158 | }
159 | return charArray;
160 | }
161 | },
162 | STRING_ARRAY {
163 | @Override
164 | public Object convert(String value) {
165 | String separator = Config.ARRAYS_SEPARATOR;
166 | return value.split(Config.ARRAYS_SEPARATOR);
167 | }
168 | };
169 |
170 | public static Type fromString(String typeString) {
171 | if (typeString==null || typeString.isEmpty()) return Type.STRING;
172 | try {
173 | return valueOf(typeString.toUpperCase());
174 | } catch (Exception e) {
175 | throw new IllegalArgumentException("Unknown Type "+typeString);
176 | }
177 | }
178 |
179 | public abstract Object convert(String value);
180 |
181 | public boolean isProperty() { return true; }
182 | }
183 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/index/LongIterableIndexHits.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.index;
2 |
3 | import org.neo4j.graphdb.ResourceIterator;
4 | import org.neo4j.graphdb.index.IndexHits;
5 | import org.neo4j.helpers.collection.Iterables;
6 |
7 | import java.util.Iterator;
8 |
9 | /**
10 | * @author mh
11 | * @since 11.06.13
12 | */
// Adapts an Iterable of entity ids (from the MapDB cache) to Neo4j's IndexHits.
// NOTE(review): generic parameters appear stripped in this copy (likely
// IndexHits<Long>, ResourceIterator<Long>); code kept byte-identical.
public class LongIterableIndexHits implements IndexHits, ResourceIterator {

    private final Iterable values;   // backing ids; re-iterable
    private Iterator iterator;       // current iteration state, reset by iterator()

    public LongIterableIndexHits(Iterable values) {
        this.values = values;
        // Calls the overridden iterator(), which also assigns the field:
        // this object serves as its own (single, shared) iterator.
        iterator = iterator();
    }

    @Override
    public int size() {
        // Counts by full traversal; O(n) per call.
        return (int) Iterables.count(values);
    }

    @Override
    public void close() {
        if (iterator instanceof ResourceIterator) {
            ((ResourceIterator)iterator).close();
        }
    }

    @Override
    public Long getSingle() {
        // null when empty; fails when more than one element is present.
        return Iterables.singleOrNull(values);
    }

    @Override
    public float currentScore() {
        // Cached lookups carry no relevance score.
        return 0;
    }

    // Restarts iteration and returns this object as the iterator.
    // NOTE(review): not safe for nested/concurrent iteration - all callers
    // share the single `iterator` field.
    @Override
    public ResourceIterator iterator() {
        iterator = values.iterator();
        return this;
    }


    @Override
    public boolean hasNext() {
        return iterator.hasNext();
    }

    @Override
    public Long next() {
        return iterator.next();
    }

    @Override
    public void remove() {
        iterator.remove();
    }
}
67 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/index/MapDbCachingIndexProvider.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.index;
2 |
3 | import org.mapdb.BTreeKeySerializer;
4 | import org.mapdb.Bind;
5 | import org.mapdb.DB;
6 | import org.mapdb.DBMaker;
7 | import org.mapdb.Fun;
8 | import org.neo4j.graphdb.index.IndexHits;
9 | import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
10 | import org.neo4j.unsafe.batchinsert.BatchInserter;
11 | import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
12 | import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider;
13 |
14 | import java.util.HashMap;
15 | import java.util.Map;
16 | import java.util.NavigableSet;
17 |
// Index provider that writes through to Lucene but answers get() lookups from
// a throwaway MapDB cache (fast exact lookups during a batch import).
// NOTE(review): generic parameters appear stripped in this copy (e.g.
// "db.>createTreeSet", "Map>>"); code kept byte-identical.
public class MapDbCachingIndexProvider implements BatchInserterIndexProvider {
    LuceneBatchInserterIndexProvider luceneIndex;  // all writes forwarded here
    private DB db;                                 // temp-file MapDB cache

    public MapDbCachingIndexProvider(BatchInserter inserter) {
        this(new LuceneBatchInserterIndexProvider(inserter));
    }

    public MapDbCachingIndexProvider(LuceneBatchInserterIndexProvider luceneIndex) {
        this.luceneIndex = luceneIndex;
        // Durability is deliberately disabled - the cache is disposable.
        db = DBMaker.newTempFileDB().
                asyncFlushDelay(1000).
                cacheSize(1024 * 1024).
                closeOnJvmShutdown().
                deleteFilesAfterClose().
                syncOnCommitDisable().
                writeAheadLogDisable().
                make();
    }

    @Override
    public BatchInserterIndex nodeIndex(String indexName, Map config) {
        return new CachingBatchInserterIndex(db,indexName,luceneIndex.nodeIndex(indexName,config));
    }

    @Override
    public BatchInserterIndex relationshipIndex(String indexName, Map config) {
        return new CachingBatchInserterIndex(db,indexName,luceneIndex.relationshipIndex(indexName, config));
    }

    @Override
    public void shutdown() {
        luceneIndex.shutdown();
        db.close();
    }

    // Decorator: mirrors add() into MapDB tree-sets and serves get() from them.
    private static class CachingBatchInserterIndex implements BatchInserterIndex {
        // Per-property sorted sets of (value, entityId) tuples backing get().
        Map>> caches = new HashMap>>();
        private final DB db;
        private final String indexName;
        private final BatchInserterIndex index;  // wrapped Lucene index

        public CachingBatchInserterIndex(DB db, String indexName, BatchInserterIndex index) {
            this.db = db;
            this.indexName = indexName;
            this.index = index;
        }
        // Lazily creates the MapDB tree-set for one indexed property.
        private NavigableSet> getSet(String property) {
            NavigableSet> set = caches.get(property);
            if (set != null) return set;
            set=db.>createTreeSet(indexName+"."+property,32,false, BTreeKeySerializer.TUPLE2,null);
            caches.put(property,set);
            return set;
        }

        @Override
        public void add(long entityId, Map properties) {
            // Record every property both in the cache and in Lucene.
            for (Map.Entry entry : properties.entrySet()) {
                getSet(entry.getKey()).add(Fun.t2(entry.getValue(), entityId));
            }
            index.add(entityId,properties);
        }

        @Override
        public void updateOrAdd(long entityId, Map properties) {
            throw new UnsupportedOperationException();
        }

        // Served entirely from the MapDB cache, not from Lucene.
        @Override
        public IndexHits get(String key, Object value) {
            final Iterable values = Bind.findSecondaryKeys(getSet(key), value);
            return new LongIterableIndexHits(values);
        }

        @Override
        public IndexHits query(String key, Object queryOrQueryObject) {
            throw new UnsupportedOperationException();
        }

        @Override
        public IndexHits query(Object queryOrQueryObject) {
            throw new UnsupportedOperationException();
        }

        // Flushes only the Lucene side; the MapDB sets are already up to date.
        @Override
        public void flush() {
            index.flush();
        }

        @Override
        public void setCacheCapacity(String key, int size) {
            throw new UnsupportedOperationException();
        }

    }

}
115 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/utils/Chunker.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.utils;
2 |
3 | import java.io.IOException;
4 | import java.io.Reader;
5 |
6 | /**
7 | * @author mh
8 | * @since 13.11.12
9 | */
// Hand-rolled tokenizer: yields one delimited token per nextWord() call,
// with sentinel returns for empty cells, end of line and end of file.
public class Chunker {
    public static final String EOF = null;            // sentinel: end of input
    public static final String EOL = "\n".intern();   // sentinel: end of line
    public static final String NO_VALUE = "".intern();// sentinel: empty cell (callers compare by identity)
    public static final char EOL_CHAR2 = '\r';
    public static final char EOL_CHAR = '\n';
    // NOTE(review): (char)-1 is U+FFFF, a valid (if unusual) char - input that
    // genuinely contains U+FFFF would be misread as EOF.
    public static final char EOF_CHAR = (char)-1;
    public static final int PREV_EOL_CHAR = -2;       // state: previous token ended its line
    private static final int BUFSIZE = 32*1024;
    private final Reader reader;
    private final char delim;
    private final char[] buffer=new char[BUFSIZE];
    private int lastChar = PREV_EOL_CHAR;             // delimiter/EOL/EOF that terminated the last token
    private int pos=BUFSIZE;                          // read position; BUFSIZE forces the initial fill

    public Chunker(Reader reader, char delim) {
        this.reader = reader;
        this.delim = delim;
    }

    /**
     * @return the token, null for EOF, empty string for no value read (just delim) or "\n" for EOL
     * @throws IOException
     */
    public String nextWord() throws IOException {
        int count = 0;
        int ch;
        // Deliver EOF/EOL state carried over from the previous call first.
        if (lastChar == EOF_CHAR) return EOF;
        if (lastChar == EOL_CHAR) {
            lastChar = PREV_EOL_CHAR;
            return EOL;
        }

        // Initial fill, or the buffer was exhausted exactly at a token boundary.
        if (pos == BUFSIZE) {
            int available = reader.read(buffer);
            pos = 0;
            if (available == -1) {
                available = 0;
            }
            if (available < BUFSIZE) {
                buffer[available] = EOF_CHAR;  // mark the logical end of data in the buffer
            }
        }
        int start = pos;
        // Scan forward until delimiter, end of line or the EOF marker.
        while ((ch = buffer[pos++])!=delim && ch!= EOL_CHAR && ch!= EOF_CHAR) {
            count++;
            if (pos == BUFSIZE) {
                // Token straddles the buffer boundary: slide its prefix to the
                // front and refill the remainder of the buffer.
                System.arraycopy(buffer, start, buffer, 0, count);
                int available = reader.read(buffer, count, BUFSIZE - count);
                pos = count;
                start = 0;
                if (available == -1) {
                    available = 0;
                }
                if (available < BUFSIZE - count) {
                    buffer[available + count] = EOF_CHAR;
                }
            }
        }
        if (count == 0) {
            // Empty token: EOF immediately after an EOL means true end of input.
            if (lastChar==PREV_EOL_CHAR && ch== EOF_CHAR) { lastChar=EOF_CHAR;return EOF; }
            lastChar=ch;
            if (ch == EOF_CHAR) return NO_VALUE;
            if (ch == EOL_CHAR) return NO_VALUE;
            return NO_VALUE;
        }
        lastChar=ch;
        if (buffer[start + count-1]==EOL_CHAR2) count--;  // drop the '\r' of CRLF line endings
        return String.valueOf(buffer, start, count);
    }

}
82 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/utils/Config.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.utils;
2 |
3 | import org.neo4j.batchimport.Importer;
4 | import org.neo4j.batchimport.IndexInfo;
5 | import org.neo4j.helpers.collection.MapUtil;
6 |
7 | import java.io.File;
8 | import java.io.FileWriter;
9 | import java.util.ArrayList;
10 | import java.util.Collection;
11 | import java.util.HashMap;
12 | import java.util.Map;
13 | import java.util.Stack;
14 |
15 | public class Config {
16 | public static final String BATCH_IMPORT_RELS_FILES = "batch_import.rels_files";
17 | public static final String BATCH_IMPORT_GRAPH_DB = "batch_import.graph_db";
18 | public static final String BATCH_IMPORT_KEEP_DB = "batch_import.keep_db";
19 | public static final String CONFIG_FILE_NAME = "batch.properties";
20 | public static final String BATCH_IMPORT_NODES_FILES = "batch_import.nodes_files";
21 | public static final String BATCH_IMPORT_MAPDB_CACHE_DISABLE = "batch_import.mapdb_cache.disable";
22 | public static final String BATCH_IMPORT_CSV_QUOTES = "batch_import.csv.quotes";
23 | public static final String BATCH_IMPORT_CSV_DELIM = "batch_import.csv.delim";
24 | public static final String ARRAY_SEPARATOR_CONFIG = "batch_array_separator";
25 | public static String ARRAYS_SEPARATOR = ",";
26 |
27 | private final Map configData;
28 |
29 | public Config(Map configData) {
30 | this.configData = configData;
31 | if (this.configData.containsKey(ARRAY_SEPARATOR_CONFIG)){
32 | Config.ARRAYS_SEPARATOR = configData.get(ARRAY_SEPARATOR_CONFIG);
33 | }
34 | }
35 |
36 | public static Config convertArgumentsToConfig(String[] args) {
37 | final Stack argumentList = toStack(args);
38 |
39 | String configFileName = findConfigFileName(argumentList);
40 |
41 | final Map config = config(configFileName);
42 |
43 | convertParamsToConfig(argumentList, config);
44 |
45 | validateConfig(config);
46 | return new Config(config);
47 | }
48 |
49 | private static Stack toStack(String[] args) {
50 | final Stack argumentList = new Stack();
51 | for (int i = args.length - 1; i >= 0; i--) {
52 | argumentList.push(args[i]);
53 | }
54 | return argumentList;
55 | }
56 |
57 | private static String findConfigFileName(Stack argumentList) {
58 | String firstParam = argumentList.isEmpty() ? "" : argumentList.peek();
59 | String configFileName = CONFIG_FILE_NAME;
60 | if (firstParam.endsWith(".properties")) {
61 | configFileName = firstParam;
62 | popOrNull(argumentList);
63 | }
64 | return configFileName;
65 | }
66 |
67 | // todo more checks ?
68 | private static void validateConfig(Map config) {
69 | if (!config.containsKey(BATCH_IMPORT_GRAPH_DB)) throw new IllegalArgumentException("Missing parameter for graphdb directory");
70 | }
71 |
72 | private static Collection convertParamsToConfig(Stack args, Map config) {
73 | addConfigParamIfArgument(args, config, BATCH_IMPORT_GRAPH_DB);
74 | addConfigParamIfArgument(args, config, BATCH_IMPORT_NODES_FILES);
75 | addConfigParamIfArgument(args, config, BATCH_IMPORT_RELS_FILES);
76 | Collection indexes = createIndexInfos(args);
77 | for (IndexInfo index : indexes) {
78 | index.addToConfig(config);
79 | }
80 | return indexes;
81 | }
82 |
83 | private static void addConfigParamIfArgument(Stack args, Map config, String param) {
84 | final String arg = popOrNull(args);
85 | if (arg==null || arg.trim().isEmpty()) return;
86 | if (!config.containsKey(param)) config.put(param, arg);
87 | }
88 |
89 | private static String popOrNull(Stack args) {
90 | if (args.isEmpty()) return null;
91 | return args.pop();
92 | }
93 |
94 | private static Collection createIndexInfos(Stack args) {
95 | Collection indexes=new ArrayList();
96 | while (!args.isEmpty()) {
97 | indexes.add(new IndexInfo(popOrNull(args), popOrNull(args), popOrNull(args), popOrNull(args)));
98 | }
99 | return indexes;
100 | }
101 |
102 | public static Map config(String fileName) {
103 | Map config = new HashMap();
104 | try {
105 | if (new File(fileName).exists()) {
106 | System.out.println("Using Existing Configuration File");
107 | } else {
108 | System.out.println("Writing Configuration File to batch.properties");
109 | FileWriter fw = new FileWriter(fileName);
110 | fw.append("use_memory_mapped_buffers=true\n"
111 | + "neostore.nodestore.db.mapped_memory=100M\n"
112 | + "neostore.relationshipstore.db.mapped_memory=500M\n"
113 | + "neostore.propertystore.db.mapped_memory=1G\n"
114 | + "neostore.propertystore.db.strings.mapped_memory=200M\n"
115 | + "neostore.propertystore.db.arrays.mapped_memory=0M\n"
116 | + "neostore.propertystore.db.index.keys.mapped_memory=15M\n"
117 | + "neostore.propertystore.db.index.mapped_memory=15M");
118 | fw.close();
119 | }
120 |
121 | config = MapUtil.load(new File(fileName));
122 |
123 | } catch (Exception e) {
124 | System.out.println(e.getMessage());
125 | }
126 | return config;
127 | }
128 |
129 | public static Collection extractIndexInfos(Map config) {
130 | Collection result=new ArrayList();
131 | for (Map.Entry entry : config.entrySet()) {
132 | final IndexInfo info = IndexInfo.fromConfigEntry(entry);
133 | if (info!=null) result.add(info);
134 | }
135 | return result;
136 | }
137 |
138 | public static boolean configOptionEnabled(Config config, String option) {
139 | return "true".equalsIgnoreCase(config.get(option));
140 | }
141 | public static boolean configOptionDisabled(Config config, String option) {
142 | return "false".equalsIgnoreCase(config.get(option));
143 | }
144 |
145 | public static Collection toFiles(String commaSeparatedFileList) {
146 | Collection files=new ArrayList();
147 | if (commaSeparatedFileList==null || commaSeparatedFileList.isEmpty()) return files;
148 | for (String part : commaSeparatedFileList.split(",")) {
149 | final File file = new File(part);
150 | if (file.exists() && file.canRead() && file.isFile()) files.add(file);
151 | else System.err.println("File "+file+" does not exist, can not be read or is not a file.");
152 | }
153 | return files;
154 | }
155 |
156 | public static String NODE_INDEX(String indexName) {
157 | return "batch_import.node_index." + indexName;
158 | }
159 | public static String RELATIONSHIP_INDEX(String indexName) {
160 | return "batch_import.relationship_index." + indexName;
161 | }
162 |
163 | public boolean isCachedIndexDisabled() {
164 | return configOptionEnabled(this, BATCH_IMPORT_MAPDB_CACHE_DISABLE);
165 | }
166 |
167 | public Collection getIndexInfos() {
168 | return extractIndexInfos(configData);
169 | }
170 |
171 | public Collection getRelsFiles() {
172 | return toFiles(get(BATCH_IMPORT_RELS_FILES));
173 | }
174 |
175 | public Collection getNodesFiles() {
176 | return toFiles(get(BATCH_IMPORT_NODES_FILES));
177 | }
178 |
179 | public char getDelimChar(Importer importer) {
180 | final String delim = get(BATCH_IMPORT_CSV_DELIM);
181 | if (delim==null || delim.isEmpty()) return '\t';
182 | return delim.trim().charAt(0);
183 | }
184 |
185 | public boolean quotesEnabled() {
186 | return !configOptionDisabled(this, BATCH_IMPORT_CSV_QUOTES);
187 | }
188 |
189 | public String getGraphDbDirectory() {
190 | return get(BATCH_IMPORT_GRAPH_DB);
191 | }
192 |
193 | String get(String option) {
194 | return configData.get(option);
195 | }
196 |
197 | public boolean keepDatabase() {
198 | return configOptionEnabled(this, BATCH_IMPORT_KEEP_DB);
199 | }
200 |
201 | public Map getConfigData() {
202 | return configData;
203 | }
204 | }
205 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/utils/FileIterator.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.utils;
2 |
3 | import org.mapdb.Serializer;
4 |
5 | import java.io.*;
6 | import java.util.Comparator;
7 | import java.util.Iterator;
8 |
9 | class FileIterator implements Iterator {
10 | public static final char DELIM = '\t';
11 | private final BufferedReader reader;
12 | private final String file;
13 | Line line;
14 | long lineNo;
15 |
16 | public FileIterator(String file) throws FileNotFoundException {
17 | reader = new BufferedReader(new FileReader(file), RelationshipSorter.BUFFER);
18 | this.file = file;
19 | line = readLine();
20 | }
21 |
22 | public void close() throws IOException {
23 | reader.close();
24 | }
25 |
26 | private Line readLine() {
27 | try {
28 | String line = reader.readLine();
29 | if (line==null) return null;
30 | return Line.from(lineNo++, line);
31 | } catch (IOException e) {
32 | throw new RuntimeException("Error reading file "+ file,e);
33 | }
34 | }
35 |
36 |
37 | public boolean hasNext() {
38 | return line != null;
39 | }
40 |
41 | public Line next() {
42 | Line result=line;
43 | line = readLine();
44 | return result;
45 | }
46 |
47 | public void remove() {
48 | }
49 |
50 | public static class LineSerializer implements Serializer {
51 | @Override
52 | public void serialize(DataOutput dataOutput, Line line) throws IOException {
53 | dataOutput.writeLong(line.lineNo);
54 | // dataOutput.writeLong(line.min);
55 | // dataOutput.writeLong(line.max);
56 | dataOutput.writeUTF(line.line);
57 | }
58 |
59 | @Override
60 | public Line deserialize(DataInput dataInput, int i) throws IOException {
61 | // return Line.from(dataInput.readLong(),dataInput.readLong(),dataInput.readLong(),dataInput.readUTF());
62 | return Line.from(dataInput.readLong(),dataInput.readUTF());
63 | }
64 | }
65 | public static class Line {
66 | String line;
67 | long lineNo, min, max;
68 | public static Line from(long lineNo, long min, long max, String line) {
69 | Line result = new Line();
70 | result.lineNo = lineNo;
71 | result.min = min;
72 | result.max = max;
73 | result.line = line;
74 | return result;
75 | }
76 | public static Line from(long lineNo, String line) {
77 | if (lineNo > 0) {
78 | int idx = line.indexOf(DELIM);
79 | long start = Long.parseLong(line.substring(0, idx++));
80 | long end = Long.parseLong(line.substring(idx, line.indexOf(DELIM, idx)));
81 | return from(lineNo,Math.min(start,end), Math.max(start, end),line);
82 | } else {
83 | return from(lineNo,-1, -1,line);
84 | }
85 | }
86 | }
87 |
88 | public static class RelStartEndComparator implements Comparator {
89 |
90 | public int compare(Line line1, Line line2) {
91 | int result = Long.compare(line1.min, line2.min);
92 | if (result == 0) {
93 | result = Long.compare(line1.max, line2.max);
94 | if (result == 0) return Long.compare(line1.lineNo, line2.lineNo);
95 | }
96 | return result;
97 | }
98 | }
99 | }
100 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/utils/Params.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.utils;
2 |
3 | import java.io.File;
4 |
/**
 * Binds a space-separated list of parameter names to a matching array of
 * positional argument values and offers typed, case-insensitive lookup.
 *
 * @author mh
 * @since 02.11.12
 */
public class Params {

    private final String names;
    private final String[] args;
    private final String[] params; // parameter names, split from the names string

    /**
     * @param names space-separated parameter names, e.g. "input output count"
     * @param args  positional values, expected to match names one-to-one
     */
    public Params(String names, String... args) {
        this.names = names;
        this.params = names.split(" +");
        this.args = args;
    }

    /** True when the number of supplied values does not match the declared names. */
    public boolean invalid() {
        return args.length != params.length;
    }

    public int length() {
        return params.length;
    }

    @Override
    public String toString() {
        return names;
    }

    public File file(String name) {
        return new File(string(name));
    }

    /**
     * Returns the value bound to the given name (case-insensitive).
     *
     * @throws IllegalArgumentException when the name is not declared
     */
    public String string(String name) {
        for (int i = 0; i < params.length; i++) {
            if (params[i].equalsIgnoreCase(name)) {
                return args[i];
            }
        }
        throw new IllegalArgumentException("Invalid name " + name + " only know " + names);
    }

    public long longValue(String name) {
        return Long.parseLong(string(name));
    }

    public int intValue(String name) {
        return Integer.parseInt(string(name));
    }
}
55 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/utils/RelationshipSorter.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.utils;
2 |
3 | import org.mapdb.Pump;
4 | import org.neo4j.helpers.collection.IteratorWrapper;
5 |
6 | import java.io.BufferedWriter;
7 | import java.io.FileWriter;
8 | import java.io.IOException;
9 | import java.util.Iterator;
10 |
11 | /**
12 | * @author Michael Hunger @since 04.11.13
13 | */
14 | public class RelationshipSorter {
15 |
16 | public static final int BUFFER = 1024 * 1024;
17 |
18 | public static void main(String... args) throws IOException {
19 | System.err.println("Usage mvn exec:java -Dexec.mainClass=org.neo4j.batchimport.utils.RelationshipSorter -Dexec.args='rels.csv rels_sorted.csv'");
20 | final String file = args[0];
21 | String file2 = args[1];
22 | long time = System.currentTimeMillis();
23 | FileIterator reader0 = new FileIterator(file);
24 | Iterator reader = wrapStatistics(reader0);
25 | FileIterator.Line header = reader.next();
26 | Iterator result = Pump.sort(reader, 1_000_000, new FileIterator.RelStartEndComparator(), new FileIterator.LineSerializer());
27 | BufferedWriter writer = new BufferedWriter(new FileWriter(file2), BUFFER);
28 | writer.write(header.line);
29 | writer.write("\n");
30 | long count = 0;
31 | while (result.hasNext()) {
32 | writer.write(result.next().line);
33 | writer.write('\n');
34 | count++;
35 | }
36 | writer.close();
37 | reader0.close();
38 | System.out.println("sorting " + count + " lines took " + (System.currentTimeMillis()-time)/1000+" seconds");
39 | }
40 |
41 | private static Iterator wrapStatistics(final FileIterator reader0) {
42 | return new IteratorWrapper(reader0) {
43 | long time = System.currentTimeMillis();
44 | @Override
45 | protected FileIterator.Line underlyingObjectToObject(FileIterator.Line line) {
46 | if (line.lineNo % 10000 == 0) {
47 | System.out.print(".");
48 | if (line.lineNo % 1000000 == 0) {
49 | long now = System.currentTimeMillis();
50 | System.out.println(" "+line.lineNo+ " " +(now - time)+" ms");
51 | time = now;
52 | }
53 | }
54 |
55 | return line;
56 | }
57 | };
58 | }
59 |
60 | }
61 |
--------------------------------------------------------------------------------
/src/main/java/org/neo4j/batchimport/utils/RelationshipSorter2.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport.utils;
2 |
3 | import org.neo4j.helpers.collection.ArrayIterator;
4 | import org.neo4j.helpers.collection.IteratorWrapper;
5 |
6 | import java.io.BufferedWriter;
7 | import java.io.FileWriter;
8 | import java.io.IOException;
9 | import java.util.Arrays;
10 | import java.util.Iterator;
11 |
12 | /**
13 | * @author Michael Hunger @since 04.11.13
14 | */
15 | public class RelationshipSorter2 {
16 |
17 | public static final char DELIM = '\t';
18 | public static final int BUFFER = 1024 * 1024;
19 | public static final FileIterator.RelStartEndComparator COMPARATOR = new FileIterator.RelStartEndComparator();
20 | public static final int ARRAY_BUFFER = 10_000_000;
21 |
22 | public static void main(String... args) throws IOException {
23 | System.err.println("Usage mvn exec:java -Dexec.mainClass=org.neo4j.batchimport.utils.RelationshipSorter2 -Dexec.args='rels.csv rels_sorted.csv'");
24 | final String file = args[0];
25 | String file2 = args[1];
26 | long time = System.currentTimeMillis();
27 | FileIterator reader0 = new FileIterator(file);
28 | Iterator reader = wrapStatistics(reader0);
29 | FileIterator.Line header = reader.next();
30 | FileIterator.Line[] lines = new FileIterator.Line[ARRAY_BUFFER];
31 | int read = readArray(reader, lines);
32 | Arrays.sort(lines, COMPARATOR);
33 | long count = writeFile(file2, lines, read);
34 | // Iterator result = new ArrayIterator<>(lines);
35 | // sort array
36 | // long count = writeFile(file2, header, result);
37 | reader0.close();
38 | System.out.println("sorting " + count + " lines took " + (System.currentTimeMillis()-time)/1000+" seconds");
39 | }
40 |
41 | private static long writeFile(String file, FileIterator.Line header, Iterator lines) throws IOException {
42 | BufferedWriter writer = new BufferedWriter(new FileWriter(file), BUFFER);
43 | if (header!=null) {
44 | writer.write(header.line); writer.write("\n");
45 | }
46 | long count = 0;
47 | while (lines.hasNext()) {
48 | writer.write(lines.next().line); writer.write('\n');
49 | count++;
50 | }
51 | writer.close();
52 | return count;
53 | }
54 |
55 | private static long writeFile(String file, FileIterator.Line[] lines, int count) throws IOException {
56 | BufferedWriter writer = new BufferedWriter(new FileWriter(file), BUFFER);
57 | for (int i = 0; i < count; i++) {
58 | writer.write(lines[i].line); writer.write('\n');
59 | }
60 | writer.close();
61 | return count;
62 | }
63 |
64 | private static int readArray(Iterator reader, FileIterator.Line[] lines) {
65 | int i=0;
66 | int length = lines.length;
67 | while (i < length && reader.hasNext()) {
68 | lines[i++] = reader.next();
69 | }
70 | return i;
71 | }
72 |
73 | private static Iterator wrapStatistics(final FileIterator reader0) {
74 | return new IteratorWrapper(reader0) {
75 | long time = System.currentTimeMillis();
76 | @Override
77 | protected FileIterator.Line underlyingObjectToObject(FileIterator.Line line) {
78 | if (line.lineNo % 10000 == 0) {
79 | System.out.print(".");
80 | if (line.lineNo % 1000000 == 0) {
81 | long now = System.currentTimeMillis();
82 | System.out.println(" "+line.lineNo+ " " +(now - time)+" ms");
83 | time = now;
84 | }
85 | }
86 |
87 | return line;
88 | }
89 | };
90 | }
91 |
92 | }
93 |
--------------------------------------------------------------------------------
/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------
2 | #
3 | # Logging Configuration
4 | #
5 | # ------------------------------------------------------------------------
6 | #
7 | log4j.rootLogger=INFO, Console
8 |
9 | ########################################################################
10 | #
11 | # Logfile definitions
12 | #
13 | ########################################################################
14 | #Console Log
15 | log4j.appender.Console=org.apache.log4j.ConsoleAppender
16 | log4j.appender.Console.Threshold=DEBUG
17 | log4j.appender.Console.layout=org.apache.log4j.PatternLayout
18 | log4j.appender.Console.layout.ConversionPattern=%-5p %C{1} - %m\n
19 | log4j.appender.Console.Target=System.err
20 |
21 | #LOGTXT Log
22 | log4j.appender.LOGTXT=org.apache.log4j.FileAppender
23 | log4j.appender.LOGTXT.File=log.txt
24 | log4j.appender.LOGTXT.Append=false
25 | log4j.appender.LOGTXT.Threshold=DEBUG
26 | log4j.appender.LOGTXT.layout=org.apache.log4j.PatternLayout
27 | log4j.appender.LOGTXT.layout.ConversionPattern=%-5p %C{1} - %m\n
28 |
29 |
--------------------------------------------------------------------------------
/src/test/java/DataTest.java:
--------------------------------------------------------------------------------
1 | import org.junit.Assert;
2 | import org.junit.Test;
3 | import org.neo4j.batchimport.importer.RowData;
4 |
5 | import java.util.Arrays;
6 | import java.util.Map;
7 |
8 | import static org.junit.Assert.assertArrayEquals;
9 | import static org.junit.Assert.assertEquals;
10 |
11 | public class DataTest {
12 | @Test
13 | public void testConvertType() throws Exception {
14 | RowData data = new RowData("a:int\tb:float\tc:float", "\t", 0);
15 | data.processLine("100\t100.0\t1E+10");
16 | Map row = data.getProperties();
17 | assertEquals(100, row.get("a"));
18 | assertEquals(true,row.get("b") instanceof Float);
19 | assertEquals(100.0F, row.get("b"));
20 | assertEquals(true,row.get("b") instanceof Float);
21 | assertEquals(100.0F, row.get("b"));
22 | assertEquals(true,row.get("c") instanceof Float);
23 | assertEquals(1E+10F, row.get("c"));
24 | }
25 |
26 | @Test
27 | public void testRelationship() throws Exception {
28 | RowData data = new RowData("start\tend\ttype\tproperty", "\t", 3);
29 | data.processLine("1\t2\tTYPE\tPROPERTY");
30 | Map row = data.getProperties();
31 | assertEquals("1", data.getValue(0));
32 | assertEquals("2", data.getValue(1));
33 | assertEquals("TYPE", data.getTypeLabels()[0]);
34 | assertEquals("PROPERTY", row.get("property"));
35 | }
36 |
37 | @Test
38 | public void testRelationshipWithNoProperty() throws Exception {
39 | RowData data = new RowData("start\tend\ttype", "\t", 3);
40 | data.processLine("1\t2\tTYPE");
41 | assertEquals("1", data.getValue(0));
42 | assertEquals("2", data.getValue(1));
43 | assertEquals("TYPE", data.getTypeLabels()[0]);
44 | }
45 |
46 | @Test
47 | public void testNodeLabels() throws Exception {
48 | RowData data = new RowData("labels", "\t", 3);
49 | data.processLine("TYPE1,TYPE2");
50 | assertEquals("TYPE1", data.getTypeLabels()[0]);
51 | assertEquals("TYPE2", data.getTypeLabels()[1]);
52 | }
53 | @Test
54 | public void testNodeLabelsWithLabelType() throws Exception {
55 | RowData data = new RowData("foo:label", "\t", 3);
56 | data.processLine("TYPE1,TYPE2");
57 | assertEquals("TYPE1", data.getTypeLabels()[0]);
58 | assertEquals("TYPE2", data.getTypeLabels()[1]);
59 | }
60 | @Test
61 | public void testArrayType() throws Exception {
62 | RowData data = new RowData("a:int\tb:float\tc:string_array", "\t", 0);
63 | data.processLine("100\t100.0\tbagels,coffee,tea");
64 | Map row = data.getProperties();
65 | assertEquals(100, row.get("a"));
66 | assertEquals(true,row.get("b") instanceof Float);
67 | assertEquals(100.0F, row.get("b"));
68 | assertEquals(true,row.get("b") instanceof Float);
69 | assertEquals(100.0F, row.get("b"));
70 | assertEquals(true,row.get("c") instanceof String[]);
71 | assertArrayEquals(new String[]{"bagels", "coffee", "tea"}, (String[]) row.get("c"));
72 | }
73 |
74 | @Test
75 | public void testBooleanArrayType() throws Exception {
76 | RowData data = new RowData("a:int\tb:float\tc:boolean_array", "\t", 0);
77 | data.processLine("100\t100.0\ttrue,false,true");
78 | Map row = data.getProperties();
79 | assertEquals(100, row.get("a"));
80 | assertEquals(true,row.get("b") instanceof Float);
81 | assertEquals(100.0F, row.get("b"));
82 | assertEquals(true,row.get("b") instanceof Float);
83 | assertEquals(100.0F, row.get("b"));
84 | assertEquals(true,row.get("c") instanceof boolean[]);
85 | Assert.assertTrue(Arrays.equals(new boolean[]{true, false, true}, (boolean[]) row.get("c")));
86 | }
87 | @Test
88 | public void testIntArrayType() throws Exception {
89 | RowData data = new RowData("a:int\tb:float\tc:int_array", "\t", 0);
90 | data.processLine("100\t100.0\t1,2,3");
91 | Map row = data.getProperties();
92 | assertEquals(100, row.get("a"));
93 | assertEquals(true,row.get("b") instanceof Float);
94 | assertEquals(100.0F, row.get("b"));
95 | assertEquals(true,row.get("b") instanceof Float);
96 | assertEquals(100.0F, row.get("b"));
97 | assertEquals(true,row.get("c") instanceof int[]);
98 | assertArrayEquals(new int[] {1,2,3}, (int[])row.get("c"));
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
/src/test/java/org/neo4j/batchimport/ImporterIntegrationTest.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport;
2 |
3 | import org.junit.Test;
4 | import org.neo4j.consistency.ConsistencyCheckTool;
5 | import org.neo4j.graphdb.GraphDatabaseService;
6 | import org.neo4j.graphdb.Node;
7 | import org.neo4j.graphdb.Transaction;
8 | import org.neo4j.graphdb.factory.GraphDatabaseFactory;
9 | import org.neo4j.io.fs.FileUtils;
10 |
11 | import java.io.File;
12 | import java.io.FileWriter;
13 |
14 | import static org.junit.Assert.assertTrue;
15 |
16 | /**
17 | * @author Michael Hunger @since 05.11.13
18 | */
19 | public class ImporterIntegrationTest {
20 |
21 | public static final String DB_DIRECTORY = "target/index-reuse.db";
22 |
23 | @Test
24 | public void testMain() throws Exception {
25 | FileUtils.deleteRecursively(new File(DB_DIRECTORY));
26 | TestDataGenerator.main("1000","10","A,B,C","sorted");
27 | Importer.main(DB_DIRECTORY,"nodes.csv","rels.csv");
28 | ConsistencyCheckTool.main(new String[]{DB_DIRECTORY});
29 | }
30 |
31 | @Test
32 | public void testImportHashes() throws Exception {
33 | FileUtils.deleteRecursively(new File(DB_DIRECTORY));
34 | FileWriter writer = new FileWriter("target/hashes.csv");
35 | writer.write("a\n000000F8BE951D6DE6480F4AFDFB670C553E47C0\r\n0000021449360C1A398ED9A18800B2B13AA098A4\r\n00000DABDE4C555FC82F7D534835247B94873C2C\r\n00001BE4128DB41729365A41D3AC1D019E5ED8A6\r\n");
36 | writer.close();
37 | Importer.main(DB_DIRECTORY,"target/hashes.csv");
38 | ConsistencyCheckTool.main(new String[]{DB_DIRECTORY});
39 | GraphDatabaseService db = new GraphDatabaseFactory().newEmbeddedDatabase(new File(DB_DIRECTORY));
40 | try (Transaction tx = db.beginTx()) {
41 | for (Node node : db.getAllNodes()) {
42 | Object value = node.getProperty("a", null);
43 | System.out.println("value = " + value);
44 | assertTrue(value != null);
45 | }
46 | tx.success();
47 | }
48 | db.shutdown();
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/test/java/org/neo4j/batchimport/ImporterTest.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.batchimport;
2 |
3 | import org.junit.Before;
4 | import org.junit.Test;
5 | import org.mockito.Matchers;
6 | import org.neo4j.batchimport.index.LongIterableIndexHits;
7 | import org.neo4j.batchimport.utils.Config;
8 | import org.neo4j.graphdb.DynamicLabel;
9 | import org.neo4j.graphdb.Label;
10 | import org.neo4j.graphdb.RelationshipType;
11 | import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
12 | import org.neo4j.unsafe.batchinsert.BatchInserter;
13 | import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
14 | import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider;
15 |
16 | import java.io.File;
17 | import java.io.StringReader;
18 | import java.util.Arrays;
19 | import java.util.Map;
20 |
21 | import static java.util.Arrays.*;
22 | import org.junit.Assert;
23 | import org.mockito.ArgumentCaptor;
24 | import static org.mockito.Matchers.*;
25 | import static org.mockito.Mockito.*;
26 | import static org.neo4j.helpers.collection.MapUtil.map;
27 |
28 | public class ImporterTest {
29 |
    private BatchInserter inserter;                    // mocked batch inserter the Importer writes to
    private LuceneBatchInserterIndexProvider provider; // mocked index provider
    private Importer importer;                         // subject under test, wired to the mocks
    private BatchInserterIndex index;                  // mocked "index-a" node index
34 |
    @Before
    public void setUp() throws Exception {
        // Replace the Neo4j batch-insertion machinery with mocks so the tests
        // only observe the calls the Importer makes.
        inserter = mock(BatchInserter.class);
        provider = mock(LuceneBatchInserterIndexProvider.class);
        index = mock(BatchInserterIndex.class);
        when(provider.nodeIndex(eq("index-a"),anyMap())).thenReturn(index);

        final Map configData = Config.config("batch.properties");
        new IndexInfo("node_index", "index-a", "exact", null).addToConfig(configData);
        // Anonymous subclass overrides the factory methods to inject the mocks.
        importer = new Importer(File.createTempFile("test", "db"), new Config(configData)) {
            @Override
            protected BatchInserter createBatchInserter(File graphDb, Config config) {
                return inserter;
            }

            @Override
            protected BatchInserterIndexProvider createIndexProvider(boolean luceneOnlyIndex) {
                return provider;
            }
        };
    }
56 |
    @Test
    public void testImportSimpleNode() throws Exception {
        // Header "a" plus one row -> exactly one node with property a=foo.
        importer.importNodes(new StringReader("a\nfoo"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(map("a", "foo")));
    }
63 |
    @Test
    public void testImportHashes() throws Exception {
        // Four hash rows -> four distinct createNode calls, values kept verbatim.
        importer.importNodes(new StringReader("a\n000000F8BE951D6DE6480F4AFDFB670C553E47C0\n0000021449360C1A398ED9A18800B2B13AA098A4\n00000DABDE4C555FC82F7D534835247B94873C2C\n00001BE4128DB41729365A41D3AC1D019E5ED8A6\n"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(map("a", "000000F8BE951D6DE6480F4AFDFB670C553E47C0")));
        verify(inserter, times(1)).createNode(eq(map("a", "0000021449360C1A398ED9A18800B2B13AA098A4")));
        verify(inserter, times(1)).createNode(eq(map("a", "00000DABDE4C555FC82F7D534835247B94873C2C")));
        verify(inserter, times(1)).createNode(eq(map("a", "00001BE4128DB41729365A41D3AC1D019E5ED8A6")));
    }
73 |
    @Test
    public void testImportSimpleNodeWithId() throws Exception {
        // An :id column makes the importer create the node with an explicit id.
        importer.importNodes(new StringReader("i:id\ta\n123\tfoo"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(123L),eq(map("a", "foo")));
    }
80 |
    @Test
    public void testImportNodeWithNoLabel() throws Exception {
        // An empty :label cell must not produce any label argument.
        importer.importNodes(new StringReader("a\t:label\nfoo\t"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(map("a", "foo")));
    }
    @Test
    public void testImportNodeWithLabel() throws Exception {
        // A :label column value is passed as a node label.
        importer.importNodes(new StringReader("a\t:label\nfoo\tbar"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(map("a", "foo")),eq(DynamicLabel.label("bar")));
    }
93 |
    @Test
    public void testImportNodeWithTwoLabels() throws Exception {
        // Comma-separated :label values become multiple label arguments.
        importer.importNodes(new StringReader("a\t:label\nfoo\tbar,bor"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(map("a", "foo")),eq(DynamicLabel.label("bar")),eq(DynamicLabel.label("bor")));
    }
100 |
    @Test
    public void testImportSimpleNodeWithNewlineAtEnd() throws Exception {
        // A trailing newline must not create an extra (empty) node.
        importer.importNodes(new StringReader("a\nfoo\n"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(map("a", "foo")));
    }
    @Test
    public void testImportSimpleNodeWithUmlauts() throws Exception {
        // Non-ASCII characters in header and value survive the import unchanged.
        importer.importNodes(new StringReader("ö\näáß"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(map("ö", "äáß")));
    }
    @Test
    public void testImportNodeWithMultipleProps() throws Exception {
        // Two header columns -> both properties on the same node.
        importer.importNodes(new StringReader("a\tb\nfoo\tbar"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(map("a", "foo","b","bar")));
    }
    @Test
    public void testImportNodeWithIndex() throws Exception {
        // A "name:type:index" header adds only that column to the named index.
        importer.importNodes(new StringReader("a:string:index-a\tb\nfoo\tbar"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(map("a", "foo", "b", "bar")));
        verify(index, times(1)).add(eq(0L), eq(map("a", "foo")));
    }
126 |
    @Test
    public void testImportRelWithIndexLookup() throws Exception {
        // The start node id is resolved via an index lookup instead of being numeric.
        when(index.get("a","foo")).thenReturn(new LongIterableIndexHits(asList(42L)));
        importer.importRelationships(new StringReader("a:string:index-a\tb\tTYPE\nfoo\t123\tFOOBAR"));
        importer.finish();
        verify(index, times(1)).get(eq("a"), eq("foo"));
        verify(inserter, times(1)).createRelationship(eq(42L), eq(123L), Matchers.any(RelationshipType.class),eq(map()));
    }
135 |
    @Test
    public void testImportRelationshipsWithNonIndexedNodes() throws Exception {
        // "b" misses the index -> relationships touching it must be skipped, not created with -1.
        when(index.get("node","a")).thenReturn(new LongIterableIndexHits(asList(1L)));
        when(index.get("node","b")).thenReturn(new LongIterableIndexHits(Arrays.asList()));
        importer.importRelationships(new StringReader("node:string:index-a\tnode:string:index-a\ttype\na\ta\tTYPE\na\tb\tTYPE\nb\ta\tTYPE"));
        importer.finish();
        verify(inserter, times(1)).createRelationship(eq(1L), eq(1L), argThat(new RelationshipMatcher("TYPE")),eq(map()));
        verify(inserter, never()).createRelationship(eq(1L), eq(-1L), argThat(new RelationshipMatcher("TYPE")),eq(map()));
        verify(inserter, never()).createRelationship(eq(-1L), eq(1L), argThat(new RelationshipMatcher("TYPE")),eq(map()));
    }
146 |
    @Test
    public void testImportNodeWithIndividualTypes() throws Exception {
        // Per-column type declarations convert values to int and float.
        importer.importNodes(new StringReader("a:int\tb:float\tc:float\n10\t10.0\t1E+10"));
        importer.finish();
        verify(inserter, times(1)).createNode(eq(map("a", 10,"b",10.0F,"c",1E+10F)));
    }
153 |
154 | @Test
155 | public void testImportNodeWithArrayTypes() throws Exception {
156 | importer.importNodes(new StringReader("a:STRING_ARRAY\tb:float\tc:float\n10,11,12\t10.0\t1E+10"));
157 | importer.finish();
158 | String[] expectedArray = {"10","11","12"};
159 | ArgumentCaptor