├── common
├── src
│ ├── main
│ │ ├── resources
│ │ │ ├── neo4j-spark-connector.properties
│ │ │ └── META-INF.services
│ │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── scala
│ │ │ └── org
│ │ │ └── neo4j
│ │ │ └── spark
│ │ │ ├── util
│ │ │ ├── ValidationUtil.scala
│ │ │ ├── DriverCache.scala
│ │ │ └── Neo4jImplicits.scala
│ │ │ ├── reader
│ │ │ └── BasePartitionReader.scala
│ │ │ └── writer
│ │ │ └── BaseDataWriter.scala
│ └── test
│ │ ├── resources
│ │ └── neo4j-spark-connector.properties
│ │ └── scala
│ │ └── org
│ │ └── neo4j
│ │ └── spark
│ │ ├── util
│ │ ├── Neo4jUtilTest.scala
│ │ ├── Neo4jImplicitsTest.scala
│ │ └── Neo4jOptionsTest.scala
│ │ ├── SparkConnectorScalaBaseTSE.scala
│ │ ├── SparkConnectorScalaBaseWithApocTSE.scala
│ │ ├── SparkConnectorScalaSuiteIT.scala
│ │ └── SparkConnectorScalaSuiteWithApocIT.scala
└── pom.xml
├── spark-2.4
├── src
│ ├── main
│ │ ├── resources
│ │ │ ├── neo4j-spark-connector.properties
│ │ │ └── META-INF.services
│ │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── scala
│ │ │ └── org
│ │ │ └── neo4j
│ │ │ └── spark
│ │ │ ├── writer
│ │ │ ├── Neo4jDataWriter.scala
│ │ │ ├── Neo4jDataWriterFactory.scala
│ │ │ └── Neo4jDataSourceWriter.scala
│ │ │ ├── DataSource.scala
│ │ │ └── reader
│ │ │ ├── Neo4jInputPartitionReader.scala
│ │ │ └── Neo4jDataSourceReader.scala
│ └── test
│ │ ├── java
│ │ └── org
│ │ │ └── neo4j
│ │ │ └── spark
│ │ │ └── SparkConnectorSuiteIT.java
│ │ └── scala
│ │ └── org
│ │ └── neo4j
│ │ └── spark
│ │ ├── DataSourceReaderNeo4j41xWithApocTSE.scala
│ │ ├── DataSourceReaderNeo4j35xTSE.scala
│ │ ├── DataSourceReaderNeo4j4xWithApocTSE.scala
│ │ ├── SparkConnectorScalaBaseTSE.scala
│ │ ├── SparkConnectorScalaBaseWithApocTSE.scala
│ │ ├── SparkConnectorScalaSuiteWithApocIT.scala
│ │ ├── SparkConnectorScalaSuiteIT.scala
│ │ └── DataSourceReaderNeo4j41xTSE.scala
└── pom.xml
├── spark-3.0
├── src
│ ├── main
│ │ ├── resources
│ │ │ ├── neo4j-spark-connector.properties
│ │ │ └── META-INF.services
│ │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── scala
│ │ │ └── org
│ │ │ └── neo4j
│ │ │ └── spark
│ │ │ ├── writer
│ │ │ ├── Neo4jDataWriter.scala
│ │ │ ├── Neo4jDataWriterFactory.scala
│ │ │ ├── Neo4jBatchWriter.scala
│ │ │ └── Neo4jWriterBuilder.scala
│ │ │ ├── reader
│ │ │ ├── Neo4jPartitionReader.scala
│ │ │ ├── SimplePartitionReaderFactory.scala
│ │ │ ├── SimpleScanBuilder.scala
│ │ │ └── SimpleScan.scala
│ │ │ ├── Neo4jTable.scala
│ │ │ └── DataSource.scala
│ └── test
│ │ ├── resources
│ │ └── neo4j-spark-connector.properties
│ │ ├── java
│ │ └── org
│ │ │ └── neo4j
│ │ │ └── spark
│ │ │ └── SparkConnectorSuiteIT.java
│ │ └── scala
│ │ └── org
│ │ └── neo4j
│ │ └── spark
│ │ ├── DataSourceReaderNeo4j41xWithApocTSE.scala
│ │ ├── DataSourceReaderNeo4j35xTSE.scala
│ │ ├── DataSourceReaderNeo4j4xWithApocTSE.scala
│ │ ├── SparkConnectorScalaBaseTSE.scala
│ │ ├── SparkConnectorScalaBaseWithApocTSE.scala
│ │ ├── SparkConnectorScalaSuiteWithApocIT.scala
│ │ ├── SparkConnectorScalaSuiteIT.scala
│ │ └── DataSourceReaderNeo4j41xTSE.scala
└── pom.xml
├── .mvn
└── wrapper
│ ├── maven-wrapper.jar
│ ├── maven-wrapper.properties
│ └── MavenWrapperDownloader.java
├── test-support
├── src
│ └── main
│ │ ├── resources
│ │ └── neo4j-spark-connector.properties
│ │ ├── scala
│ │ └── org
│ │ │ └── neo4j
│ │ │ ├── spark
│ │ │ └── TestUtil.scala
│ │ │ └── Neo4jContainerExtension.scala
│ │ └── java
│ │ └── org
│ │ └── neo4j
│ │ └── spark
│ │ └── Assert.java
└── pom.xml
├── doc
├── docbook
│ ├── catalog
│ │ ├── CatalogManager.properties
│ │ └── catalog.xml
│ └── content-map.xml
├── devenv.local.template
├── server.js
├── docs
│ ├── antora.yml
│ └── modules
│ │ └── ROOT
│ │ ├── nav.adoc
│ │ └── pages
│ │ ├── aura.adoc
│ │ ├── index.adoc
│ │ ├── neo4j-cluster.adoc
│ │ ├── graphkeys.adoc
│ │ ├── python.adoc
│ │ ├── types.adoc
│ │ ├── overview.adoc
│ │ ├── quick-java-example.adoc
│ │ ├── gds.adoc
│ │ ├── faq.adoc
│ │ └── configuration.adoc
├── devenv
├── asciidoc
│ ├── cloud
│ │ └── index.adoc
│ ├── overview
│ │ └── index.adoc
│ ├── faq
│ │ └── index.adoc
│ ├── quickstart
│ │ └── index.adoc
│ ├── index.adoc
│ └── neo4j-cluster
│ │ └── index.adoc
├── javascript
│ ├── datatable.js
│ ├── version.js
│ ├── colorize.js
│ ├── versionswitcher.js
│ ├── mp-nav.js
│ └── tabs-for-chunked.js
├── docs.yml
├── settings.gradle
├── README.md
├── package.json
├── gradlew.bat
├── css
│ └── extra.css
├── pom.xml
└── gradlew
├── .gitignore
├── spark-packages.sh
├── README.md
├── .travis.yml
└── mvnw.cmd
/common/src/main/resources/neo4j-spark-connector.properties:
--------------------------------------------------------------------------------
1 | version=${project.version}
--------------------------------------------------------------------------------
/spark-2.4/src/main/resources/neo4j-spark-connector.properties:
--------------------------------------------------------------------------------
1 | version=${project.version}
--------------------------------------------------------------------------------
/spark-3.0/src/main/resources/neo4j-spark-connector.properties:
--------------------------------------------------------------------------------
1 | version=${project.version}
--------------------------------------------------------------------------------
/common/src/main/resources/META-INF.services/org.apache.spark.sql.sources.DataSourceRegister:
--------------------------------------------------------------------------------
1 | org.neo4j.spark.DataSource
--------------------------------------------------------------------------------
/spark-2.4/src/main/resources/META-INF.services/org.apache.spark.sql.sources.DataSourceRegister:
--------------------------------------------------------------------------------
1 | org.neo4j.spark.DataSource
--------------------------------------------------------------------------------
/spark-3.0/src/main/resources/META-INF.services/org.apache.spark.sql.sources.DataSourceRegister:
--------------------------------------------------------------------------------
1 | org.neo4j.spark.DataSource
--------------------------------------------------------------------------------
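
Each module ships the same `META-INF.services/org.apache.spark.sql.sources.DataSourceRegister` entry, which is how Spark's service loader discovers `org.neo4j.spark.DataSource` (and its short name `neo4j`, see `DataSource.shortName` below). A minimal read sketch follows, assuming a locally reachable Neo4j at a placeholder bolt URL; the `url` and `labels` option names are the ones used in this repository's own tests.

import org.apache.spark.sql.SparkSession

object ReadPeopleExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("neo4j-connector-read-example")
      .master("local[*]")
      .getOrCreate()

    // The fully qualified class name always works; the DataSourceRegister file
    // is what lets Spark resolve the short name "neo4j" as well.
    val people = spark.read
      .format("org.neo4j.spark.DataSource")
      .option("url", "bolt://localhost:7687")   // placeholder URL
      .option("labels", "Person")               // read nodes with this label
      .load()

    people.show()
    spark.stop()
  }
}
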
/.mvn/wrapper/maven-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alexott/neo4j-spark-connector/4.0/.mvn/wrapper/maven-wrapper.jar
--------------------------------------------------------------------------------
/common/src/test/resources/neo4j-spark-connector.properties:
--------------------------------------------------------------------------------
1 | neo4j.version=${neo4j.version}
2 | neo4j.experimental=${neo4j.experimental}
--------------------------------------------------------------------------------
/spark-3.0/src/test/resources/neo4j-spark-connector.properties:
--------------------------------------------------------------------------------
1 | neo4j.version=${neo4j.version}
2 | neo4j.experimental=${neo4j.experimental}
--------------------------------------------------------------------------------
/test-support/src/main/resources/neo4j-spark-connector.properties:
--------------------------------------------------------------------------------
1 | neo4j.version=${neo4j.version}
2 | neo4j.experimental=${neo4j.experimental}
--------------------------------------------------------------------------------
/doc/docbook/catalog/CatalogManager.properties:
--------------------------------------------------------------------------------
1 | catalogs=catalog.xml
2 | relative-catalogs=false
3 | static-catalog=yes
4 | catalog-class-name=org.apache.xml.resolver.Resolver
5 | verbosity=1
6 |
--------------------------------------------------------------------------------
/doc/devenv.local.template:
--------------------------------------------------------------------------------
1 | # URI is necessary, empty user name and password are fine if accessing public artifacts.
2 | ARTIFACTORY_URI=
3 | ARTIFACTORY_USERNAME=
4 | ARTIFACTORY_PASSWORD=
5 |
6 | # vim: set sw=2 ts=2 ft=sh:
7 |
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip
2 | wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar
3 |
--------------------------------------------------------------------------------
/doc/server.js:
--------------------------------------------------------------------------------
1 | const express = require('express')
2 |
3 | const app = express()
4 | app.use(express.static('./build/site'))
5 |
6 | app.get('/', (req, res) => res.redirect('/spark'))
7 |
8 | app.listen(8000, () => console.log('📘 http://localhost:8000'))
9 |
--------------------------------------------------------------------------------
/doc/docs/antora.yml:
--------------------------------------------------------------------------------
1 | name: spark
2 | version: master
3 | title: Neo4j Spark Connector
4 | start_page: ROOT:index.adoc
5 | nav:
6 | - modules/ROOT/nav.adoc
7 |
8 | asciidoc:
9 | attributes:
10 | theme: docs
11 | connector-version: 4.0.0
12 | copyright: Neo4j Inc.
13 |
--------------------------------------------------------------------------------
/common/src/test/scala/org/neo4j/spark/util/Neo4jUtilTest.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.util
2 |
3 | import org.junit.Test
4 |
5 | class Neo4jUtilTest {
6 |
7 | @Test
8 | def testSafetyCloseShouldNotFailWithNull(): Unit = {
9 | Neo4jUtil.closeSafety(null)
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/doc/devenv:
--------------------------------------------------------------------------------
1 | # source me
2 | if [ -f ./devenv.local ]; then
3 | . ./devenv.local
4 |
5 | export ARTIFACTORY_URI
6 | export ARTIFACTORY_USERNAME
7 | export ARTIFACTORY_PASSWORD
8 |
9 | else
10 | echo "Couldn't find ./devenv.local."
11 | fi
12 |
13 | # vim: set sw=2 ts=2 ft=sh:
14 |
--------------------------------------------------------------------------------
/doc/asciidoc/cloud/index.adoc:
--------------------------------------------------------------------------------
1 | [#cloud]
2 | == Using with Managed Cloud Spark
3 |
4 |
5 | ifdef::env-docs[]
6 | [abstract]
7 | --
8 | This chapter provides an introduction to using the Neo4j Connector for Apache Spark with managed cloud Spark environments.
9 | --
10 | endif::env-docs[]
11 |
12 | (Notes here)
13 |
--------------------------------------------------------------------------------
/spark-2.4/src/test/java/org/neo4j/spark/SparkConnectorSuiteIT.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark;
2 |
3 | import org.junit.runner.RunWith;
4 | import org.junit.runners.Suite;
5 |
6 | @RunWith(Suite.class)
7 | @Suite.SuiteClasses({
8 | DataSourceReaderTypesTSE.class
9 | })
10 | public class SparkConnectorSuiteIT extends SparkConnectorScalaSuiteIT {
11 | }
12 |
--------------------------------------------------------------------------------
/spark-3.0/src/test/java/org/neo4j/spark/SparkConnectorSuiteIT.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark;
2 |
3 | import org.junit.runner.RunWith;
4 | import org.junit.runners.Suite;
5 |
6 | @RunWith(Suite.class)
7 | @Suite.SuiteClasses({
8 | DataSourceReaderTypesTSE.class
9 | })
10 | public class SparkConnectorSuiteIT extends SparkConnectorScalaSuiteIT {
11 | }
12 |
--------------------------------------------------------------------------------
/doc/docbook/catalog/catalog.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/doc/javascript/datatable.js:
--------------------------------------------------------------------------------
1 | /*
2 |
3 |
4 |
6 | */
7 | $(document).ready(function() {
8 | $('#table-all table').DataTable({"aLengthMenu": [[20,40, 80, -1], [20,40, 80, "All"]],"pageLength": 20});
9 | } );
10 |
--------------------------------------------------------------------------------
/doc/asciidoc/overview/index.adoc:
--------------------------------------------------------------------------------
1 |
2 | == Project Overview
3 |
4 | ifdef::env-docs[]
5 | [abstract]
6 | --
7 | This chapter provides an introduction to the Neo4j Connector for Apache Spark.
8 | --
9 | endif::env-docs[]
10 |
11 | Overview here
12 |
13 | The Neo4j Connector for Apache Spark can run in two modes:
14 |
15 | * as a Neo4j plugin:
16 |
17 | ** Runmode1
18 | ** Runmode2
19 |
20 | [NOTE]
21 | **Be aware of stuff here.**
22 |
--------------------------------------------------------------------------------
/doc/docs.yml:
--------------------------------------------------------------------------------
1 | site:
2 | title: Neo4j Connector for Apache Spark User Guide
3 | url: /neo4j-spark-docs
4 | content:
5 | sources:
6 | - url: ../
7 | branches: HEAD
8 | start_path: doc/docs
9 | ui:
10 | bundle:
11 | url: https://s3-eu-west-1.amazonaws.com/static-content.neo4j.com/build/ui-bundle.zip
12 | snapshot: true
13 | asciidoc:
14 | attributes:
15 | page-theme: docs
16 | page-cdn: /_/
--------------------------------------------------------------------------------
/doc/settings.gradle:
--------------------------------------------------------------------------------
1 | pluginManagement {
2 | repositories {
3 | // mavenLocal()
4 | maven {
5 | url "https://neo.jfrog.io/neo/docs-maven" // System.env.ARTIFACTORY_URI
6 | /*
7 | credentials {
8 | username System.env.ARTIFACTORY_USERNAME
9 | password System.env.ARTIFACTORY_PASSWORD
10 | }
11 | */
12 | }
13 | gradlePluginPortal()
14 |
15 | }
16 | }
--------------------------------------------------------------------------------
/doc/asciidoc/faq/index.adoc:
--------------------------------------------------------------------------------
1 | [#faq]
2 | == Neo4j Connector for Apache Spark FAQ
3 |
4 | ifdef::env-docs[]
5 | [abstract]
6 | --
7 | This chapter answers frequently asked questions
8 | --
9 | endif::env-docs[]
10 |
11 | === Source Code License
12 |
13 | The source code to the Neo4j Connector for Apache Spark is available under the terms of the Apache License, version 2.0. See the LICENSE.txt file in
14 | the source code repository for full terms and conditions.
15 |
16 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | neo4j-home
2 | .gradle
3 | gradle/
4 | build/
5 | *~
6 | \#*
7 | target
8 | out
9 | .project
10 | .classpath
11 | .settings
12 | .externalToolBuilders/
13 | .scala_dependencies
14 | .factorypath
15 | .cache
16 | .cache-main
17 | .cache-tests
18 | *.iws
19 | *.ipr
20 | *.iml
21 | .idea
22 | .DS_Store
23 | .shell_history
24 | .mailmap
25 | .java-version
26 | .cache-main
27 | .cache-tests
28 | Thumbs.db
29 | .cache-main
30 | .cache-tests
31 | docs/guides
32 | doc/node
33 | doc/node_modules
34 | doc/package-lock.json
35 |
--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
1 | # Local Development
2 |
3 | To preview the Neo4j Connector for Apache Spark documentation (built with Antora) locally, follow these steps:
4 |
5 | - open a terminal window and be sure to be at the root of the project
6 | - run the following command: `cd doc`
7 | - run the following command: `npm install && npm start`
8 | - browse to [localhost:8000](http://localhost:8000)
9 |
10 | Now, every time you change one of your `.adoc` files, Antora rebuilds everything;
11 | you just need to refresh the page at [localhost:8000](http://localhost:8000).
12 |
--------------------------------------------------------------------------------
/test-support/src/main/scala/org/neo4j/spark/TestUtil.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import java.util.Properties
4 |
5 | object TestUtil {
6 |
7 | private val properties = new Properties()
8 | properties.load(Thread.currentThread().getContextClassLoader().getResourceAsStream("neo4j-spark-connector.properties"))
9 |
10 | def isTravis(): Boolean = Option(System.getenv("TRAVIS")).getOrElse("false").toBoolean
11 |
12 | def neo4jVersion(): String = properties.getProperty("neo4j.version")
13 |
14 | def experimental(): Boolean = properties.getProperty("neo4j.experimental", "false").toBoolean
15 |
16 | }
17 |
--------------------------------------------------------------------------------
/spark-packages.sh:
--------------------------------------------------------------------------------
1 | if [[ $# -lt 2 ]] ; then
2 | echo "Usage ./spark-packages.sh "
3 | exit 1
4 | fi
5 |
6 | ARTIFACT=neo4j-connector-apache-spark_$2
7 | VERSION=$1
8 | ./mvnw clean install -Pscala-$2 -DskipTests
9 | cat << EOF > target/$ARTIFACT-$VERSION.pom
10 | <project>
11 |   <modelVersion>4.0.0</modelVersion>
12 |   <groupId>neo4j-contrib</groupId>
13 |   <artifactId>$ARTIFACT</artifactId>
14 |   <version>$VERSION</version>
15 | </project>
16 | EOF
17 | cp pom.xml target/$ARTIFACT-$VERSION.pom
18 | cp target/$ARTIFACT-$VERSION.jar target/$ARTIFACT-$VERSION.jar
19 | zip -jv target/$ARTIFACT-$VERSION.zip target/$ARTIFACT-$VERSION.pom target/$ARTIFACT-$VERSION.jar
20 | xdg-open target
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/nav.adoc:
--------------------------------------------------------------------------------
1 | * xref::overview.adoc[Project overview]
2 | * xref::quickstart.adoc[Quick Start]
3 | * xref::configuration.adoc[Neo4j Driver Configuration]
4 | * xref::reading.adoc[Reading from Neo4j]
5 | * xref::writing.adoc[Writing to Neo4j]
6 | * xref::python.adoc[Using with Pyspark / Python]
7 | * xref::gds.adoc[Using with Graph Data Science]
8 | * xref::neo4j-cluster.adoc[Using with Neo4j Causal Cluster]
9 | * xref::aura.adoc[Using with Neo4j Aura]
10 | * xref::quick-java-example.adoc[Quick Java Example]
11 | * xref::architecture.adoc[Architecture Guidance for Implementing]
12 | * xref::types.adoc[Neo4j-Spark Data Types Reference]
13 | * xref::faq.adoc[Neo4j Connector for Apache Spark FAQ]
14 |
--------------------------------------------------------------------------------
/spark-2.4/src/main/scala/org/neo4j/spark/writer/Neo4jDataWriter.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.writer
2 |
3 | import org.apache.spark.sql.SaveMode
4 | import org.apache.spark.sql.catalyst.InternalRow
5 | import org.apache.spark.sql.sources.v2.writer.DataWriter
6 | import org.apache.spark.sql.types.StructType
7 | import org.neo4j.spark.util.Neo4jOptions
8 |
9 | class Neo4jDataWriter(jobId: String,
10 | partitionId: Int,
11 | structType: StructType,
12 | saveMode: SaveMode,
13 | options: Neo4jOptions,
14 | scriptResult: java.util.List[java.util.Map[String, AnyRef]])
15 | extends BaseDataWriter(jobId, partitionId, structType, saveMode, options, scriptResult)
16 | with DataWriter[InternalRow]
--------------------------------------------------------------------------------
/doc/asciidoc/quickstart/index.adoc:
--------------------------------------------------------------------------------
1 |
2 | == Quick Start
3 |
4 | ifdef::env-docs[]
5 | [abstract]
6 | --
7 | Get started fast with the most common scenarios for the Neo4j Connector for Apache Spark.
8 | --
9 | endif::env-docs[]
10 |
11 | === Neo4j Connector for Apache Spark Plugin
12 |
13 | ==== Install the Plugin
14 |
15 | * Download the latest release jar from https://github.com/neo4j-contrib/neo4j-spark-connector/releases
16 | * (Installation instructions here)
17 |
18 | ==== Spark Settings
19 |
20 | (Setting detail here)
21 |
22 | .Most Common Needed Configuration Settings
23 | |===
24 | |Setting Name |Description |Default Value
25 |
26 | |SETTING A
27 | |description.
28 | |500
29 |
30 | |SETTING B
31 | |DESCRIPTION
32 | |33554432
33 |
34 | |SETTING C
35 | |DESCRIPTION
36 | |16384
37 | |===
38 |
--------------------------------------------------------------------------------
/spark-3.0/src/main/scala/org/neo4j/spark/writer/Neo4jDataWriter.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.writer
2 |
3 | import org.apache.spark.sql.SaveMode
4 | import org.apache.spark.sql.catalyst.InternalRow
5 | import org.apache.spark.sql.connector.write.DataWriter
6 | import org.apache.spark.sql.types.StructType
7 | import org.neo4j.spark.util.Neo4jOptions
8 |
9 | class Neo4jDataWriter(jobId: String,
10 | partitionId: Int,
11 | structType: StructType,
12 | saveMode: SaveMode,
13 | options: Neo4jOptions,
14 | scriptResult: java.util.List[java.util.Map[String, AnyRef]])
15 | extends BaseDataWriter(jobId, partitionId, structType, saveMode, options, scriptResult)
16 | with DataWriter[InternalRow]
17 |
18 |
--------------------------------------------------------------------------------
/doc/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@neo4j/docs",
3 | "version": "1.0.0",
4 | "description": "Neo4j Docs builder",
5 | "main": "index.js",
6 | "watch": {
7 | "build:docs": {
8 | "patterns": [
9 | "docs"
10 | ],
11 | "extensions": "adoc"
12 | }
13 | },
14 | "scripts": {
15 | "server": "forever start server.js",
16 | "start": "npm run server && npm-watch",
17 | "stop": "forever stop server.js",
18 | "build:docs": "antora --fetch --stacktrace docs.yml"
19 | },
20 | "license": "ISC",
21 | "dependencies": {
22 | "@antora/cli": "^2.3.3",
23 | "@antora/site-generator-default": "^2.3.3",
24 | "cheerio": "^1.0.0-rc.3"
25 | },
26 | "devDependencies": {
27 | "express": "^4.17.1",
28 | "npm-watch": "^0.7.0",
29 | "forever": "^3.0.2"
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/doc/javascript/version.js:
--------------------------------------------------------------------------------
1 | window.docMeta = (function () {
2 | var version = '4.0';
3 | var name = 'Streams';
4 | var href = window.location.href;
5 | var len = href.indexOf('/' + version) != -1 ? href.indexOf('/' + version) : href.length -1;
6 | return {
7 | name: name,
8 | version: version,
9 | availableDocVersions: ["3.4", "3.5", "4.0"],
10 | thisPubBaseUri: href.substring(0,len) + '/' + version,
11 | unversionedDocBaseUri: href.substring(0, len) + '/',
12 | commonDocsBaseUri: href.substring(0, href.indexOf(name) - 1)
13 | }
14 | })();
15 |
16 | (function () {
17 | var baseUri = window.docMeta.unversionedDocBaseUri; // + window.location.pathname.split(window.docMeta.name + '/')[1].split('/')[0] + '/';
18 | var docPath = window.location.href.replace(baseUri, '');
19 | window.neo4jPageId = docPath;
20 | })();
21 | // vim: set sw=2 ts=2:
22 |
--------------------------------------------------------------------------------
/spark-2.4/src/main/scala/org/neo4j/spark/writer/Neo4jDataWriterFactory.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.writer
2 |
3 | import org.apache.spark.sql.SaveMode
4 | import org.apache.spark.sql.catalyst.InternalRow
5 | import org.apache.spark.sql.sources.v2.writer.{DataWriter, DataWriterFactory}
6 | import org.apache.spark.sql.types.StructType
7 | import org.neo4j.spark.util.Neo4jOptions
8 |
9 | class Neo4jDataWriterFactory(jobId: String,
10 | structType: StructType,
11 | saveMode: SaveMode,
12 | options: Neo4jOptions,
13 | scriptResult: java.util.List[java.util.Map[String, AnyRef]]) extends DataWriterFactory[InternalRow] {
14 | override def createDataWriter(partitionId: Int, taskId: Long, epochId: Long): DataWriter[InternalRow] = new Neo4jDataWriter(jobId, partitionId, structType, saveMode, options, scriptResult)
15 | }
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/pages/aura.adoc:
--------------------------------------------------------------------------------
1 |
2 | [#aura]
3 | = Using with Neo4j Aura
4 |
5 | [abstract]
6 | --
7 | This chapter describes considerations around using Neo4j Connector for Apache Spark with link:https://neo4j.com/cloud/aura/[Neo4j Aura].
8 | --
9 |
10 | == Overview
11 |
12 | link:https://neo4j.com/cloud/aura/[Neo4j Aura] is Neo4j's fully managed database-as-a-service offering.
13 |
14 | == Remote Clients
15 |
16 | Sometimes remote applications talk to Neo4j via the official drivers and also want to use this
17 | connector's functionality. Best practices in these cases are:
18 |
19 | * Always use a `neo4j+s://` driver URI when communicating with the cluster in the client application. The optimal
20 | driver URI will be provided by Aura itself when you create a database.
21 | * In Aura Enterprise consider creating a separate username/password for Spark access; avoid running all processes through the default
22 | `neo4j` account.
23 |
--------------------------------------------------------------------------------
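
To make the advice above concrete, here is a hedged Scala sketch of reading from an Aura instance through the connector over a `neo4j+s://` routing URI. The URI, credentials, and label are placeholders, and the `authentication.*` option names are an assumption not shown elsewhere in this extract.

import org.apache.spark.sql.SparkSession

object AuraReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("aura-read-sketch")
      .master("local[*]")
      .getOrCreate()

    val df = spark.read
      .format("org.neo4j.spark.DataSource")
      // Aura provides the neo4j+s:// URI when the database is created (placeholder below).
      .option("url", "neo4j+s://xxxxxxxx.databases.neo4j.io")
      // Assumed option names for basic auth; prefer a dedicated Spark user over `neo4j`.
      .option("authentication.basic.username", "spark_reader")
      .option("authentication.basic.password", "<password>")
      .option("labels", "Person")
      .load()

    df.show()
    spark.stop()
  }
}
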
/spark-3.0/src/main/scala/org/neo4j/spark/writer/Neo4jDataWriterFactory.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.writer
2 |
3 | import org.apache.spark.sql.SaveMode
4 | import org.apache.spark.sql.catalyst.InternalRow
5 | import org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory, PhysicalWriteInfo}
6 | import org.apache.spark.sql.types.StructType
7 | import org.neo4j.spark.util.Neo4jOptions
8 |
9 | class Neo4jDataWriterFactory(jobId: String,
10 | structType: StructType,
11 | saveMode: SaveMode,
12 | options: Neo4jOptions,
13 | scriptResult: java.util.List[java.util.Map[String, AnyRef]]) extends DataWriterFactory {
14 | override def createWriter(partitionId: Int, taskId:Long): DataWriter[InternalRow] = new Neo4jDataWriter(
15 | jobId,
16 | partitionId,
17 | structType,
18 | saveMode,
19 | options,
20 | scriptResult
21 | )
22 | }
23 |
--------------------------------------------------------------------------------
/doc/asciidoc/index.adoc:
--------------------------------------------------------------------------------
1 |
2 | = Neo4j Connector for Apache Spark v{docs-version}
3 | :toc:
4 | :toclevels: 5
5 | :sectids:
6 | :sectlinks:
7 | :sectnums:
8 | :sectnumlevels: 5
9 | :env-docs: true
10 |
11 | ifdef::backend-html5[(C) {copyright}]
12 |
13 | License: link:{common-license-page-uri}[Creative Commons 4.0]
14 |
15 | [abstract]
16 | --
17 | This is the user guide for Neo4j Connector for Apache Spark version {docs-version}, authored by the Neo4j Labs Team.
18 | --
19 |
20 | The guide covers the following areas:
21 |
22 | * <<_project_overview>> -- Project overview
23 | * <<_quick_start>> -- Get Started Fast with the most Common Scenarios
24 | * <<neo4j_causal_cluster>> -- Using with Neo4j Causal Cluster
25 | * <<cloud>> -- Using with Managed Cloud Spark
26 | * <<faq>> -- Neo4j Connector for Apache Spark FAQ
27 |
28 |
29 | include::overview/index.adoc[]
30 |
31 | include::quickstart/index.adoc[]
32 |
33 | include::neo4j-cluster/index.adoc[]
34 |
35 | include::cloud/index.adoc[]
36 |
37 | include::faq/index.adoc[]
--------------------------------------------------------------------------------
/common/src/main/scala/org/neo4j/spark/util/ValidationUtil.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.util
2 |
3 | object ValidationUtil {
4 |
5 | def isNotEmpty(str: String, message: String) = if (str.isEmpty) {
6 | throw new IllegalArgumentException(message)
7 | }
8 |
9 | def isNotBlank(str: String, message: String) = if (str.trim.isEmpty) {
10 | throw new IllegalArgumentException(message)
11 | }
12 |
13 | def isNotEmpty(seq: Seq[_], message: String) = if (seq.isEmpty) {
14 | throw new IllegalArgumentException(message)
15 | }
16 |
17 | def isNotEmpty(map: Map[_, _], message: String) = if (map.isEmpty) {
18 | throw new IllegalArgumentException(message)
19 | }
20 |
21 | def isTrue(boolean: Boolean, message: String) = if (!boolean) {
22 | throw new IllegalArgumentException(message)
23 | }
24 |
25 | def isFalse(boolean: Boolean, message: String) = if (boolean) {
26 | throw new IllegalArgumentException(message)
27 | }
28 |
29 | def isNotValid(message: String) = throw new IllegalArgumentException(message)
30 | }
31 |
--------------------------------------------------------------------------------
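
A small usage sketch of the helpers above: each check throws `IllegalArgumentException` with the supplied message when the condition fails, and otherwise returns normally. The messages and values here are placeholders.

import org.neo4j.spark.util.ValidationUtil

object ValidationExample extends App {
  // Passing checks return normally.
  ValidationUtil.isNotBlank("bolt://localhost:7687", "The url option is required")
  ValidationUtil.isTrue(Seq("Person").nonEmpty, "At least one label is required")

  // Failing checks throw IllegalArgumentException carrying the supplied message.
  try {
    ValidationUtil.isNotEmpty("", "The query option must not be empty")
  } catch {
    case e: IllegalArgumentException => println(e.getMessage)
  }
}
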
/doc/docs/modules/ROOT/pages/index.adoc:
--------------------------------------------------------------------------------
1 |
2 | = Neo4j Connector for Apache Spark v{connector-version}
3 |
4 | ifdef::backend-html5[(C) {copyright}]
5 |
6 | License: link:{attachmentsdir}/LICENSE.txt[Creative Commons 4.0]
7 |
8 | [abstract]
9 | --
10 | This is the user guide for Neo4j Connector for Apache Spark version {connector-version}, authored by the Neo4j Labs Team.
11 | --
12 |
13 | The guide covers the following areas:
14 |
15 | * xref::overview.adoc[Project overview]
16 | * xref::quickstart.adoc[Quick Start]
17 | * xref::configuration.adoc[Neo4j Driver Configuration]
18 | * xref::reading.adoc[Reading from Neo4j]
19 | * xref::writing.adoc[Writing to Neo4j]
20 | * xref::python.adoc[Using with Pyspark / Python]
21 | * xref::gds.adoc[Using with Graph Data Science]
22 | * xref::neo4j-cluster.adoc[Using with Neo4j Causal Cluster]
23 | * xref::aura.adoc[Using with Neo4j Aura]
24 | * xref::architecture.adoc[Architecture Guidance for Implementing]
25 | * xref::types.adoc[Neo4j-Spark Data Types Reference]
26 | * xref::faq.adoc[Neo4j Connector for Apache Spark FAQ]
27 |
--------------------------------------------------------------------------------
/spark-2.4/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j41xWithApocTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.junit.Assert.assertEquals
4 | import org.junit.{Assume, BeforeClass, Test}
5 |
6 | object DataSourceReaderNeo4j41xWithApocTSE {
7 | @BeforeClass
8 | def checkNeo4jVersion() {
9 | val neo4jVersion = TestUtil.neo4jVersion()
10 | Assume.assumeTrue(!neo4jVersion.startsWith("3.5") && !neo4jVersion.startsWith("4.0"))
11 | }
12 | }
13 |
14 | class DataSourceReaderNeo4j41xWithApocTSE extends SparkConnectorScalaBaseWithApocTSE {
15 |
16 | @Test
17 | def testReturnProcedure(): Unit = {
18 | val query =
19 | """RETURN apoc.convert.toSet([1,1,3]) AS foo, 'bar' AS bar
20 | |""".stripMargin
21 |
22 | val df = ss.read.format(classOf[DataSource].getName)
23 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl)
24 | .option("partitions", 1)
25 | .option("query", query)
26 | .load
27 |
28 | assertEquals(Seq("foo", "bar"), df.columns.toSeq) // ordering should be preserved
29 | assertEquals(1, df.count())
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/spark-3.0/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j41xWithApocTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.junit.Assert.assertEquals
4 | import org.junit.{Assume, BeforeClass, Test}
5 |
6 | object DataSourceReaderNeo4j41xWithApocTSE {
7 | @BeforeClass
8 | def checkNeo4jVersion() {
9 | val neo4jVersion = TestUtil.neo4jVersion()
10 | Assume.assumeTrue(!neo4jVersion.startsWith("3.5") && !neo4jVersion.startsWith("4.0"))
11 | }
12 | }
13 |
14 | class DataSourceReaderNeo4j41xWithApocTSE extends SparkConnectorScalaBaseWithApocTSE {
15 |
16 | @Test
17 | def testReturnProcedure(): Unit = {
18 | val query =
19 | """RETURN apoc.convert.toSet([1,1,3]) AS foo, 'bar' AS bar
20 | |""".stripMargin
21 |
22 | val df = ss.read.format(classOf[DataSource].getName)
23 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl)
24 | .option("partitions", 1)
25 | .option("query", query)
26 | .load
27 |
28 | assertEquals(Seq("foo", "bar"), df.columns.toSeq) // ordering should be preserved
29 | assertEquals(1, df.count())
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/spark-3.0/src/main/scala/org/neo4j/spark/reader/Neo4jPartitionReader.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.reader
2 |
3 | import org.apache.spark.internal.Logging
4 | import org.apache.spark.sql.catalyst.InternalRow
5 | import org.apache.spark.sql.connector.read.PartitionReader
6 | import org.apache.spark.sql.sources.Filter
7 | import org.apache.spark.sql.types.StructType
8 | import org.neo4j.spark.service.PartitionSkipLimit
9 | import org.neo4j.spark.util.Neo4jOptions
10 |
11 | class Neo4jPartitionReader(private val options: Neo4jOptions,
12 | private val filters: Array[Filter],
13 | private val schema: StructType,
14 | private val jobId: String,
15 | private val partitionSkipLimit: PartitionSkipLimit,
16 | private val scriptResult: java.util.List[java.util.Map[String, AnyRef]],
17 | private val requiredColumns: StructType)
18 | extends BasePartitionReader(options, filters, schema, jobId, partitionSkipLimit, scriptResult, requiredColumns)
19 | with PartitionReader[InternalRow]
--------------------------------------------------------------------------------
/common/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <modelVersion>4.0.0</modelVersion>
6 |
7 |   <artifactId>neo4j-connector-apache-spark_${scala.binary.version}_common</artifactId>
8 |   <name>neo4j-connector-apache-spark-common</name>
9 |   <description>Common Services for Neo4j Connector for Apache Spark using the binary Bolt Driver</description>
10 |
11 |   <version>4.0.0</version>
12 |   <packaging>jar</packaging>
13 |
14 |   <parent>
15 |     <groupId>neo4j-contrib</groupId>
16 |     <artifactId>neo4j-connector-apache-spark</artifactId>
17 |     <version>4.0.0</version>
18 |   </parent>
19 |
20 |   <dependencies>
21 |     <dependency>
22 |       <groupId>neo4j-contrib</groupId>
23 |       <artifactId>neo4j-connector-apache-spark_${scala.binary.version}_test-support</artifactId>
24 |       <version>4.0.0</version>
25 |       <scope>test</scope>
26 |     </dependency>
27 |   </dependencies>
28 | </project>
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/pages/neo4j-cluster.adoc:
--------------------------------------------------------------------------------
1 |
2 | [#neo4j_causal_cluster]
3 | = Using with Neo4j Causal Cluster
4 |
5 | [abstract]
6 | --
7 | This chapter describes considerations around using Neo4j Connector for Apache Spark with Neo4j Enterprise Causal Cluster.
8 | --
9 |
10 | == Overview
11 |
12 | link:https://neo4j.com/docs/operations-manual/current/clustering/[Neo4j Clustering] is a feature available in
13 | Enterprise Edition that provides high availability of the database through multiple database members.
14 |
15 | Neo4j Enterprise uses a link:https://neo4j.com/docs/operations-manual/current/clustering/introduction/#causal-clustering-introduction-operational[LEADER/FOLLOWER]
16 | operational view, where writes are always processed by the leader, while reads can be serviced either by followers
17 | or, optionally, by read replicas, which maintain a copy of the database and scale out read operations
18 | horizontally.
19 |
20 | == Remote Clients
21 |
22 | Sometimes remote applications talk to Neo4j via the official drivers and also want to use this
23 | connector's functionality. Best practices in these cases are:
24 |
25 | * Always use a `neo4j+s://` driver URI when communicating with the cluster in the client application.
--------------------------------------------------------------------------------
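
As a concrete companion to the routing advice above, here is a hedged sketch of writing a DataFrame through the connector with a `neo4j+s://` URI so that writes are routed to the cluster leader. The URI and data are placeholders, and the write-side `labels` option name is an assumption consistent with the read examples in this repository's tests.

import org.apache.spark.sql.{SaveMode, SparkSession}

object ClusterWriteSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("cluster-write-sketch")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    val people = Seq("John Doe", "Jane Doe").toDF("name")

    people.write
      .format("org.neo4j.spark.DataSource")
      .mode(SaveMode.Append)
      // Routing URI: the driver sends writes to the current leader.
      .option("url", "neo4j+s://my-cluster.example.com:7687")
      .option("labels", "Person")   // assumed write option; creates :Person nodes
      .save()

    spark.stop()
  }
}
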
/spark-3.0/src/main/scala/org/neo4j/spark/reader/SimplePartitionReaderFactory.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.reader
2 |
3 | import org.apache.spark.sql.catalyst.InternalRow
4 | import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory}
5 | import org.apache.spark.sql.sources.Filter
6 | import org.apache.spark.sql.types.StructType
7 | import org.neo4j.spark.service.PartitionSkipLimit
8 | import org.neo4j.spark.util.Neo4jOptions
9 |
10 | class SimplePartitionReaderFactory(private val neo4jOptions: Neo4jOptions,
11 | private val filters: Array[Filter],
12 | private val schema: StructType,
13 | private val jobId: String,
14 | private val scriptResult: java.util.List[java.util.Map[String, AnyRef]],
15 | private val requiredColumns: StructType) extends PartitionReaderFactory {
16 | override def createReader(partition: InputPartition): PartitionReader[InternalRow] = new Neo4jPartitionReader(
17 | neo4jOptions,
18 | filters,
19 | schema,
20 | jobId,
21 | partition.asInstanceOf[Neo4jPartition].partitionSkipLimit,
22 | scriptResult,
23 | requiredColumns
24 | )
25 | }
26 |
--------------------------------------------------------------------------------
/spark-2.4/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j35xTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.SparkException
4 | import org.junit.Assert.{assertTrue, fail}
5 | import org.junit.{Assume, BeforeClass, Test}
6 | import org.neo4j.driver.exceptions.ClientException
7 |
8 | object DataSourceReaderNeo4j35xTSE {
9 | @BeforeClass
10 | def checkNeo4jVersion() {
11 | Assume.assumeTrue(TestUtil.neo4jVersion().startsWith("3.5"))
12 | }
13 | }
14 |
15 | class DataSourceReaderNeo4j35xTSE extends SparkConnectorScalaBaseTSE {
16 | @Test
17 | def testShouldThrowClearErrorIfADbIsSpecified(): Unit = {
18 | try {
19 | ss.read.format(classOf[DataSource].getName)
20 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl)
21 | .option("database", "db1")
22 | .option("labels", "MATCH (h:Household) RETURN id(h)")
23 | .load()
24 | .show()
25 | }
26 | catch {
27 | case clientException: ClientException => {
28 | assertTrue(clientException.getMessage.equals(
29 | "Database name parameter for selecting database is not supported in Bolt Protocol Version 3.0. Database name: 'db1'"
30 | ))
31 | }
32 | case generic => fail(s"should be thrown a ${classOf[SparkException].getName}, got ${generic.getClass} instead")
33 | }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/spark-3.0/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j35xTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.SparkException
4 | import org.junit.Assert.{assertTrue, fail}
5 | import org.junit.{Assume, BeforeClass, Test}
6 | import org.neo4j.driver.exceptions.ClientException
7 |
8 | object DataSourceReaderNeo4j35xTSE {
9 | @BeforeClass
10 | def checkNeo4jVersion() {
11 | Assume.assumeTrue(TestUtil.neo4jVersion().startsWith("3.5"))
12 | }
13 | }
14 |
15 | class DataSourceReaderNeo4j35xTSE extends SparkConnectorScalaBaseTSE {
16 | @Test
17 | def testShouldThrowClearErrorIfADbIsSpecified(): Unit = {
18 | try {
19 | ss.read.format(classOf[DataSource].getName)
20 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl)
21 | .option("database", "db1")
22 | .option("labels", "MATCH (h:Household) RETURN id(h)")
23 | .load()
24 | .show()
25 | }
26 | catch {
27 | case clientException: ClientException => {
28 | assertTrue(clientException.getMessage.equals(
29 | "Database name parameter for selecting database is not supported in Bolt Protocol Version 3.0. Database name: 'db1'"
30 | ))
31 | }
32 | case generic => fail(s"should be thrown a ${classOf[SparkException].getName}, got ${generic.getClass} instead")
33 | }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/common/src/main/scala/org/neo4j/spark/util/DriverCache.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.util
2 |
3 | import org.neo4j.driver.{Driver, GraphDatabase}
4 | import org.neo4j.spark.util.DriverCache.{cache, jobIdCache}
5 |
6 | import java.util.concurrent.ConcurrentHashMap
7 | import java.util.{Collections, function}
8 |
9 | object DriverCache {
10 | private val cache: ConcurrentHashMap[Neo4jDriverOptions, Driver] = new ConcurrentHashMap[Neo4jDriverOptions, Driver]
11 | private val jobIdCache = Collections.newSetFromMap[String](new ConcurrentHashMap[String, java.lang.Boolean]())
12 | }
13 |
14 | class DriverCache(private val options: Neo4jDriverOptions, private val jobId: String) extends Serializable with AutoCloseable {
15 | def getOrCreate(): Driver = {
16 | this.synchronized {
17 | jobIdCache.add(jobId)
18 | cache.computeIfAbsent(options, new function.Function[Neo4jDriverOptions, Driver] {
19 | override def apply(t: Neo4jDriverOptions): Driver = GraphDatabase.driver(t.url, t.toNeo4jAuth, t.toDriverConfig)
20 | })
21 | }
22 | }
23 |
24 | def close(): Unit = {
25 | this.synchronized {
26 | jobIdCache.remove(jobId)
27 | if(jobIdCache.isEmpty) {
28 | val driver = cache.remove(options)
29 | if (driver != null) {
30 | Neo4jUtil.closeSafety(driver)
31 | }
32 | }
33 | }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
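
A short usage sketch of the reference-counting contract above: the same `Neo4jDriverOptions` key returns the cached `Driver`, and the underlying driver is only closed once the last job id is released. `Neo4jDriverOptions` construction is not shown in this extract, so it is passed in as a parameter here; the job ids are placeholders.

import org.neo4j.spark.util.{DriverCache, Neo4jDriverOptions}

object DriverCacheSketch {
  def shareDriver(opts: Neo4jDriverOptions): Unit = {
    val cacheA = new DriverCache(opts, "job-a")
    val cacheB = new DriverCache(opts, "job-b")

    val driver = cacheA.getOrCreate() // first call creates and caches the Driver
    cacheB.getOrCreate()              // same options, same cached instance

    cacheA.close()                    // "job-a" released; the driver stays open
    cacheB.close()                    // last job id released; the driver is closed safely
  }
}
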
/spark-3.0/src/main/scala/org/neo4j/spark/writer/Neo4jBatchWriter.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.writer
2 |
3 | import org.apache.spark.sql.SaveMode
4 | import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, PhysicalWriteInfo, WriterCommitMessage}
5 | import org.apache.spark.sql.types.StructType
6 | import org.neo4j.spark.service.SchemaService
7 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions}
8 |
9 | class Neo4jBatchWriter(jobId: String,
10 | structType: StructType,
11 | saveMode: SaveMode,
12 | neo4jOptions: Neo4jOptions) extends BatchWrite{
13 | override def createBatchWriterFactory(physicalWriteInfo: PhysicalWriteInfo): DataWriterFactory = {
14 | val schemaService = new SchemaService(neo4jOptions, driverCache)
15 | schemaService.createOptimizations()
16 | val scriptResult = schemaService.execute(neo4jOptions.script)
17 | schemaService.close()
18 |
19 | new Neo4jDataWriterFactory(
20 | jobId,
21 | structType,
22 | saveMode,
23 | neo4jOptions,
24 | scriptResult
25 | )
26 | }
27 |
28 | private val driverCache = new DriverCache(neo4jOptions.connection, jobId)
29 |
30 | override def commit(messages: Array[WriterCommitMessage]): Unit = {
31 | driverCache.close()
32 | }
33 |
34 | override def abort(messages: Array[WriterCommitMessage]): Unit = {
35 | driverCache.close()
36 | }
37 | }
--------------------------------------------------------------------------------
/spark-3.0/src/main/scala/org/neo4j/spark/reader/SimpleScanBuilder.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.reader
2 |
3 | import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns}
4 | import org.apache.spark.sql.sources.Filter
5 | import org.apache.spark.sql.types.StructType
6 | import org.neo4j.spark.util.Neo4jOptions
7 |
8 | class SimpleScanBuilder(neo4jOptions: Neo4jOptions, jobId: String, schema: StructType) extends ScanBuilder
9 | with SupportsPushDownFilters
10 | with SupportsPushDownRequiredColumns {
11 |
12 | private var filters: Array[Filter] = Array[Filter]()
13 |
14 | private var requiredColumns: StructType = new StructType()
15 |
16 | override def build(): Scan = {
17 | new SimpleScan(neo4jOptions, jobId, schema, filters, requiredColumns)
18 | }
19 |
20 | override def pushFilters(filtersArray: Array[Filter]): Array[Filter] = {
21 | if (neo4jOptions.pushdownFiltersEnabled) {
22 | filters = filtersArray
23 | }
24 |
25 | filtersArray
26 | }
27 |
28 | override def pushedFilters(): Array[Filter] = filters
29 |
30 | override def pruneColumns(requiredSchema: StructType): Unit = {
31 | requiredColumns = if (
32 | !neo4jOptions.pushdownColumnsEnabled || neo4jOptions.relationshipMetadata.nodeMap
33 | || requiredSchema == schema
34 | ) {
35 | new StructType()
36 | } else {
37 | requiredSchema
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
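
Since `SimpleScanBuilder` implements `SupportsPushDownFilters` and `SupportsPushDownRequiredColumns`, Spark hands it both the predicates and the pruned schema, which the connector can fold into the generated Cypher. A hedged sketch of a query whose filter and projection are pushdown candidates; the URL and data are placeholders.

import org.apache.spark.sql.SparkSession

object PushdownSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("pushdown-sketch")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    val people = spark.read
      .format("org.neo4j.spark.DataSource")
      .option("url", "bolt://localhost:7687")
      .option("labels", "Person")
      .load()

    // The predicate reaches pushFilters(...) and the projection reaches pruneColumns(...),
    // unless the corresponding pushdown option is disabled in Neo4jOptions.
    people.filter($"age" > 30)
      .select("name")
      .explain(true)

    spark.stop()
  }
}
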
/doc/docbook/content-map.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/spark-2.4/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <modelVersion>4.0.0</modelVersion>
6 |
7 |   <artifactId>neo4j-connector-apache-spark_${scala.binary.version}_2.4</artifactId>
8 |   <name>neo4j-connector-apache-spark-2.4</name>
9 |   <description>Spark 2.4 for Neo4j Connector for Apache Spark using the binary Bolt Driver</description>
10 |
11 |   <version>4.0.0</version>
12 |   <packaging>jar</packaging>
13 |
14 |   <parent>
15 |     <groupId>neo4j-contrib</groupId>
16 |     <artifactId>neo4j-connector-apache-spark</artifactId>
17 |     <version>4.0.0</version>
18 |   </parent>
19 |
20 |   <properties>
21 |     <spark.version>2.4.5</spark.version>
22 |   </properties>
23 |
24 |   <dependencies>
25 |     <dependency>
26 |       <groupId>neo4j-contrib</groupId>
27 |       <artifactId>neo4j-connector-apache-spark_${scala.binary.version}_common</artifactId>
28 |       <version>4.0.0</version>
29 |     </dependency>
30 |     <dependency>
31 |       <groupId>neo4j-contrib</groupId>
32 |       <artifactId>neo4j-connector-apache-spark_${scala.binary.version}_test-support</artifactId>
33 |       <version>4.0.0</version>
34 |       <scope>test</scope>
35 |     </dependency>
36 |   </dependencies>
37 | </project>
38 |
--------------------------------------------------------------------------------
/spark-3.0/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <modelVersion>4.0.0</modelVersion>
6 |
7 |   <artifactId>neo4j-connector-apache-spark_${scala.binary.version}_3.0</artifactId>
8 |   <name>neo4j-connector-apache-spark-3.0</name>
9 |   <description>Spark 3.0 for Neo4j Connector for Apache Spark using the binary Bolt Driver</description>
10 |
11 |   <version>4.0.0</version>
12 |   <packaging>jar</packaging>
13 |
14 |   <parent>
15 |     <groupId>neo4j-contrib</groupId>
16 |     <artifactId>neo4j-connector-apache-spark</artifactId>
17 |     <version>4.0.0</version>
18 |   </parent>
19 |
20 |   <properties>
21 |     <spark.version>3.0.1</spark.version>
22 |   </properties>
23 |
24 |   <dependencies>
25 |     <dependency>
26 |       <groupId>neo4j-contrib</groupId>
27 |       <artifactId>neo4j-connector-apache-spark_${scala.binary.version}_common</artifactId>
28 |       <version>4.0.0</version>
29 |     </dependency>
30 |     <dependency>
31 |       <groupId>neo4j-contrib</groupId>
32 |       <artifactId>neo4j-connector-apache-spark_${scala.binary.version}_test-support</artifactId>
33 |       <version>4.0.0</version>
34 |       <scope>test</scope>
35 |     </dependency>
36 |   </dependencies>
37 | </project>
38 |
--------------------------------------------------------------------------------
/doc/asciidoc/neo4j-cluster/index.adoc:
--------------------------------------------------------------------------------
1 |
2 | [#neo4j_causal_cluster]
3 | == Using with Neo4j Causal Cluster
4 |
5 | ifdef::env-docs[]
6 | [abstract]
7 | --
8 | This chapter describes considerations around using Neo4j Connector for Apache Spark with Neo4j Enterprise Causal Cluster.
9 | --
10 | endif::env-docs[]
11 |
12 | === Overview
13 |
14 | link:https://neo4j.com/docs/operations-manual/current/clustering/[Neo4j Clustering] is a feature available in
15 | Enterprise Edition that provides high availability of the database through multiple database members.
16 |
17 | Neo4j Enterprise uses a link:https://neo4j.com/docs/operations-manual/current/clustering/introduction/#causal-clustering-introduction-operational[LEADER/FOLLOWER]
18 | operational view, where writes are always processed by the leader, while reads can be serviced either by followers
19 | or, optionally, by read replicas, which maintain a copy of the database and scale out read operations
20 | horizontally.
21 |
22 | === Remote Clients
23 |
24 | Sometimes remote applications talk to Neo4j via the official drivers and also want to use this
25 | connector's functionality. Best practices in these cases are:
26 |
27 | * Always use a `neo4j+s://` driver URI when communicating with the cluster in the client application.
28 | * Use link:https://neo4j.com/docs/driver-manual/current/sessions-transactions/#driver-transactions[Explicit Write Transactions] in
29 | your client application when running Cypher that writes data, to ensure that the routing
30 | driver routes the query to the leader.
--------------------------------------------------------------------------------
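
A hedged Scala sketch of the explicit write transaction advice above, using the same driver API that appears in this repository's tests; the URI, credentials, and Cypher are placeholders.

import org.neo4j.driver.{AuthTokens, GraphDatabase, Transaction, TransactionWork}
import org.neo4j.driver.summary.ResultSummary

object ExplicitWriteTxSketch {
  def main(args: Array[String]): Unit = {
    val driver = GraphDatabase.driver(
      "neo4j+s://my-cluster.example.com:7687",   // routing URI (placeholder)
      AuthTokens.basic("neo4j", "<password>"))
    val session = driver.session()
    try {
      // writeTransaction ensures the routing driver sends this work to the leader.
      session.writeTransaction(new TransactionWork[ResultSummary] {
        override def execute(tx: Transaction): ResultSummary =
          tx.run("CREATE (:Person {name: 'John Doe'})").consume()
      })
    } finally {
      session.close()
      driver.close()
    }
  }
}
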
/spark-2.4/src/main/scala/org/neo4j/spark/DataSource.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import java.util.{Optional, UUID}
4 | import org.apache.spark.sql.SaveMode
5 | import org.apache.spark.sql.sources.DataSourceRegister
6 | import org.apache.spark.sql.sources.v2.writer.DataSourceWriter
7 | import org.apache.spark.sql.sources.v2.{DataSourceOptions, DataSourceV2, ReadSupport, WriteSupport}
8 | import org.apache.spark.sql.types.StructType
9 | import org.neo4j.spark.reader.Neo4jDataSourceReader
10 | import org.neo4j.spark.util.Neo4jOptions
11 | import org.neo4j.spark.writer.Neo4jDataSourceWriter
12 |
13 | class DataSource extends DataSourceV2 with ReadSupport with DataSourceRegister with WriteSupport {
14 |
15 | private val jobId: String = UUID.randomUUID().toString
16 |
17 | def createReader(options: DataSourceOptions) = new Neo4jDataSourceReader(options, jobId)
18 |
19 | override def shortName: String = "neo4j"
20 |
21 | override def createWriter(jobId: String,
22 | structType: StructType,
23 | saveMode: SaveMode,
24 | options: DataSourceOptions): Optional[DataSourceWriter] =
25 | if (Neo4jOptions.SUPPORTED_SAVE_MODES.contains(saveMode)) {
26 | Optional.of(new Neo4jDataSourceWriter(jobId, structType, saveMode, options))
27 | } else {
28 | throw new IllegalArgumentException(
29 | s"""Unsupported SaveMode.
30 | |You provided $saveMode, supported are:
31 | |${Neo4jOptions.SUPPORTED_SAVE_MODES.mkString(",")}
32 | |""".stripMargin)
33 | }
34 | }
--------------------------------------------------------------------------------
/spark-3.0/src/main/scala/org/neo4j/spark/writer/Neo4jWriterBuilder.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.writer
2 |
3 | import org.apache.spark.sql.SaveMode
4 | import org.apache.spark.sql.connector.write.{BatchWrite, SupportsOverwrite, SupportsTruncate, WriteBuilder}
5 | import org.apache.spark.sql.sources.Filter
6 | import org.apache.spark.sql.types.StructType
7 | import org.neo4j.driver.AccessMode
8 | import org.neo4j.spark.util.{Neo4jOptions, NodeSaveMode, ValidationUtil, Validations}
9 |
10 | class Neo4jWriterBuilder(jobId: String,
11 | structType: StructType,
12 | saveMode: SaveMode,
13 | neo4jOptions: Neo4jOptions) extends WriteBuilder
14 | with SupportsOverwrite
15 | with SupportsTruncate {
16 |
17 | def validOptions(): Neo4jOptions = {
18 | neo4jOptions.validate(neo4jOptions =>
19 | Validations.writer(neo4jOptions, jobId, saveMode, (o: Neo4jOptions) => {
20 | ValidationUtil.isFalse(
21 | o.relationshipMetadata.sourceSaveMode.equals(NodeSaveMode.ErrorIfExists)
22 | && o.relationshipMetadata.targetSaveMode.equals(NodeSaveMode.ErrorIfExists),
23 | "Save mode 'ErrorIfExists' is not supported on Spark 3.0, use 'Append' instead.")
24 | }))
25 | }
26 |
27 | override def buildForBatch(): BatchWrite = new Neo4jBatchWriter(jobId,
28 | structType,
29 | saveMode,
30 | validOptions()
31 | )
32 |
33 | override def overwrite(filters: Array[Filter]): WriteBuilder = {
34 | new Neo4jWriterBuilder(jobId, structType, SaveMode.Overwrite, neo4jOptions)
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/doc/javascript/colorize.js:
--------------------------------------------------------------------------------
1 | // CodeMirror, copyright (c) by Marijn Haverbeke and others
2 | // Distributed under an MIT license: http://codemirror.net/LICENSE
3 | // Modified by the Neo4j team.
4 |
5 | "use strict";
6 |
7 | CodeMirror.colorize = (function() {
8 |
9 | var isBlock = /^(p|li|div|h\d|pre|blockquote|td)$/;
10 |
11 | function textContent(node, out) {
12 | if (node.nodeType == 3) return out.push(node.nodeValue);
13 | for (var ch = node.firstChild; ch; ch = ch.nextSibling) {
14 | textContent(ch, out);
15 | if (isBlock.test(node.nodeType)) out.push("\n");
16 | }
17 | }
18 |
19 | return function() {
20 | var collection = document.body.getElementsByTagName("code");
21 |
22 | for (var i = 0; i < collection.length; ++i) {
23 | var theme = " cm-s-default";
24 | var node = collection[i];
25 | var mode = node.getAttribute("data-lang");
26 | if (!mode) continue;
27 | if (mode === "cypher") {
28 | theme = " cm-s-neo";
29 | } else if (mode === "cypher-noexec") {
30 | mode = "cypher";
31 | theme = " cm-s-neo";
32 | } else if (mode === "java") {
33 | mode = "text/x-java";
34 | } else if (mode === "csharp") {
35 | mode = "text/x-csharp";
36 | } else if (mode === "sql") {
37 | mode = "text/x-sql";
38 | } else if (mode === "properties") {
39 | mode = "text/x-properties";
40 | } else if (mode === "json") {
41 | mode = "application/json";
42 | }
43 |
44 | var text = [];
45 | textContent(node, text);
46 | node.innerHTML = "";
47 | CodeMirror.runMode(text.join(""), mode, node);
48 |
49 | node.className += theme;
50 | }
51 | };
52 | })();
53 |
--------------------------------------------------------------------------------
/spark-2.4/src/main/scala/org/neo4j/spark/writer/Neo4jDataSourceWriter.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.writer
2 |
3 | import org.apache.spark.sql.SaveMode
4 | import org.apache.spark.sql.catalyst.InternalRow
5 | import org.apache.spark.sql.sources.v2.DataSourceOptions
6 | import org.apache.spark.sql.sources.v2.writer.{DataSourceWriter, DataWriterFactory, WriterCommitMessage}
7 | import org.apache.spark.sql.types.StructType
8 | import org.neo4j.driver.AccessMode
9 | import org.neo4j.spark.service.SchemaService
10 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions, NodeSaveMode, ValidationUtil, Validations}
11 |
12 | class Neo4jDataSourceWriter(jobId: String,
13 | structType: StructType,
14 | saveMode: SaveMode,
15 | options: DataSourceOptions) extends DataSourceWriter {
16 |
17 | private val optionsMap = options.asMap()
18 | optionsMap.put(Neo4jOptions.ACCESS_MODE, AccessMode.WRITE.toString)
19 |
20 | private val neo4jOptions: Neo4jOptions = new Neo4jOptions(optionsMap)
21 | .validate((neo4jOptions: Neo4jOptions) => Validations.writer(neo4jOptions, jobId, saveMode, _ => Unit))
22 |
23 | private val driverCache = new DriverCache(neo4jOptions.connection, jobId)
24 |
25 | override def createWriterFactory(): DataWriterFactory[InternalRow] = {
26 | val schemaService = new SchemaService(neo4jOptions, driverCache)
27 | schemaService.createOptimizations()
28 | val scriptResult = schemaService.execute(neo4jOptions.script)
29 | schemaService.close()
30 | new Neo4jDataWriterFactory(jobId, structType, saveMode, neo4jOptions, scriptResult)
31 | }
32 |
33 | override def commit(messages: Array[WriterCommitMessage]): Unit = {
34 | driverCache.close()
35 | }
36 |
37 | override def abort(messages: Array[WriterCommitMessage]): Unit = {
38 | driverCache.close()
39 | }
40 | }
--------------------------------------------------------------------------------
/test-support/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <modelVersion>4.0.0</modelVersion>
6 |
7 |   <artifactId>neo4j-connector-apache-spark_${scala.binary.version}_test-support</artifactId>
8 |   <name>neo4j-connector-apache-spark-test-support</name>
9 |   <description>Test Utilities for Neo4j Connector for Apache Spark using the binary Bolt Driver</description>
10 |
11 |   <version>4.0.0</version>
12 |   <packaging>jar</packaging>
13 |
14 |   <parent>
15 |     <groupId>neo4j-contrib</groupId>
16 |     <artifactId>neo4j-connector-apache-spark</artifactId>
17 |     <version>4.0.0</version>
18 |   </parent>
19 |
20 |   <dependencies>
21 |
22 |     <dependency>
23 |       <groupId>junit</groupId>
24 |       <artifactId>junit</artifactId>
25 |       <version>4.13.1</version>
26 |     </dependency>
27 |
28 |     <dependency>
29 |       <groupId>org.hamcrest</groupId>
30 |       <artifactId>hamcrest-library</artifactId>
31 |       <version>1.3</version>
32 |     </dependency>
33 |
34 |     <dependency>
35 |       <groupId>org.testcontainers</groupId>
36 |       <artifactId>testcontainers</artifactId>
37 |       <version>${testcontainers.version}</version>
38 |     </dependency>
39 |
40 |     <dependency>
41 |       <groupId>org.testcontainers</groupId>
42 |       <artifactId>neo4j</artifactId>
43 |       <version>${testcontainers.version}</version>
44 |     </dependency>
45 |
46 |     <dependency>
47 |       <groupId>com.fasterxml.jackson.core</groupId>
48 |       <artifactId>jackson-annotations</artifactId>
49 |       <version>2.12.0</version>
50 |     </dependency>
51 |
52 |   </dependencies>
53 | </project>
54 |
--------------------------------------------------------------------------------
/spark-3.0/src/main/scala/org/neo4j/spark/Neo4jTable.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.internal.Logging
4 | import org.apache.spark.sql.SaveMode
5 | import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability}
6 | import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
7 | import org.apache.spark.sql.types.StructType
8 | import org.apache.spark.sql.util.CaseInsensitiveStringMap
9 | import org.neo4j.driver.AccessMode
10 | import org.neo4j.spark.reader.SimpleScanBuilder
11 | import org.neo4j.spark.util.{Neo4jOptions, Validations}
12 | import org.neo4j.spark.writer.Neo4jWriterBuilder
13 |
14 | import scala.collection.JavaConverters._
15 |
16 | class Neo4jTable(schema: StructType, options: java.util.Map[String, String], jobId: String) extends Table
17 | with SupportsRead
18 | with SupportsWrite
19 | with Logging {
20 |
21 | private val neo4jOptions = new Neo4jOptions(options)
22 |
23 | override def name(): String = neo4jOptions.getTableName
24 |
25 | override def schema(): StructType = schema
26 |
27 | override def capabilities(): java.util.Set[TableCapability] = Set(
28 | TableCapability.BATCH_READ,
29 | TableCapability.BATCH_WRITE,
30 | TableCapability.ACCEPT_ANY_SCHEMA,
31 | TableCapability.OVERWRITE_BY_FILTER,
32 | TableCapability.OVERWRITE_DYNAMIC
33 | ).asJava
34 |
35 | override def newScanBuilder(options: CaseInsensitiveStringMap): SimpleScanBuilder = {
36 | val validOptions = neo4jOptions.validate(neo4jOptions => Validations.read(neo4jOptions, jobId))
37 | new SimpleScanBuilder(validOptions, jobId, schema())
38 | }
39 |
40 | override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
41 | val mapOptions = new java.util.HashMap[String, String](options)
42 | mapOptions.put(Neo4jOptions.ACCESS_MODE, AccessMode.WRITE.toString)
43 | val writeNeo4jOptions = new Neo4jOptions(mapOptions)
44 | new Neo4jWriterBuilder(jobId, info.schema(), SaveMode.Append, writeNeo4jOptions)
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/spark-2.4/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j4xWithApocTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.junit.Assert.assertEquals
4 | import org.junit.{Assume, BeforeClass, Test}
5 | import org.neo4j.driver.summary.ResultSummary
6 | import org.neo4j.driver.{SessionConfig, Transaction, TransactionWork}
7 |
8 | object DataSourceReaderNeo4j4xWithApocTSE {
9 | @BeforeClass
10 | def checkNeo4jVersion() {
11 | Assume.assumeFalse(TestUtil.neo4jVersion().startsWith("3.5"))
12 | }
13 | }
14 |
15 | class DataSourceReaderNeo4j4xWithApocTSE extends SparkConnectorScalaBaseWithApocTSE {
16 |
17 | @Test
18 | def testMultiDbJoin(): Unit = {
19 | SparkConnectorScalaSuiteWithApocIT.driver.session(SessionConfig.forDatabase("db1"))
20 | .writeTransaction(
21 | new TransactionWork[ResultSummary] {
22 | override def execute(tx: Transaction): ResultSummary = tx.run(
23 | """
24 | CREATE (p1:Person:Customer {name: 'John Doe'}),
25 | (p2:Person:Customer {name: 'Mark Brown'}),
26 | (p3:Person:Customer {name: 'Cindy White'})
27 | """).consume()
28 | })
29 |
30 | SparkConnectorScalaSuiteWithApocIT.driver.session(SessionConfig.forDatabase("db2"))
31 | .writeTransaction(
32 | new TransactionWork[ResultSummary] {
33 | override def execute(tx: Transaction): ResultSummary = tx.run(
34 | """
35 | CREATE (p1:Person:Employee {name: 'Jane Doe'}),
36 | (p2:Person:Employee {name: 'John Doe'})
37 | """).consume()
38 | })
39 |
40 | val df1 = ss.read.format(classOf[DataSource].getName)
41 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl)
42 | .option("database", "db1")
43 | .option("labels", "Person")
44 | .load()
45 |
46 | val df2 = ss.read.format(classOf[DataSource].getName)
47 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl)
48 | .option("database", "db2")
49 | .option("labels", "Person")
50 | .load()
51 |
52 | assertEquals(3, df1.count())
53 | assertEquals(2, df2.count())
54 |
55 | val dfJoin = df1.join(df2, df1("name") === df2("name"))
56 | assertEquals(1, dfJoin.count())
57 | }
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/spark-3.0/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j4xWithApocTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.junit.Assert.assertEquals
4 | import org.junit.{Assume, BeforeClass, Test}
5 | import org.neo4j.driver.summary.ResultSummary
6 | import org.neo4j.driver.{SessionConfig, Transaction, TransactionWork}
7 |
8 | object DataSourceReaderNeo4j4xWithApocTSE {
9 | @BeforeClass
10 | def checkNeo4jVersion() {
11 | Assume.assumeFalse(TestUtil.neo4jVersion().startsWith("3.5"))
12 | }
13 | }
14 |
15 | class DataSourceReaderNeo4j4xWithApocTSE extends SparkConnectorScalaBaseWithApocTSE {
16 |
17 | @Test
18 | def testMultiDbJoin(): Unit = {
19 | SparkConnectorScalaSuiteWithApocIT.driver.session(SessionConfig.forDatabase("db1"))
20 | .writeTransaction(
21 | new TransactionWork[ResultSummary] {
22 | override def execute(tx: Transaction): ResultSummary = tx.run(
23 | """
24 | CREATE (p1:Person:Customer {name: 'John Doe'}),
25 | (p2:Person:Customer {name: 'Mark Brown'}),
26 | (p3:Person:Customer {name: 'Cindy White'})
27 | """).consume()
28 | })
29 |
30 | SparkConnectorScalaSuiteWithApocIT.driver.session(SessionConfig.forDatabase("db2"))
31 | .writeTransaction(
32 | new TransactionWork[ResultSummary] {
33 | override def execute(tx: Transaction): ResultSummary = tx.run(
34 | """
35 | CREATE (p1:Person:Employee {name: 'Jane Doe'}),
36 | (p2:Person:Employee {name: 'John Doe'})
37 | """).consume()
38 | })
39 |
40 | val df1 = ss.read.format(classOf[DataSource].getName)
41 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl)
42 | .option("database", "db1")
43 | .option("labels", "Person")
44 | .load()
45 |
46 | val df2 = ss.read.format(classOf[DataSource].getName)
47 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl)
48 | .option("database", "db2")
49 | .option("labels", "Person")
50 | .load()
51 |
52 | assertEquals(3, df1.count())
53 | assertEquals(2, df2.count())
54 |
55 | val dfJoin = df1.join(df2, df1("name") === df2("name"))
56 | assertEquals(1, dfJoin.count())
57 | }
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/common/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import java.util.concurrent.TimeUnit
4 | import org.apache.spark.sql.SparkSession
5 | import org.apache.spark.SparkConf
6 | import org.hamcrest.Matchers
7 | import org.junit._
8 | import org.junit.rules.TestName
9 | import org.neo4j.driver.summary.ResultSummary
10 | import org.neo4j.driver.{Transaction, TransactionWork}
11 |
12 | object SparkConnectorScalaBaseTSE {
13 |
14 | private var startedFromSuite = true
15 |
16 | @BeforeClass
17 | def setUpContainer() = {
18 | if (!SparkConnectorScalaSuiteIT.server.isRunning) {
19 | startedFromSuite = false
20 | SparkConnectorScalaSuiteIT.setUpContainer()
21 | }
22 | }
23 |
24 | @AfterClass
25 | def tearDownContainer() = {
26 | if (!startedFromSuite) {
27 | SparkConnectorScalaSuiteIT.tearDownContainer()
28 | }
29 | }
30 |
31 | }
32 |
33 | class SparkConnectorScalaBaseTSE {
34 |
35 | val conf: SparkConf = SparkConnectorScalaSuiteIT.conf
36 | val ss: SparkSession = SparkConnectorScalaSuiteIT.ss
37 |
38 | val _testName: TestName = new TestName
39 |
40 | @Rule
41 | def testName = _testName
42 |
43 | @Before
44 | def before() {
45 | SparkConnectorScalaSuiteIT.session()
46 | .writeTransaction(new TransactionWork[ResultSummary] {
47 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume()
48 | })
49 | }
50 |
51 | @After
52 | def after() {
53 | if (!TestUtil.isTravis()) {
54 | try {
55 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] {
56 | override def get(): Boolean = {
57 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections
58 | SparkConnectorScalaSuiteIT.connections == afterConnections
59 | }
60 | }, Matchers.equalTo(true), 30, TimeUnit.SECONDS)
61 | } finally {
62 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections
63 | if (SparkConnectorScalaSuiteIT.connections != afterConnections) { // just for debug purposes
64 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteIT.connections}, after: $afterConnections")
65 | }
66 | }
67 | }
68 | }
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/spark-3.0/src/main/scala/org/neo4j/spark/DataSource.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import java.util.UUID
4 | import org.apache.spark.sql.connector.catalog.{Table, TableProvider}
5 | import org.apache.spark.sql.connector.expressions.Transform
6 | import org.apache.spark.sql.sources.DataSourceRegister
7 | import org.apache.spark.sql.types.StructType
8 | import org.apache.spark.sql.util.CaseInsensitiveStringMap
9 | import org.neo4j.spark.service.SchemaService
10 | import org.neo4j.spark.util.Validations.validateConnection
11 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions}
12 |
13 | class DataSource extends TableProvider
14 | with DataSourceRegister {
15 |
16 | private val jobId: String = UUID.randomUUID().toString
17 |
18 | private var schema: StructType = null
19 |
20 | private var neo4jOptions: Neo4jOptions = null
21 |
22 | private def callSchemaService[T](neo4jOptions: Neo4jOptions, function: SchemaService => T): T = {
23 | val driverCache = new DriverCache(neo4jOptions.connection, jobId)
24 | val schemaService = new SchemaService(neo4jOptions, driverCache)
25 | try {
26 | validateConnection(driverCache.getOrCreate().session(neo4jOptions.session.toNeo4jSession))
27 | function(schemaService)
28 | } catch {
29 | case e: Throwable =>
30 | throw e
31 | } finally {
32 | schemaService.close()
33 | driverCache.close()
34 | }
35 | }
36 |
37 | override def inferSchema(caseInsensitiveStringMap: CaseInsensitiveStringMap): StructType = {
38 | if (schema == null) {
39 | schema = callSchemaService(getNeo4jOptions(caseInsensitiveStringMap), { schemaService => schemaService.struct() })
40 | }
41 |
42 | schema
43 | }
44 |
45 | private def getNeo4jOptions(caseInsensitiveStringMap: CaseInsensitiveStringMap) = {
46 | if(neo4jOptions == null) {
47 | neo4jOptions = new Neo4jOptions(caseInsensitiveStringMap.asCaseSensitiveMap())
48 | }
49 |
50 | neo4jOptions
51 | }
52 |
53 | override def getTable(structType: StructType, transforms: Array[Transform], map: java.util.Map[String, String]): Table = {
54 | val caseInsensitiveStringMapNeo4jOptions = new CaseInsensitiveStringMap(map);
55 | new Neo4jTable(inferSchema(caseInsensitiveStringMapNeo4jOptions), map, jobId)
56 | }
57 |
58 | override def shortName(): String = "neo4j"
59 | }
60 |
--------------------------------------------------------------------------------
/spark-3.0/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import java.util.concurrent.TimeUnit
4 | import org.apache.spark.sql.SparkSession
5 | import org.apache.spark.SparkConf
6 | import org.hamcrest.Matchers
7 | import org.junit._
8 | import org.junit.rules.TestName
9 | import org.neo4j.driver.summary.ResultSummary
10 | import org.neo4j.driver.{Transaction, TransactionWork}
11 |
12 | object SparkConnectorScalaBaseTSE {
13 |
14 | private var startedFromSuite = true
15 |
16 | @BeforeClass
17 | def setUpContainer() = {
18 | if (!SparkConnectorScalaSuiteIT.server.isRunning) {
19 | startedFromSuite = false
20 | SparkConnectorScalaSuiteIT.setUpContainer()
21 | }
22 | }
23 |
24 | @AfterClass
25 | def tearDownContainer() = {
26 | if (!startedFromSuite) {
27 | SparkConnectorScalaSuiteIT.tearDownContainer()
28 | }
29 | }
30 |
31 | }
32 |
33 | class SparkConnectorScalaBaseTSE {
34 |
35 | val conf: SparkConf = SparkConnectorScalaSuiteIT.conf
36 | val ss: SparkSession = SparkConnectorScalaSuiteIT.ss
37 |
38 | val _testName: TestName = new TestName
39 |
40 | @Rule
41 | def testName = _testName
42 |
43 | @Before
44 | def before() {
45 | SparkConnectorScalaSuiteIT.session()
46 | .writeTransaction(new TransactionWork[ResultSummary] {
47 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume()
48 | })
49 | }
50 |
51 | @After
52 | def after() {
53 | if (!TestUtil.isTravis()) {
54 | try {
55 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] {
56 | override def get(): Boolean = {
57 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections
58 | SparkConnectorScalaSuiteIT.connections == afterConnections
59 | }
60 | }, Matchers.equalTo(true), 30, TimeUnit.SECONDS)
61 | } finally {
62 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections
63 | if (SparkConnectorScalaSuiteIT.connections != afterConnections) { // just for debug purposes
64 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteIT.connections}, after: $afterConnections")
65 | }
66 | }
67 | }
68 | }
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/spark-2.4/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import java.util.concurrent.TimeUnit
4 |
5 | import org.apache.spark.sql.SparkSession
6 | import org.apache.spark.SparkConf
7 | import org.hamcrest.Matchers
8 | import org.junit._
9 | import org.junit.rules.TestName
10 | import org.neo4j.driver.summary.ResultSummary
11 | import org.neo4j.driver.{Transaction, TransactionWork}
12 |
13 | object SparkConnectorScalaBaseTSE {
14 |
15 | private var startedFromSuite = true
16 |
17 | @BeforeClass
18 | def setUpContainer() = {
19 | if (!SparkConnectorScalaSuiteIT.server.isRunning) {
20 | startedFromSuite = false
21 | SparkConnectorScalaSuiteIT.setUpContainer()
22 | }
23 | }
24 |
25 | @AfterClass
26 | def tearDownContainer() = {
27 | if (!startedFromSuite) {
28 | SparkConnectorScalaSuiteIT.tearDownContainer()
29 | }
30 | }
31 |
32 | }
33 |
34 | class SparkConnectorScalaBaseTSE {
35 |
36 | val conf: SparkConf = SparkConnectorScalaSuiteIT.conf
37 | val ss: SparkSession = SparkConnectorScalaSuiteIT.ss
38 |
39 | val _testName: TestName = new TestName
40 |
41 | @Rule
42 | def testName = _testName
43 |
44 | @Before
45 | def before() {
46 | SparkConnectorScalaSuiteIT.session()
47 | .writeTransaction(new TransactionWork[ResultSummary] {
48 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume()
49 | })
50 | }
51 |
52 | @After
53 | def after() {
54 | if (!TestUtil.isTravis()) {
55 | try {
56 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] {
57 | override def get(): Boolean = {
58 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections
59 | SparkConnectorScalaSuiteIT.connections == afterConnections
60 | }
61 | }, Matchers.equalTo(true), 30, TimeUnit.SECONDS)
62 | } finally {
63 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections
64 | if (SparkConnectorScalaSuiteIT.connections != afterConnections) { // just for debug purposes
65 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteIT.connections}, after: $afterConnections")
66 | }
67 | }
68 | }
69 | }
70 |
71 | }
72 |
--------------------------------------------------------------------------------
/common/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseWithApocTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import java.util.concurrent.TimeUnit
4 | import org.apache.spark.sql.SparkSession
5 | import org.apache.spark.SparkConf
6 | import org.hamcrest.Matchers
7 | import org.junit._
8 | import org.junit.rules.TestName
9 | import org.neo4j.driver.summary.ResultSummary
10 | import org.neo4j.driver.{Transaction, TransactionWork}
11 |
12 | object SparkConnectorScalaBaseWithApocTSE {
13 |
14 | private var startedFromSuite = true
15 |
16 | @BeforeClass
17 | def setUpContainer() = {
18 | if (!SparkConnectorScalaSuiteWithApocIT.server.isRunning) {
19 | startedFromSuite = false
20 | SparkConnectorScalaSuiteWithApocIT.setUpContainer()
21 | }
22 | }
23 |
24 | @AfterClass
25 | def tearDownContainer() = {
26 | if (!startedFromSuite) {
27 | SparkConnectorScalaSuiteWithApocIT.tearDownContainer()
28 | }
29 | }
30 |
31 | }
32 |
33 | class SparkConnectorScalaBaseWithApocTSE {
34 |
35 | val conf: SparkConf = SparkConnectorScalaSuiteWithApocIT.conf
36 | val ss: SparkSession = SparkConnectorScalaSuiteWithApocIT.ss
37 |
38 | val _testName: TestName = new TestName
39 |
40 | @Rule
41 | def testName = _testName
42 |
43 | @Before
44 | def before() {
45 | SparkConnectorScalaSuiteWithApocIT.session()
46 | .writeTransaction(new TransactionWork[ResultSummary] {
47 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume()
48 | })
49 | }
50 |
51 | @After
52 | def after() {
53 | if (!TestUtil.isTravis()) {
54 | try {
55 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] {
56 | override def get(): Boolean = {
57 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections
58 | SparkConnectorScalaSuiteWithApocIT.connections == afterConnections
59 | }
60 | }, Matchers.equalTo(true), 45, TimeUnit.SECONDS)
61 | } finally {
62 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections
63 | if (SparkConnectorScalaSuiteWithApocIT.connections != afterConnections) { // just for debug purposes
64 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteWithApocIT.connections}, after: $afterConnections")
65 | }
66 | }
67 | }
68 | }
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/spark-3.0/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseWithApocTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import java.util.concurrent.TimeUnit
4 | import org.apache.spark.sql.SparkSession
5 | import org.apache.spark.SparkConf
6 | import org.hamcrest.Matchers
7 | import org.junit._
8 | import org.junit.rules.TestName
9 | import org.neo4j.driver.summary.ResultSummary
10 | import org.neo4j.driver.{Transaction, TransactionWork}
11 |
12 | object SparkConnectorScalaBaseWithApocTSE {
13 |
14 | private var startedFromSuite = true
15 |
16 | @BeforeClass
17 | def setUpContainer() = {
18 | if (!SparkConnectorScalaSuiteWithApocIT.server.isRunning) {
19 | startedFromSuite = false
20 | SparkConnectorScalaSuiteWithApocIT.setUpContainer()
21 | }
22 | }
23 |
24 | @AfterClass
25 | def tearDownContainer() = {
26 | if (!startedFromSuite) {
27 | SparkConnectorScalaSuiteWithApocIT.tearDownContainer()
28 | }
29 | }
30 |
31 | }
32 |
33 | class SparkConnectorScalaBaseWithApocTSE {
34 |
35 | val conf: SparkConf = SparkConnectorScalaSuiteWithApocIT.conf
36 | val ss: SparkSession = SparkConnectorScalaSuiteWithApocIT.ss
37 |
38 | val _testName: TestName = new TestName
39 |
40 | @Rule
41 | def testName = _testName
42 |
43 | @Before
44 | def before() {
45 | SparkConnectorScalaSuiteWithApocIT.session()
46 | .writeTransaction(new TransactionWork[ResultSummary] {
47 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume()
48 | })
49 | }
50 |
51 | @After
52 | def after() {
53 | if (!TestUtil.isTravis()) {
54 | try {
55 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] {
56 | override def get(): Boolean = {
57 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections
58 | SparkConnectorScalaSuiteWithApocIT.connections == afterConnections
59 | }
60 | }, Matchers.equalTo(true), 45, TimeUnit.SECONDS)
61 | } finally {
62 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections
63 | if (SparkConnectorScalaSuiteWithApocIT.connections != afterConnections) { // just for debug purposes
64 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteWithApocIT.connections}, after: $afterConnections")
65 | }
66 | }
67 | }
68 | }
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/spark-2.4/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseWithApocTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import java.util.concurrent.TimeUnit
4 |
5 | import org.apache.spark.sql.SparkSession
6 | import org.apache.spark.SparkConf
7 | import org.hamcrest.Matchers
8 | import org.junit._
9 | import org.junit.rules.TestName
10 | import org.neo4j.driver.summary.ResultSummary
11 | import org.neo4j.driver.{Transaction, TransactionWork}
12 |
13 | object SparkConnectorScalaBaseWithApocTSE {
14 |
15 | private var startedFromSuite = true
16 |
17 | @BeforeClass
18 | def setUpContainer() = {
19 | if (!SparkConnectorScalaSuiteWithApocIT.server.isRunning) {
20 | startedFromSuite = false
21 | SparkConnectorScalaSuiteWithApocIT.setUpContainer()
22 | }
23 | }
24 |
25 | @AfterClass
26 | def tearDownContainer() = {
27 | if (!startedFromSuite) {
28 | SparkConnectorScalaSuiteWithApocIT.tearDownContainer()
29 | }
30 | }
31 |
32 | }
33 |
34 | class SparkConnectorScalaBaseWithApocTSE {
35 |
36 | val conf: SparkConf = SparkConnectorScalaSuiteWithApocIT.conf
37 | val ss: SparkSession = SparkConnectorScalaSuiteWithApocIT.ss
38 |
39 | val _testName: TestName = new TestName
40 |
41 | @Rule
42 | def testName = _testName
43 |
44 | @Before
45 | def before() {
46 | SparkConnectorScalaSuiteWithApocIT.session()
47 | .writeTransaction(new TransactionWork[ResultSummary] {
48 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume()
49 | })
50 | }
51 |
52 | @After
53 | def after() {
54 | if (!TestUtil.isTravis()) {
55 | try {
56 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] {
57 | override def get(): Boolean = {
58 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections
59 | SparkConnectorScalaSuiteWithApocIT.connections == afterConnections
60 | }
61 | }, Matchers.equalTo(true), 45, TimeUnit.SECONDS)
62 | } finally {
63 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections
64 | if (SparkConnectorScalaSuiteWithApocIT.connections != afterConnections) { // just for debug purposes
65 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteWithApocIT.connections}, after: $afterConnections")
66 | }
67 | }
68 | }
69 | }
70 |
71 | }
72 |
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/pages/graphkeys.adoc:
--------------------------------------------------------------------------------
1 | [#graphkeys]
2 | = Graph Keys
3 |
4 | When using the connector to write data, you need to indicate which columns of the DataFrame correspond to
5 | the identifying properties (keys) of the nodes that you are writing.
6 |
7 | The link:writing.html[Writing] section discussed the following options, which apply to the "Keys" strategy:
8 |
9 | * `node.keys`
10 | * `relationship.source.node.keys`
11 | * `relationship.target.node.keys`
12 |
13 | The following sections describe how to use key mappings to express the connection between the DataFrame format and the desired graph schema.
14 |
15 | == Graph Key Format
16 |
17 | Each of these options takes a comma-separated list of keys, such as `field1,field2`. In turn, each of the
18 | keys can contain a _mapping_ from a DataFrame attribute to a node property, such as `EventID:id`.
19 |
20 | This mapping is always expressed in the order `DataFrameID:NodeID` and allows the DataFrame column name
21 | and the Neo4j node property name to differ.
22 |
23 | == Simple Example
24 |
25 | The most common case is to simply provide the name of a single attribute in the DataFrame;
26 | the node then receives a property of the same name.
27 |
28 | ```
29 | my_person_dataframe.write
30 | .format("org.neo4j.spark.DataSource")
31 | .mode(SaveMode.Overwrite)
32 | .option("url", "bolt://localhost:7687")
33 | .option("labels", ":Person")
34 | .option("node.keys", "id")
35 | .save()
36 | ```
37 |
38 | == Complex Example
39 |
40 | Let's say that we want to write a DataFrame of "Location" nodes, and that the DataFrame
41 | looks like this:
42 |
43 | ```
44 | LocationName,LocationType
45 | USA,Country
46 | Richmond,City
47 | ```
48 |
49 | Further, let's assume that we need a compound key (both attributes must be used to uniquely identify a node)
50 | and that we want to use simpler names on node properties, so that we end up with Neo4j nodes like this:
51 |
52 | ```
53 | (:Location { name: 'USA', type: 'Country' })
54 | (:Location { name: 'Richmond', type: 'City' })
55 | ```
56 |
57 | To do this, we use the Graph Key expression `"LocationName:name,LocationType:type"`:
58 |
59 | ```
60 | locations_dataframe.write
61 | .format("org.neo4j.spark.DataSource")
62 | .mode(SaveMode.Overwrite)
63 | .option("url", "bolt://localhost:7687")
64 | .option("labels", ":Location")
65 | .option("node.keys", "LocationName:name,LocationType:type")
66 | .save()
67 | ```
68 |
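69 | == Relationship Keys
70 |
71 | The same `DataFrameID:NodeID` format applies to `relationship.source.node.keys` and `relationship.target.node.keys`
72 | when writing relationships with the "Keys" strategy. The listing below is an illustrative sketch only: the `BOUGHT`
73 | relationship type, the `Person` and `Product` labels, and the column names are assumptions made for this example.
74 |
75 | ```
76 | // hypothetical DataFrame with columns: customer_id, product_sku
77 | purchases_dataframe.write
78 | .format("org.neo4j.spark.DataSource")
79 | .mode(SaveMode.Overwrite)
80 | .option("url", "bolt://localhost:7687")
81 | .option("relationship", "BOUGHT")
82 | .option("relationship.save.strategy", "keys")
83 | .option("relationship.source.labels", ":Person")
84 | .option("relationship.source.node.keys", "customer_id:id")
85 | .option("relationship.target.labels", ":Product")
86 | .option("relationship.target.node.keys", "product_sku:sku")
87 | .save()
88 | ```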
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/pages/python.adoc:
--------------------------------------------------------------------------------
1 | = Using with Pyspark / Python
2 |
3 | [abstract]
4 | --
5 | This chapter provides information on using the Neo4j Connector for Apache Spark with Python.
6 | --
7 |
8 | This connector uses the link:https://jaceklaskowski.gitbooks.io/mastering-spark-sql/content/spark-sql-data-source-api-v2.html[DataSource V2 API] in
9 | Spark.
10 |
11 | With a properly configured PySpark interpreter, you can use Python to call the connector and do any Spark
12 | work.
13 |
14 | Here, we present examples of what the API looks like in Scala versus Python, to help you adapt any code examples you might have and get
15 | started quickly.
16 |
17 | This first listing is a simple program that reads all "Person" nodes out of a Neo4j instance into a dataframe, in Scala.
18 |
19 | [source,scala]
20 | ----
21 | import org.apache.spark.sql.{SaveMode, SparkSession}
22 |
23 | val spark = SparkSession.builder().getOrCreate()
24 |
25 | spark.read.format("org.neo4j.spark.DataSource")
26 | .option("url", "bolt://localhost:7687")
27 | .option("labels", "Person:Customer:Confirmed")
28 | .load()
29 | ----
30 |
31 | Here is the same program in Python:
32 |
33 | [source,python]
34 | ----
35 | spark.read.format("org.neo4j.spark.DataSource") \
36 | .option("url", "bolt://localhost:7687") \
37 | .option("labels", "Person:Customer:Confirmed") \
38 | .load()
39 | ----
40 |
41 | For the most part, the API is the same; we only adapt the syntax for Python by adding backslashes for line continuation,
42 | so that we do not run into Python's indentation rules.
43 |
44 | == API Differences
45 |
46 | Some common API constants may need to be referred to as strings in the PySpark API. Consider these two examples in Scala and Python,
47 | focusing on the `SaveMode`.
48 |
49 | [source,scala]
50 | ----
51 | import org.apache.spark.sql.{SaveMode, SparkSession}
52 |
53 | df.write
54 | .format("org.neo4j.spark.DataSource")
55 | .mode(SaveMode.ErrorIfExists)
56 | .option("url", "bolt://localhost:7687")
57 | .option("labels", ":Person")
58 | .save()
59 | ----
60 |
61 | The same program in Python is very similar, differing only in language syntax, but note how the mode is specified:
62 |
63 | [source,python]
64 | ----
65 | # No import is needed here: in PySpark the save mode is passed as a string
66 |
67 | df.write \
68 | .format("org.neo4j.spark.DataSource") \
69 | .mode("ErrorIfExists") \
70 | .option("url", "bolt://localhost:7687") \
71 | .option("labels", ":Person") \
72 | .save()
73 | ----
74 |
--------------------------------------------------------------------------------
/doc/gradlew.bat:
--------------------------------------------------------------------------------
1 | @if "%DEBUG%" == "" @echo off
2 | @rem ##########################################################################
3 | @rem
4 | @rem Gradle startup script for Windows
5 | @rem
6 | @rem ##########################################################################
7 |
8 | @rem Set local scope for the variables with windows NT shell
9 | if "%OS%"=="Windows_NT" setlocal
10 |
11 | set DIRNAME=%~dp0
12 | if "%DIRNAME%" == "" set DIRNAME=.
13 | set APP_BASE_NAME=%~n0
14 | set APP_HOME=%DIRNAME%
15 |
16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17 | set DEFAULT_JVM_OPTS=
18 |
19 | @rem Find java.exe
20 | if defined JAVA_HOME goto findJavaFromJavaHome
21 |
22 | set JAVA_EXE=java.exe
23 | %JAVA_EXE% -version >NUL 2>&1
24 | if "%ERRORLEVEL%" == "0" goto init
25 |
26 | echo.
27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28 | echo.
29 | echo Please set the JAVA_HOME variable in your environment to match the
30 | echo location of your Java installation.
31 |
32 | goto fail
33 |
34 | :findJavaFromJavaHome
35 | set JAVA_HOME=%JAVA_HOME:"=%
36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37 |
38 | if exist "%JAVA_EXE%" goto init
39 |
40 | echo.
41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42 | echo.
43 | echo Please set the JAVA_HOME variable in your environment to match the
44 | echo location of your Java installation.
45 |
46 | goto fail
47 |
48 | :init
49 | @rem Get command-line arguments, handling Windows variants
50 |
51 | if not "%OS%" == "Windows_NT" goto win9xME_args
52 |
53 | :win9xME_args
54 | @rem Slurp the command line arguments.
55 | set CMD_LINE_ARGS=
56 | set _SKIP=2
57 |
58 | :win9xME_args_slurp
59 | if "x%~1" == "x" goto execute
60 |
61 | set CMD_LINE_ARGS=%*
62 |
63 | :execute
64 | @rem Setup the command line
65 |
66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67 |
68 | @rem Execute Gradle
69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70 |
71 | :end
72 | @rem End local scope for the variables with windows NT shell
73 | if "%ERRORLEVEL%"=="0" goto mainEnd
74 |
75 | :fail
76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77 | rem the _cmd.exe /c_ return code!
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79 | exit /b 1
80 |
81 | :mainEnd
82 | if "%OS%"=="Windows_NT" endlocal
83 |
84 | :omega
85 |
--------------------------------------------------------------------------------
/spark-3.0/src/main/scala/org/neo4j/spark/reader/SimpleScan.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.reader
2 |
3 | import org.apache.spark.sql.connector.read.{Batch, InputPartition, PartitionReaderFactory, Scan}
4 | import org.apache.spark.sql.sources.Filter
5 | import org.apache.spark.sql.types.StructType
6 | import org.neo4j.spark.service.{PartitionSkipLimit, SchemaService}
7 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions}
8 |
9 | import scala.collection.JavaConverters.seqAsJavaListConverter
10 |
11 | case class Neo4jPartition(partitionSkipLimit: PartitionSkipLimit) extends InputPartition
12 |
13 | class SimpleScan(
14 | neo4jOptions: Neo4jOptions,
15 | jobId: String,
16 | schema: StructType,
17 | filters: Array[Filter],
18 | requiredColumns: StructType
19 | ) extends Scan with Batch {
20 |
21 | override def toBatch: Batch = this
22 |
23 | var scriptResult: java.util.List[java.util.Map[String, AnyRef]] = _
24 |
25 | private def callSchemaService[T](function: SchemaService => T): T = {
26 | val driverCache = new DriverCache(neo4jOptions.connection, jobId)
27 | val schemaService = new SchemaService(neo4jOptions, driverCache)
28 | var hasError = false
29 | try {
30 | function(schemaService)
31 | } catch {
32 | case e: Throwable =>
33 | hasError = true
34 | throw e
35 | } finally {
36 | schemaService.close()
37 | if (hasError) {
38 | driverCache.close()
39 | }
40 | }
41 | }
42 |
43 | private def createPartitions() = {
44 | // we get the skip/limit for each partition and execute the "script"
45 | val (partitionSkipLimitList, scriptResult) = callSchemaService { schemaService =>
46 | (schemaService.skipLimitFromPartition(), schemaService.execute(neo4jOptions.script))
47 | }
48 | // we generate a partition for each element
49 | this.scriptResult = scriptResult
50 | partitionSkipLimitList
51 | .map(partitionSkipLimit => Neo4jPartition(partitionSkipLimit))
52 | }
53 |
54 | override def planInputPartitions(): Array[InputPartition] = {
55 | val neo4jPartitions: Seq[Neo4jPartition] = createPartitions()
56 | neo4jPartitions.toArray
57 | }
58 |
59 | override def createReaderFactory(): PartitionReaderFactory = {
60 | new SimplePartitionReaderFactory(
61 | neo4jOptions, filters, schema, jobId, scriptResult, requiredColumns
62 | )
63 | }
64 |
65 | override def readSchema(): StructType = schema
66 | }
67 |
--------------------------------------------------------------------------------
/common/src/main/scala/org/neo4j/spark/reader/BasePartitionReader.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.reader
2 |
3 | import org.apache.spark.internal.Logging
4 | import org.apache.spark.sql.catalyst.InternalRow
5 | import org.apache.spark.sql.sources.Filter
6 | import org.apache.spark.sql.types.StructType
7 | import org.neo4j.driver.{Record, Session, Transaction, Values}
8 | import org.neo4j.spark.service.{MappingService, Neo4jQueryReadStrategy, Neo4jQueryService, Neo4jQueryStrategy, Neo4jReadMappingStrategy, PartitionSkipLimit}
9 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions, Neo4jUtil}
10 | import org.neo4j.spark.util.Neo4jImplicits.StructTypeImplicit
11 |
12 | import scala.collection.JavaConverters._
13 |
14 | abstract class BasePartitionReader(private val options: Neo4jOptions,
15 | private val filters: Array[Filter],
16 | private val schema: StructType,
17 | private val jobId: String,
18 | private val partitionSkipLimit: PartitionSkipLimit,
19 | private val scriptResult: java.util.List[java.util.Map[String, AnyRef]],
20 | private val requiredColumns: StructType) extends Logging {
21 | private var result: Iterator[Record] = _
22 | private var session: Session = _
23 | private var transaction: Transaction = _
24 | private val driverCache: DriverCache = new DriverCache(options.connection,
25 | if (partitionSkipLimit.partitionNumber > 0) s"$jobId-${partitionSkipLimit.partitionNumber}" else jobId)
26 |
27 | private val query: String = new Neo4jQueryService(options, new Neo4jQueryReadStrategy(filters, partitionSkipLimit, requiredColumns.getFieldsName))
28 | .createQuery()
29 |
30 | private val mappingService = new MappingService(new Neo4jReadMappingStrategy(options, requiredColumns), options)
31 |
32 | def next: Boolean = {
33 | if (result == null) { // lazily open the session and run the query on the first call to next
34 | session = driverCache.getOrCreate().session(options.session.toNeo4jSession)
35 | transaction = session.beginTransaction()
36 | log.info(s"Running the following query on Neo4j: $query")
37 | result = transaction.run(query, Values
38 | .value(Map[String, AnyRef](Neo4jQueryStrategy.VARIABLE_SCRIPT_RESULT -> scriptResult).asJava))
39 | .asScala
40 | }
41 |
42 | result.hasNext
43 | }
44 |
45 | def get: InternalRow = mappingService.convert(result.next(), schema)
46 |
47 | def close(): Unit = {
48 | Neo4jUtil.closeSafety(transaction, log)
49 | Neo4jUtil.closeSafety(session, log)
50 | driverCache.close()
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/doc/javascript/versionswitcher.js:
--------------------------------------------------------------------------------
1 | jQuery( window ).load( function() {
2 | var location = window.location;
3 | versionSwitcher( jQuery );
4 | } );
5 |
6 | /**
7 | * Utility to browse different versions of the documentation. Requires the versions.js file loaded, which lists the
8 | * available (relevant) versions of a particular publication.
9 | */
10 | function versionSwitcher( $ )
11 | {
12 | $('.searchbox').hide();
13 | var MAX_STABLE_COUNT = 2;
14 | var DOCS_BASE_URL = window.docMeta.commonDocsBaseUri;
15 | var THIS_DOC_BASE_URI = window.docMeta.unversionedDocBaseUri;
16 |
17 | var currentVersion = window.docMeta.version;
18 | var currentPage = window.neo4jPageId;
19 |
20 | // TODO re-enable loadVersions();
21 |
22 | /**
23 | * Load an array of version into a div element and check if the current page actually exists in these versions.
24 | * Non-existing entries will be unlinked. Current version will be marked as such.
25 | */
26 | function loadVersions() {
27 | var $navHeader = $( 'header' );
28 | var $additionalVersions = $( '' );
29 | $.each( window.docMeta.availableDocVersions, function( index, version ) {
30 | if ( version === currentVersion ) {
31 | return;
32 | }
33 | else {
34 | addVersion( version, $additionalVersions );
35 | }
36 | } );
37 |
38 | var $dropdown = $( '' );
39 | $dropdown.children().first().append( $additionalVersions );
40 | $navHeader.append( $dropdown );
41 | }
42 |
43 | function addVersion( version, $container ) {
44 | var $optionWrapper = $( '' );
45 | var $newOption = $( '' + version + '' ).appendTo( $optionWrapper );
46 | var url = THIS_DOC_BASE_URI + version + '/' + currentPage;
47 | $container.append( $optionWrapper );
48 | checkUrlExistence( url, function() {
49 | $newOption.attr( 'href', url );
50 | $newOption.attr( 'title', 'See this page in version ' + version + '.' );
51 | }, function() {
52 | $newOption.attr( 'title', 'This page does not exist in version ' + version + '.' );
53 | $optionWrapper.addClass( 'disabled' );
54 | }
55 | );
56 | }
57 |
58 | /**
59 | * Check if a specific URL exists. The success and failure functions will be automatically called on finish.
60 | */
61 | function checkUrlExistence( url, success, failure ) {
62 | var settings = {
63 | 'type' : 'HEAD',
64 | 'async' : true,
65 | 'url' : url
66 | };
67 | if ( success )
68 | settings.success = success;
69 | if ( failure )
70 | settings.error = failure;
71 | $.ajax( settings );
72 | }
73 | }
74 | // vim: set ts=2 sw=2:
75 |
--------------------------------------------------------------------------------
/doc/css/extra.css:
--------------------------------------------------------------------------------
1 | .listingblock pre, .literalblock pre, .listingblock pre[class="highlight"], .listingblock pre[class^="highlight "], .listingblock pre.CodeRay, .listingblock pre.prettyprint {
2 | background: #f9f9f9;
3 | border: 1px solid #dddddd;
4 | }
5 |
6 | #header, #content, #footnotes {
7 | max-width: 940px;
8 | }
9 |
10 | html, body {
11 | font-size: 16px;
12 | }
13 |
14 | #content ul.nav-tabs li {
15 | background: none;
16 | padding-left: 0;
17 | }
18 |
19 | .admonitionblock > table td.content {
20 | color: inherit;
21 | font-size: 1em;
22 | }
23 |
24 | .quoteblock.abstract blockquote {
25 | padding: 8px;
26 | background: none;
27 | border: 1px solid #666666;
28 | border-radius: 4px;
29 | }
30 |
31 | .comment {
32 | background: #fff3b8;
33 | }
34 |
35 | pre {
36 | line-height: 1;
37 | }
38 |
39 | pre code {
40 | font-size: .85em;
41 | line-height: 1;
42 | word-wrap: normal;
43 | overflow-wrap: normal;
44 | white-space: pre;
45 | }
46 |
47 | .tabbed-example .content > div:last-child,
48 | .tabbed-example .content > div:last-child p,
49 | .tabbed-example .content > div:last-child .highlight:last-child {
50 | margin-bottom: 0px;
51 | }
52 |
53 | html, body, p, ul, ol, dl {
54 | font-family: "Open Sans", "DejaVu Sans", sans-serif;
55 | }
56 |
57 | #header > h1:first-child, h1, h2, h3, h4, h5, h6, #toctitle {
58 | font-family: "Open Sans", "DejaVu Sans", sans-serif;
59 | color: #3d5360;
60 | font-weight: 300;
61 | }
62 |
63 | #toctitle, h3, h4, h5, h6 {
64 | font-weight: normal;
65 | color: #000000;
66 | }
67 |
68 | h1 {
69 | font-size: 42px;
70 | }
71 |
72 | h2 {
73 | font-size: 34px;
74 | }
75 |
76 | h3 {
77 | font-size: 28px;
78 | }
79 |
80 | h4 {
81 | font-size: 22px;
82 | }
83 |
84 | h5 {
85 | font-size: 20px;
86 | }
87 |
88 | h6 {
89 | font-size: 18px;
90 | }
91 |
92 | .h4, .h5, .h6, h4, h5, h6 {
93 | margin-top: 1.2em;
94 | margin-bottom: 0.6em;
95 | }
96 |
97 | #content ul {
98 | list-style: outside disc;
99 | margin-left: 1em;
100 | }
101 |
102 | #content ul li {
103 | position: relative;
104 | left: 1em;
105 | padding-right: 1em;
106 | background: none;
107 | }
108 |
109 | div.admonitionblock td.content > div.title,
110 | div.exampleblock > div.title {
111 | color: #000000;
112 | font-size: 20px;
113 | }
114 |
115 | div.exampleblock > div.title {
116 | font-size: 18px;
117 | }
118 |
119 | a:hover, a:focus, a:hover:visited {
120 | color: #5dade2;
121 | }
122 | a:hover {
123 | cursor: pointer;
124 | text-decoration: none;
125 | }
126 | a {
127 | font-weight: 400;
128 | color: #428bca;
129 | text-decoration: none;
130 | line-height: inherit;
131 | }
132 |
133 | /* No italics in TOC */
134 |
135 | #toc ul.sectlevel0 > li > a {
136 | font-style: normal;
137 | }
138 |
--------------------------------------------------------------------------------
/common/src/test/scala/org/neo4j/spark/util/Neo4jImplicitsTest.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.util
2 |
3 | import org.apache.spark.sql.sources.{And, EqualTo, Not}
4 | import org.apache.spark.sql.types.{DataTypes, StructField, StructType}
5 | import org.junit.Test
6 | import org.junit.Assert._
7 | import org.neo4j.spark.util.Neo4jImplicits._
8 |
9 | class Neo4jImplicitsTest {
10 |
11 | @Test
12 | def `should quote the string` {
13 | // given
14 | val value = "Test with space"
15 |
16 | // when
17 | val actual = value.quote
18 |
19 | // then
20 | assertEquals(s"`$value`", actual)
21 | }
22 |
23 | @Test
24 | def `should quote text that starts with $` {
25 | // given
26 | val value = "$tring"
27 |
28 | // when
29 | val actual = value.quote
30 |
31 | // then
32 | assertEquals(s"`$value`", actual)
33 | }
34 |
35 | @Test
36 | def `should not re-quote the string` {
37 | // given
38 | val value = "`Test with space`"
39 |
40 | // when
41 | val actual = value.quote
42 |
43 | // then
44 | assertEquals(value, actual)
45 | }
46 |
47 | @Test
48 | def `should not quote the string` {
49 | // given
50 | val value = "Test"
51 |
52 | // when
53 | val actual = value.quote
54 |
55 | // then
56 | assertEquals(value, actual)
57 | }
58 |
59 | @Test
60 | def `should return attribute if filter has it` {
61 | // given
62 | val filter = EqualTo("name", "John")
63 |
64 | // when
65 | val attribute = filter.getAttribute
66 |
67 | // then
68 | assertTrue(attribute.isDefined)
69 | }
70 |
71 | @Test
72 | def `should return an empty option if the filter doesn't have an attribute` {
73 | // given
74 | val filter = And(EqualTo("name", "John"), EqualTo("age", 32))
75 |
76 | // when
77 | val attribute = filter.getAttribute
78 |
79 | // then
80 | assertFalse(attribute.isDefined)
81 | }
82 |
83 | @Test
84 | def `should return the attribute without the entity identifier` {
85 | // given
86 | val filter = EqualTo("person.address.coords", 32)
87 |
88 | // when
89 | val attribute = filter.getAttributeWithoutEntityName
90 |
91 | // then
92 | assertEquals("address.coords", attribute.get)
93 | }
94 |
95 | @Test
96 | def `struct should return true if contains fields`: Unit = {
97 | val struct = StructType(Seq(StructField("is_hero", DataTypes.BooleanType), StructField("name", DataTypes.StringType)))
98 |
99 | assertEquals(0, struct.getMissingFields(Set("is_hero", "name")).size)
100 | }
101 |
102 | @Test
103 | def `struct should return false if not contains fields`: Unit = {
104 | val struct = StructType(Seq(StructField("is_hero", DataTypes.BooleanType), StructField("name", DataTypes.StringType)))
105 |
106 | assertEquals(Set[String]("hero_name"), struct.getMissingFields(Set("is_hero", "hero_name")))
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/common/src/test/scala/org/neo4j/spark/SparkConnectorScalaSuiteIT.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.sql.SparkSession
5 | import org.junit.runner.RunWith
6 | import org.junit.runners.Suite
7 | import org.junit.{AfterClass, Assume, BeforeClass}
8 | import org.neo4j.Neo4jContainerExtension
9 | import org.neo4j.driver._
10 | import org.neo4j.driver.summary.ResultSummary
11 | import org.neo4j.spark.service.SchemaServiceTSE
12 | import org.neo4j.spark.util.Neo4jUtil
13 |
14 |
15 | object SparkConnectorScalaSuiteIT {
16 | val server: Neo4jContainerExtension = new Neo4jContainerExtension(s"neo4j:${TestUtil.neo4jVersion()}-enterprise")
17 | .withNeo4jConfig("dbms.security.auth_enabled", "false")
18 | .withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
19 | .withDatabases(Seq("db1", "db2"))
20 |
21 | var conf: SparkConf = _
22 | var ss: SparkSession = _
23 | var driver: Driver = _
24 |
25 | private var _session: Session = _
26 |
27 | var connections: Long = 0
28 |
29 | @BeforeClass
30 | def setUpContainer(): Unit = {
31 | if (!server.isRunning) {
32 | try {
33 | server.start()
34 | } catch {
35 | case _: Throwable => //
36 | }
37 | Assume.assumeTrue("Neo4j container is not started", server.isRunning)
38 | conf = new SparkConf().setAppName("neoTest")
39 | .setMaster("local[*]")
40 | ss = SparkSession.builder.config(conf).getOrCreate()
41 | if (TestUtil.isTravis()) {
42 | org.apache.log4j.LogManager.getLogger("org")
43 | .setLevel(org.apache.log4j.Level.OFF)
44 | }
45 | driver = GraphDatabase.driver(server.getBoltUrl, AuthTokens.none())
46 | session()
47 | .readTransaction(new TransactionWork[ResultSummary] {
48 | override def execute(tx: Transaction): ResultSummary = tx.run("RETURN 1").consume() // we init the session so the count is consistent
49 | })
50 | connections = getActiveConnections
51 | Unit
52 | }
53 | }
54 |
55 | @AfterClass
56 | def tearDownContainer() = {
57 | if (server.isRunning) {
58 | Neo4jUtil.closeSafety(session())
59 | Neo4jUtil.closeSafety(driver)
60 | server.stop()
61 | ss.stop()
62 | }
63 | }
64 |
65 | def session(): Session = {
66 | if (_session == null || !_session.isOpen) {
67 | _session = driver.session
68 | }
69 | _session
70 | }
71 |
72 | def getActiveConnections = session()
73 | .readTransaction(new TransactionWork[Long] {
74 | override def execute(tx: Transaction): Long = tx.run(
75 | """|CALL dbms.listConnections() YIELD connectionId, connector
76 | |WHERE connector = 'bolt'
77 | |RETURN count(*) AS connections""".stripMargin)
78 | .single()
79 | .get("connections")
80 | .asLong()
81 | })
82 | }
83 |
84 | @RunWith(classOf[Suite])
85 | @Suite.SuiteClasses(Array(
86 | classOf[SchemaServiceTSE]
87 | ))
88 | class SparkConnectorScalaSuiteIT {}
89 |
--------------------------------------------------------------------------------
/common/src/test/scala/org/neo4j/spark/SparkConnectorScalaSuiteWithApocIT.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.sql.SparkSession
5 | import org.junit.runner.RunWith
6 | import org.junit.runners.Suite
7 | import org.junit.{AfterClass, Assume, BeforeClass}
8 | import org.neo4j.Neo4jContainerExtension
9 | import org.neo4j.driver._
10 | import org.neo4j.driver.summary.ResultSummary
11 | import org.neo4j.spark.service.SchemaServiceWithApocTSE
12 | import org.neo4j.spark.util.Neo4jUtil
13 |
14 | object SparkConnectorScalaSuiteWithApocIT {
15 | val server: Neo4jContainerExtension = new Neo4jContainerExtension()
16 | .withNeo4jConfig("dbms.security.auth_enabled", "false")
17 | .withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
18 | .withEnv("NEO4JLABS_PLUGINS", "[\"apoc\"]")
19 | .withDatabases(Seq("db1", "db2"))
20 |
21 | var conf: SparkConf = _
22 | var ss: SparkSession = _
23 | var driver: Driver = _
24 |
25 | private var _session: Session = _
26 |
27 | var connections: Long = 0
28 |
29 | @BeforeClass
30 | def setUpContainer(): Unit = {
31 | if (!server.isRunning) {
32 | try {
33 | server.start()
34 | } catch {
35 | case _ => //
36 | }
37 | Assume.assumeTrue("Neo4j container is not started", server.isRunning)
38 | conf = new SparkConf().setAppName("neoTest")
39 | .setMaster("local[*]")
40 | ss = SparkSession.builder.config(conf).getOrCreate()
41 | if (TestUtil.isTravis()) {
42 | org.apache.log4j.LogManager.getLogger("org")
43 | .setLevel(org.apache.log4j.Level.OFF)
44 | }
45 | driver = GraphDatabase.driver(server.getBoltUrl, AuthTokens.none())
46 | session()
47 | .readTransaction(new TransactionWork[ResultSummary] {
48 | override def execute(tx: Transaction): ResultSummary = tx.run("RETURN 1").consume() // we init the session so the count is consistent
49 | })
50 | connections = getActiveConnections
51 | Unit
52 | }
53 | }
54 |
55 | @AfterClass
56 | def tearDownContainer() = {
57 | if (server.isRunning) {
58 | Neo4jUtil.closeSafety(session())
59 | Neo4jUtil.closeSafety(driver)
60 | server.stop()
61 | ss.stop()
62 | }
63 | }
64 |
65 | def session(): Session = {
66 | if (_session == null || !_session.isOpen) {
67 | _session = driver.session
68 | }
69 | _session
70 | }
71 |
72 | def getActiveConnections = session()
73 | .readTransaction(new TransactionWork[Long] {
74 | override def execute(tx: Transaction): Long = tx.run(
75 | """|CALL dbms.listConnections() YIELD connectionId, connector
76 | |WHERE connector = 'bolt'
77 | |RETURN count(*) AS connections""".stripMargin)
78 | .single()
79 | .get("connections")
80 | .asLong()
81 | })
82 | }
83 |
84 | @RunWith(classOf[Suite])
85 | @Suite.SuiteClasses(Array(
86 | classOf[SchemaServiceWithApocTSE]
87 | ))
88 | class SparkConnectorScalaSuiteWithApocIT {}
--------------------------------------------------------------------------------
/spark-3.0/src/test/scala/org/neo4j/spark/SparkConnectorScalaSuiteWithApocIT.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.sql.SparkSession
5 | import org.junit.runner.RunWith
6 | import org.junit.runners.Suite
7 | import org.junit.{AfterClass, Assume, BeforeClass}
8 | import org.neo4j.Neo4jContainerExtension
9 | import org.neo4j.driver._
10 | import org.neo4j.driver.summary.ResultSummary
11 | import org.neo4j.spark.util.Neo4jUtil
12 |
13 |
14 | object SparkConnectorScalaSuiteWithApocIT {
15 | val server: Neo4jContainerExtension = new Neo4jContainerExtension()
16 | .withNeo4jConfig("dbms.security.auth_enabled", "false")
17 | .withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
18 | .withEnv("NEO4JLABS_PLUGINS", "[\"apoc\"]")
19 | .withDatabases(Seq("db1", "db2"))
20 |
21 | var conf: SparkConf = _
22 | var ss: SparkSession = _
23 | var driver: Driver = _
24 |
25 | private var _session: Session = _
26 |
27 | var connections: Long = 0
28 |
29 | @BeforeClass
30 | def setUpContainer(): Unit = {
31 | if (!server.isRunning) {
32 | try {
33 | server.start()
34 | } catch {
35 | case _ => //
36 | }
37 | Assume.assumeTrue("Neo4j container is not started", server.isRunning)
38 | conf = new SparkConf().setAppName("neoTest")
39 | .setMaster("local[*]")
40 | ss = SparkSession.builder.config(conf).getOrCreate()
41 | if (TestUtil.isTravis()) {
42 | org.apache.log4j.LogManager.getLogger("org")
43 | .setLevel(org.apache.log4j.Level.OFF)
44 | }
45 | driver = GraphDatabase.driver(server.getBoltUrl, AuthTokens.none())
46 | session()
47 | .readTransaction(new TransactionWork[ResultSummary] {
48 | override def execute(tx: Transaction): ResultSummary = tx.run("RETURN 1").consume() // we init the session so the count is consistent
49 | })
50 | connections = getActiveConnections
51 | Unit
52 | }
53 | }
54 |
55 | @AfterClass
56 | def tearDownContainer() = {
57 | if (server.isRunning) {
58 | Neo4jUtil.closeSafety(session())
59 | Neo4jUtil.closeSafety(driver)
60 | server.stop()
61 | ss.stop()
62 | }
63 | }
64 |
65 | def session(): Session = {
66 | if (_session == null || !_session.isOpen) {
67 | _session = driver.session
68 | }
69 | _session
70 | }
71 |
72 | def getActiveConnections = session()
73 | .readTransaction(new TransactionWork[Long] {
74 | override def execute(tx: Transaction): Long = tx.run(
75 | """|CALL dbms.listConnections() YIELD connectionId, connector
76 | |WHERE connector = 'bolt'
77 | |RETURN count(*) AS connections""".stripMargin)
78 | .single()
79 | .get("connections")
80 | .asLong()
81 | })
82 | }
83 |
84 | @RunWith(classOf[Suite])
85 | @Suite.SuiteClasses(Array(
86 | classOf[DataSourceReaderWithApocTSE],
87 | classOf[DataSourceReaderNeo4j4xWithApocTSE]
88 | ))
89 | class SparkConnectorScalaSuiteWithApocIT {}
--------------------------------------------------------------------------------
/spark-2.4/src/main/scala/org/neo4j/spark/reader/Neo4jInputPartitionReader.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.reader
2 |
3 | import org.apache.spark.internal.Logging
4 | import org.apache.spark.sql.catalyst.InternalRow
5 | import org.apache.spark.sql.sources.Filter
6 | import org.apache.spark.sql.sources.v2.reader.{InputPartition, InputPartitionReader}
7 | import org.apache.spark.sql.types.StructType
8 | import org.neo4j.driver.{Record, Session, Transaction, Values}
9 | import org.neo4j.spark.service.{MappingService, Neo4jQueryReadStrategy, Neo4jQueryService, Neo4jQueryStrategy, Neo4jReadMappingStrategy, PartitionSkipLimit}
10 | import org.neo4j.spark.util.Neo4jImplicits.StructTypeImplicit
11 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions, Neo4jUtil}
12 | import org.neo4j.spark.util.Neo4jOptions
13 |
14 | import scala.collection.JavaConverters._
15 |
16 | class Neo4jInputPartitionReader(private val options: Neo4jOptions,
17 | private val filters: Array[Filter],
18 | private val schema: StructType,
19 | private val jobId: String,
20 | private val partitionSkipLimit: PartitionSkipLimit,
21 | private val scriptResult: java.util.List[java.util.Map[String, AnyRef]],
22 | private val requiredColumns: StructType) extends InputPartition[InternalRow]
23 | with InputPartitionReader[InternalRow]
24 | with Logging {
25 |
26 | private var result: Iterator[Record] = _
27 | private var session: Session = _
28 | private var transaction: Transaction = _
29 | private val driverCache: DriverCache = new DriverCache(options.connection,
30 | if (partitionSkipLimit.partitionNumber > 0) s"$jobId-${partitionSkipLimit.partitionNumber}" else jobId)
31 |
32 | private val query: String = new Neo4jQueryService(options, new Neo4jQueryReadStrategy(filters, partitionSkipLimit, requiredColumns.getFieldsName))
33 | .createQuery()
34 |
35 | private val mappingService = new MappingService(new Neo4jReadMappingStrategy(options, requiredColumns), options)
36 |
37 | override def createPartitionReader(): InputPartitionReader[InternalRow] = new Neo4jInputPartitionReader(options, filters, schema,
38 | jobId, partitionSkipLimit, scriptResult, requiredColumns)
39 |
40 | def next: Boolean = {
41 | if (result == null) {
42 | session = driverCache.getOrCreate().session(options.session.toNeo4jSession)
43 | transaction = session.beginTransaction()
44 | log.info(s"Running the following query on Neo4j: $query")
45 | result = transaction.run(query, Values
46 | .value(Map[String, AnyRef](Neo4jQueryStrategy.VARIABLE_SCRIPT_RESULT -> scriptResult).asJava))
47 | .asScala
48 | }
49 |
50 | result.hasNext
51 | }
52 |
53 | def get: InternalRow = mappingService.convert(result.next(), schema)
54 |
55 | def close(): Unit = {
56 | Neo4jUtil.closeSafety(transaction, log)
57 | Neo4jUtil.closeSafety(session, log)
58 | driverCache.close()
59 | }
60 |
61 | }
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/pages/types.adoc:
--------------------------------------------------------------------------------
1 | = Neo4j / Spark Data Types Reference
2 |
3 | [abstract]
4 | --
5 | This chapter provides a reference for type compatibility between Neo4j and Spark.
6 | --
7 |
8 | == Background
9 |
10 | Neo4j and Cypher provide a link:https://neo4j.com/docs/java-reference/current/extending-neo4j/procedures-and-functions/values-and-types/[type system]
11 | that describes how values are stored in the database, but these types do not always exactly match what Spark provides.
12 |
13 | In some cases Neo4j provides types that Spark has no equivalent for, and vice versa.
14 |
15 | == Type Mappings
16 |
17 | .Spark to Neo4j Type Mappings Reference
18 | |===
19 | |Neo4j Type |Spark Type |Notes
20 |
21 | |`String`
22 | |`string`
23 | |Example: `"Hello"`
24 |
25 | |`Integer`
26 | |`long`
27 | |Example: `12345`
28 |
29 | |`Float`
30 | |`double`
31 | |Example: `3.141592`
32 |
33 | |`Boolean`
34 | |`boolean`
35 | |Example: `true`
36 |
37 | |`Point`
38 | |`struct { type: string, srid: integer, x: double, y: double, z: double }`
39 | |For more information on spatial types in Neo4j, see link:https://neo4j.com/docs/cypher-manual/current/syntax/spatial/[Spatial values]
40 |
41 | |`Date`
42 | |`date`
43 | |Example: `2020-09-11`
44 |
45 | |`Time`
46 | |`struct { type: string, value: string }`
47 | |Example: `[offset-time, 12:14:08.209Z]`
48 |
49 | |`LocalTime`
50 | |`struct { type: string, value: string }`
51 | |Example: `[local-time, 12:18:11.628]`
52 |
53 | |`DateTime`
54 | |`timestamp`
55 | |Example: `2020-09-11 12:17:39.192`
56 |
57 | |`LocalDateTime`
58 | |`timestamp`
59 | |Example: `2020-09-11 12:14:49.081`
60 |
61 | |`Duration`
62 | |`struct { type: string, months: long, days: long, seconds: long, nanoseconds: integer, value: string }`
63 | |See link:https://neo4j.com/docs/cypher-manual/current/functions/temporal/duration/[Temporal functions: duration]
64 |
65 | |`Node`
66 | |`struct { <id>: long, <labels>: array[string], (PROPERTIES) }`
67 | |Nodes in Neo4j are represented as property containers; that is, they appear as structs whose fields correspond to the node's properties, plus the internal `<id>` and `<labels>` fields. _For ease of use, it is usually better to return individual properties than a whole node from a query._
68 |
69 | |`Relationship`
70 | |`struct { <rel.id>: long, <rel.type>: string, <source.id>: long, <target.id>: long, (PROPERTIES) }`
71 | |Relationships are returned as structs identifying the source and target of the relationship and its type, along with the relationship's properties (if any). _For ease of use, it is usually better to return individual properties than a whole relationship from a query._
72 |
73 | |`Path`
74 | |`string`
75 | |Example: `path[(322)<-[20280:AIRLINE]-(33510)]`. _For ease of use, it is recommended to use link:https://neo4j.com/docs/cypher-manual/current/functions/list/[path functions] to return individual properties/aspects of a path from a query._
76 |
77 | |`[Array of Same Type]`
78 | |`array[element]`
79 | |In Neo4j, arrays must be consistently typed (for example, an array of all `Float` values). The inner Spark type matches the type mapping above.
80 |
81 | |===
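82 |
83 | == Example
84 |
85 | The following is a minimal Scala sketch showing how the mappings above surface in a DataFrame schema. It assumes a local Neo4j instance at `bolt://localhost:7687` with authentication disabled and some `Person` nodes; adjust the URL, credentials, and label to your environment.
86 |
87 | [source,scala]
88 | ----
89 | import org.apache.spark.sql.SparkSession
90 |
91 | val spark = SparkSession.builder()
92 |   .appName("Neo4jTypesExample")
93 |   .master("local[*]")
94 |   .getOrCreate()
95 |
96 | // Read all Person nodes; each Neo4j property is converted to the
97 | // Spark type listed in the mapping table above.
98 | val df = spark.read.format("org.neo4j.spark.DataSource")
99 |   .option("url", "bolt://localhost:7687")
100 |   .option("labels", "Person")
101 |   .load()
102 |
103 | // Inspect the resulting Spark types (longs, strings, structs, etc.)
104 | df.printSchema()
105 | ----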
--------------------------------------------------------------------------------
/spark-3.0/src/test/scala/org/neo4j/spark/SparkConnectorScalaSuiteIT.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.sql.SparkSession
5 | import org.junit.runner.RunWith
6 | import org.junit.runners.Suite
7 | import org.junit.{AfterClass, Assume, BeforeClass}
8 | import org.neo4j.Neo4jContainerExtension
9 | import org.neo4j.driver._
10 | import org.neo4j.driver.summary.ResultSummary
11 | import org.neo4j.spark.util.Neo4jUtil
12 |
13 |
14 | object SparkConnectorScalaSuiteIT {
15 | val server: Neo4jContainerExtension = new Neo4jContainerExtension(s"neo4j:${TestUtil.neo4jVersion()}-enterprise")
16 | .withNeo4jConfig("dbms.security.auth_enabled", "false")
17 | .withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
18 | .withDatabases(Seq("db1", "db2"))
19 |
20 | var conf: SparkConf = _
21 | var ss: SparkSession = _
22 | var driver: Driver = _
23 |
24 | private var _session: Session = _
25 |
26 | var connections: Long = 0
27 |
28 | @BeforeClass
29 | def setUpContainer(): Unit = {
30 | if (!server.isRunning) {
31 | try {
32 | server.start()
33 | } catch {
34 | case _: Throwable => //
35 | }
36 | Assume.assumeTrue("Neo4j container is not started", server.isRunning)
37 | conf = new SparkConf().setAppName("neoTest")
38 | .setMaster("local[*]")
39 | ss = SparkSession.builder.config(conf).getOrCreate()
40 | if (TestUtil.isTravis()) {
41 | org.apache.log4j.LogManager.getLogger("org")
42 | .setLevel(org.apache.log4j.Level.OFF)
43 | }
44 | driver = GraphDatabase.driver(server.getBoltUrl, AuthTokens.none())
45 | session()
46 | .readTransaction(new TransactionWork[ResultSummary] {
47 | override def execute(tx: Transaction): ResultSummary = tx.run("RETURN 1").consume() // we init the session so the count is consistent
48 | })
49 | connections = getActiveConnections
50 | Unit
51 | }
52 | }
53 |
54 | @AfterClass
55 | def tearDownContainer() = {
56 | if (server.isRunning) {
57 | Neo4jUtil.closeSafety(session())
58 | Neo4jUtil.closeSafety(driver)
59 | server.stop()
60 | ss.stop()
61 | }
62 | }
63 |
64 | def session(): Session = {
65 | if (_session == null || !_session.isOpen) {
66 | _session = driver.session
67 | }
68 | _session
69 | }
70 |
71 | def getActiveConnections = session()
72 | .readTransaction(new TransactionWork[Long] {
73 | override def execute(tx: Transaction): Long = tx.run(
74 | """|CALL dbms.listConnections() YIELD connectionId, connector
75 | |WHERE connector = 'bolt'
76 | |RETURN count(*) AS connections""".stripMargin)
77 | .single()
78 | .get("connections")
79 | .asLong()
80 | })
81 | }
82 |
83 | @RunWith(classOf[Suite])
84 | @Suite.SuiteClasses(Array(
85 | classOf[DataSourceReaderTSE],
86 | classOf[DataSourceReaderNeo4j4xTSE],
87 | classOf[DataSourceWriterNeo4j4xTSE],
88 | classOf[DataSourceReaderNeo4j35xTSE],
89 | classOf[DataSourceWriterTSE]
90 | ))
91 | class SparkConnectorScalaSuiteIT {}
92 |
--------------------------------------------------------------------------------
/spark-2.4/src/test/scala/org/neo4j/spark/SparkConnectorScalaSuiteWithApocIT.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.sql.SparkSession
5 | import org.junit.runner.RunWith
6 | import org.junit.runners.Suite
7 | import org.junit.{AfterClass, Assume, BeforeClass}
8 | import org.neo4j.Neo4jContainerExtension
9 | import org.neo4j.driver._
10 | import org.neo4j.driver.summary.ResultSummary
11 | import org.neo4j.spark.service.SchemaServiceWithApocTSE
12 | import org.neo4j.spark.util.Neo4jUtil
13 |
14 |
15 | object SparkConnectorScalaSuiteWithApocIT {
16 | val server: Neo4jContainerExtension = new Neo4jContainerExtension()
17 | .withNeo4jConfig("dbms.security.auth_enabled", "false")
18 | .withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
19 | .withEnv("NEO4JLABS_PLUGINS", "[\"apoc\"]")
20 | .withDatabases(Seq("db1", "db2"))
21 |
22 | var conf: SparkConf = _
23 | var ss: SparkSession = _
24 | var driver: Driver = _
25 |
26 | private var _session: Session = _
27 |
28 | var connections: Long = 0
29 |
30 | @BeforeClass
31 | def setUpContainer(): Unit = {
32 | if (!server.isRunning) {
33 | try {
34 | server.start()
35 | } catch {
36 |         case _: Throwable => //
37 | }
38 | Assume.assumeTrue("Neo4j container is not started", server.isRunning)
39 | conf = new SparkConf().setAppName("neoTest")
40 | .setMaster("local[*]")
41 | ss = SparkSession.builder.config(conf).getOrCreate()
42 | if (TestUtil.isTravis()) {
43 | org.apache.log4j.LogManager.getLogger("org")
44 | .setLevel(org.apache.log4j.Level.OFF)
45 | }
46 | driver = GraphDatabase.driver(server.getBoltUrl, AuthTokens.none())
47 | session()
48 | .readTransaction(new TransactionWork[ResultSummary] {
49 | override def execute(tx: Transaction): ResultSummary = tx.run("RETURN 1").consume() // we init the session so the count is consistent
50 | })
51 | connections = getActiveConnections
52 | Unit
53 | }
54 | }
55 |
56 | @AfterClass
57 | def tearDownContainer() = {
58 | if (server.isRunning) {
59 | Neo4jUtil.closeSafety(session())
60 | Neo4jUtil.closeSafety(driver)
61 | server.stop()
62 | ss.stop()
63 | }
64 | }
65 |
66 | def session(): Session = {
67 | if (_session == null || !_session.isOpen) {
68 | _session = driver.session
69 | }
70 | _session
71 | }
72 |
73 | def getActiveConnections = session()
74 | .readTransaction(new TransactionWork[Long] {
75 | override def execute(tx: Transaction): Long = tx.run(
76 | """|CALL dbms.listConnections() YIELD connectionId, connector
77 | |WHERE connector = 'bolt'
78 | |RETURN count(*) AS connections""".stripMargin)
79 | .single()
80 | .get("connections")
81 | .asLong()
82 | })
83 | }
84 |
85 | @RunWith(classOf[Suite])
86 | @Suite.SuiteClasses(Array(
87 | classOf[SchemaServiceWithApocTSE],
88 | classOf[DataSourceReaderWithApocTSE],
89 | classOf[DataSourceReaderNeo4j4xWithApocTSE]
90 | ))
91 | class SparkConnectorScalaSuiteWithApocIT {}
--------------------------------------------------------------------------------
/spark-2.4/src/test/scala/org/neo4j/spark/SparkConnectorScalaSuiteIT.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.SparkConf
4 | import org.apache.spark.sql.SparkSession
5 | import org.junit.runner.RunWith
6 | import org.junit.runners.Suite
7 | import org.junit.{AfterClass, Assume, BeforeClass}
8 | import org.neo4j.Neo4jContainerExtension
9 | import org.neo4j.driver._
10 | import org.neo4j.driver.summary.ResultSummary
11 | import org.neo4j.spark.service.SchemaServiceTSE
12 | import org.neo4j.spark.util.Neo4jUtil
13 |
14 |
15 | object SparkConnectorScalaSuiteIT {
16 | val server: Neo4jContainerExtension = new Neo4jContainerExtension()
17 | .withNeo4jConfig("dbms.security.auth_enabled", "false")
18 | .withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
19 | .withDatabases(Seq("db1", "db2"))
20 |
21 | var conf: SparkConf = _
22 | var ss: SparkSession = _
23 | var driver: Driver = _
24 |
25 | private var _session: Session = _
26 |
27 | var connections: Long = 0
28 |
29 | @BeforeClass
30 | def setUpContainer(): Unit = {
31 | if (!server.isRunning) {
32 | try {
33 | server.start()
34 | } catch {
35 | case _: Throwable => //
36 | }
37 | Assume.assumeTrue("Neo4j container is not started", server.isRunning)
38 | conf = new SparkConf().setAppName("neoTest")
39 | .setMaster("local[*]")
40 | ss = SparkSession.builder.config(conf).getOrCreate()
41 | if (TestUtil.isTravis()) {
42 | org.apache.log4j.LogManager.getLogger("org")
43 | .setLevel(org.apache.log4j.Level.OFF)
44 | }
45 | driver = GraphDatabase.driver(server.getBoltUrl, AuthTokens.none())
46 | session()
47 | .readTransaction(new TransactionWork[ResultSummary] {
48 | override def execute(tx: Transaction): ResultSummary = tx.run("RETURN 1").consume() // we init the session so the count is consistent
49 | })
50 | connections = getActiveConnections
51 | Unit
52 | }
53 | }
54 |
55 | @AfterClass
56 | def tearDownContainer() = {
57 | if (server.isRunning) {
58 | Neo4jUtil.closeSafety(session())
59 | Neo4jUtil.closeSafety(driver)
60 | server.stop()
61 | ss.stop()
62 | }
63 | }
64 |
65 | def session(): Session = {
66 | if (_session == null || !_session.isOpen) {
67 | _session = driver.session
68 | }
69 | _session
70 | }
71 |
72 | def getActiveConnections = session()
73 | .readTransaction(new TransactionWork[Long] {
74 | override def execute(tx: Transaction): Long = tx.run(
75 | """|CALL dbms.listConnections() YIELD connectionId, connector
76 | |WHERE connector = 'bolt'
77 | |RETURN count(*) AS connections""".stripMargin)
78 | .single()
79 | .get("connections")
80 | .asLong()
81 | })
82 | }
83 |
84 | @RunWith(classOf[Suite])
85 | @Suite.SuiteClasses(Array(
86 | classOf[SchemaServiceTSE],
87 | classOf[DataSourceReaderTSE],
88 | classOf[DataSourceReaderNeo4j4xTSE],
89 | classOf[DataSourceWriterTSE],
90 | classOf[DataSourceWriterNeo4j4xTSE],
91 | classOf[DataSourceReaderNeo4j35xTSE]
92 | ))
93 | class SparkConnectorScalaSuiteIT {}
94 |
--------------------------------------------------------------------------------
/spark-2.4/src/main/scala/org/neo4j/spark/reader/Neo4jDataSourceReader.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.reader
2 |
3 | import java.util
4 | import org.apache.spark.sql.catalyst.InternalRow
5 | import org.apache.spark.sql.sources.Filter
6 | import org.apache.spark.sql.sources.v2.DataSourceOptions
7 | import org.apache.spark.sql.sources.v2.reader.{DataSourceReader, InputPartition, SupportsPushDownFilters, SupportsPushDownRequiredColumns}
8 | import org.apache.spark.sql.types.StructType
9 | import org.neo4j.spark.util.Neo4jOptions
10 | import org.neo4j.spark.service.SchemaService
11 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions, Validations}
12 |
13 | import scala.collection.JavaConverters._
14 |
15 | class Neo4jDataSourceReader(private val options: DataSourceOptions, private val jobId: String) extends DataSourceReader
16 | with SupportsPushDownFilters
17 | with SupportsPushDownRequiredColumns {
18 |
19 | private var filters: Array[Filter] = Array[Filter]()
20 |
21 | private var requiredColumns: StructType = new StructType()
22 |
23 | private val neo4jOptions: Neo4jOptions = new Neo4jOptions(options.asMap())
24 | .validate(options => Validations.read(options, jobId))
25 |
26 | private val structType = callSchemaService { schemaService => schemaService
27 | .struct() }
28 |
29 | override def readSchema(): StructType = structType
30 |
31 | private def callSchemaService[T](function: SchemaService => T): T = {
32 | val driverCache = new DriverCache(neo4jOptions.connection, jobId)
33 | val schemaService = new SchemaService(neo4jOptions, driverCache)
34 | var hasError = false
35 | try {
36 | function(schemaService)
37 | } catch {
38 | case e: Throwable => {
39 | hasError = true
40 | throw e
41 | }
42 | } finally {
43 | schemaService.close()
44 | if (hasError) {
45 | driverCache.close()
46 | }
47 | }
48 | }
49 |
50 | override def planInputPartitions: util.ArrayList[InputPartition[InternalRow]] = {
51 | // we retrieve the schema in order to parse the data correctly
52 | val schema = readSchema()
53 | val neo4jPartitions: Seq[Neo4jInputPartitionReader] = createPartitions(schema)
54 | new util.ArrayList[InputPartition[InternalRow]](neo4jPartitions.asJava)
55 | }
56 |
57 | private def createPartitions(schema: StructType) = {
58 | // we get the skip/limit for each partition and execute the "script"
59 | val (partitionSkipLimitList, scriptResult) = callSchemaService { schemaService =>
60 | (schemaService.skipLimitFromPartition(), schemaService.execute(neo4jOptions.script)) }
61 | // we generate a partition for each element
62 | partitionSkipLimitList
63 | .map(partitionSkipLimit => new Neo4jInputPartitionReader(neo4jOptions, filters, schema, jobId,
64 | partitionSkipLimit, scriptResult, requiredColumns))
65 | }
66 |
67 | override def pushFilters(filtersArray: Array[Filter]): Array[Filter] = {
68 | if (neo4jOptions.pushdownFiltersEnabled) {
69 | filters = filtersArray
70 | }
71 |
72 | filtersArray
73 | }
74 |
75 | override def pushedFilters(): Array[Filter] = filters
76 |
77 | override def pruneColumns(requiredSchema: StructType): Unit = {
78 | requiredColumns = if (!neo4jOptions.pushdownColumnsEnabled || neo4jOptions.relationshipMetadata.nodeMap) {
79 | new StructType()
80 | } else {
81 | requiredSchema
82 | }
83 | }
84 | }
--------------------------------------------------------------------------------
/test-support/src/main/scala/org/neo4j/Neo4jContainerExtension.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j
2 |
3 | import java.time.Duration
4 | import java.util.concurrent.{Callable, TimeUnit}
5 |
6 | import org.neo4j.driver.{AuthToken, AuthTokens, GraphDatabase, SessionConfig}
7 | import org.neo4j.spark.TestUtil
8 | import org.rnorth.ducttape.unreliables.Unreliables
9 | import org.testcontainers.containers.Neo4jContainer
10 | import org.testcontainers.containers.wait.strategy.{AbstractWaitStrategy, WaitAllStrategy}
11 |
12 | import collection.JavaConverters._
13 |
14 | class DatabasesWaitStrategy(private val auth: AuthToken) extends AbstractWaitStrategy {
15 | private var databases = Seq.empty[String]
16 |
17 | def forDatabases(dbs: Seq[String]): DatabasesWaitStrategy = {
18 | databases ++= dbs
19 | this
20 | }
21 |
22 | override def waitUntilReady() {
23 | val boltUrl = s"bolt://${waitStrategyTarget.getContainerIpAddress}:${waitStrategyTarget.getMappedPort(7687)}"
24 | val driver = GraphDatabase.driver(boltUrl, auth)
25 | val systemSession = driver.session(SessionConfig.forDatabase("system"))
26 | val tx = systemSession.beginTransaction()
27 | try {
28 | databases.foreach { db => tx.run(s"CREATE DATABASE $db IF NOT EXISTS") }
29 | tx.commit()
30 | } finally {
31 | tx.close()
32 | }
33 |
34 | Unreliables.retryUntilSuccess(startupTimeout.getSeconds.toInt, TimeUnit.SECONDS, new Callable[Boolean] {
35 | override def call(): Boolean = {
36 | getRateLimiter.doWhenReady(new Runnable {
37 | override def run(): Unit = {
38 | if (databases.nonEmpty) {
39 | val tx = systemSession.beginTransaction()
40 | val databasesStatus = try {
41 | tx.run("SHOW DATABASES").list().asScala.map(db => {
42 | (db.get("name").asString(), db.get("currentStatus").asString())
43 | }).toMap
44 | } finally {
45 | tx.close()
46 | }
47 |
48 | val notOnline = databasesStatus.filter(it => {
49 | it._2 != "online"
50 | })
51 |
52 | if (databasesStatus.size < databases.size || notOnline.nonEmpty) {
53 |             throw new RuntimeException(s"Cannot start because the following databases are not online: ${notOnline.keys}")
54 | }
55 | }
56 | }
57 | })
58 | true
59 | }
60 | })
61 | systemSession.close()
62 | driver.close()
63 | }
64 | }
65 |
66 | // docker pull neo4j/neo4j-experimental:4.0.0-rc01-enterprise
67 | class Neo4jContainerExtension(imageName: String = s"neo4j${if (TestUtil.experimental()) "/neo4j-experimental" else ""}:${TestUtil.neo4jVersion()}-enterprise")
68 | extends Neo4jContainer[Neo4jContainerExtension](imageName) {
69 | private var databases = Seq.empty[String]
70 |
71 | def withDatabases(dbs: Seq[String]): Neo4jContainerExtension = {
72 | databases ++= dbs
73 | this
74 | }
75 |
76 |   private def createAuth(): AuthToken = if (getAdminPassword.nonEmpty) AuthTokens.basic("neo4j", getAdminPassword) else AuthTokens.none()
77 |
78 | override def start(): Unit = {
79 | if (databases.nonEmpty) {
80 | val waitAllStrategy = waitStrategy.asInstanceOf[WaitAllStrategy]
81 | waitAllStrategy.withStrategy(new DatabasesWaitStrategy(createAuth()).forDatabases(databases).withStartupTimeout(Duration.ofMinutes(2)))
82 | }
83 | addEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
84 | super.start()
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/pages/overview.adoc:
--------------------------------------------------------------------------------
1 |
2 | = Project Overview
3 |
4 | [abstract]
5 | --
6 | This chapter provides an introduction to the Neo4j Connector for Apache Spark
7 | --
8 |
9 | == Overview
10 |
11 | The Neo4j Connector for Apache Spark is intended to make integrating graphs with Spark easy. There are effectively two ways of using the connector:
12 |
13 | - **As a data source**: read any set of nodes or relationships as a DataFrame in Spark
14 | - **As a sink**: write any DataFrame to Neo4j as a collection of nodes or relationships or, alternatively, use a
15 | Cypher statement to process the records of a DataFrame into the graph pattern of your choice.
16 |
17 | == Multi-Languages
18 |
19 | Because the connector is based on the new Spark DataSource API, other Spark interpreters for languages such as Python and R work as well.
20 |
21 | The API remains the same, and mostly only slight syntax changes are necessary to accommodate the differences between (for example) Python
22 | and Scala.
23 |
24 | == Compatibility
25 |
26 | === Neo4j Compatibility
27 | This connector works with Neo4j 3.5, and the entire 4+ series of Neo4j, whether run as a single instance,
28 | in causal cluster mode, or run as a managed service in Neo4j Aura. The connector does not rely on enterprise features, and as
29 | such will work with Neo4j Community as well, with the appropriate version number.
30 |
31 | [NOTE]
32 | **Neo4j versions prior to 3.5 are not supported**
33 |
34 | === Spark Compatibility
35 |
36 | This connector currently supports Spark 2.4.5+ with Scala 2.11 and Scala 2.12 and Spark 3.0 with Scala 2.12.
37 | Depending on the combination of Spark and Scala version you'll need a different JAR.
38 | JARs are named in the form `neo4j-connector-apache-spark_${scala.version}_${spark.version}_${connector.version}`
39 |
40 | Here's a compatibility table to help you choose the correct JAR.
41 |
42 | .Compatibility Table
43 | |===
44 | | |Spark 2.4 | Spark 3.0
45 |
46 | |*Scala 2.11* |`neo4j-connector-apache-spark_2.11_2.4_4.0.0.jar`|_(not available)_
47 |
48 | |*Scala 2.12* |`neo4j-connector-apache-spark_2.12_2.4_4.0.0.jar`|`neo4j-connector-apache-spark_2.12_3.0_4.0.0.jar`
49 | |===
50 |
51 |
52 | == Training
53 |
54 | If you want an introduction on the Neo4j Connector for Apache Spark, take a look at the training that Andrea Santurbano
55 | presented at NODES2020.
56 |
60 |
61 |
62 | === Spark Compatibility
63 | This connector provides separate builds for Spark 2.4 and Spark 3.0, because their Data Source APIs are incompatible: a JAR built for Spark 2.4 will not work on Spark 3.0 and vice versa. Use the JAR that matches your Spark version, as shown in the compatibility table above.
64 |
65 | === Scala Compatibility
66 | This connector works with Scala 2.11 and 2.12. Because of the differences in the APIs, *different JAR files are needed* depending on your
67 | Scala version. Ensure that you have the appropriate JAR file for your environment.
68 |
69 | == Availability
70 |
71 | This connector is provided under the terms of the Apache 2.0 license, which can be found in the GitHub repository.
72 |
73 | == Support
74 |
75 | For Neo4j Enterprise and Neo4j Aura customers, official releases of this connector are supported under the terms of your existing Neo4j support agreement. This support extends only to regular releases, and excludes
76 | alpha, beta, and pre-releases. If you have any questions about the support policy, please get in touch with
77 | Neo4j.
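78 |
79 | == Quick Example
80 |
81 | As a brief illustration of the two usage modes described above (data source and sink), here is a minimal Scala sketch. It assumes a Neo4j instance reachable at `bolt://localhost:7687` with authentication disabled; the labels used are purely illustrative.
82 |
83 | [source,scala]
84 | ----
85 | import org.apache.spark.sql.{SaveMode, SparkSession}
86 |
87 | val spark = SparkSession.builder()
88 |   .appName("Neo4jConnectorOverview")
89 |   .master("local[*]")
90 |   .getOrCreate()
91 |
92 | // As a data source: read all Person nodes into a DataFrame
93 | val people = spark.read.format("org.neo4j.spark.DataSource")
94 |   .option("url", "bolt://localhost:7687")
95 |   .option("labels", "Person")
96 |   .load()
97 |
98 | // As a sink: append the same rows back to Neo4j under a different label
99 | people.write.format("org.neo4j.spark.DataSource")
100 |   .option("url", "bolt://localhost:7687")
101 |   .option("labels", "PersonCopy")
102 |   .mode(SaveMode.Append)
103 |   .save()
104 | ----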
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Neo4j Connector for Apache Spark
2 |
3 | This repository contains the Neo4j Connector for Apache Spark.
4 |
5 | ## License
6 |
7 | The neo4j-connector-apache-spark project is licensed under the Apache License 2.0.
8 |
9 | ## Generating Documentation from Source
10 |
11 | ```
12 | cd doc
13 | # Install NodeJS dependencies
14 | npm install
15 | # Generate HTML/CSS from asciidoc
16 | ./node_modules/.bin/antora docs.yml
17 | # Start local server to browse docs
18 | npm run start
19 | ```
20 |
21 | This will open http://localhost:8000/ which will serve development docs.
22 |
23 | ## Building
24 |
25 | ### Building for Spark 2.4
26 |
27 | You can build for Spark 2.4 with both Scala 2.11 and Scala 2.12
28 |
29 | ```
30 | ./mvnw clean package -P spark-2.4 -P scala-2.11
31 | ./mvnw clean package -P spark-2.4 -P scala-2.12
32 | ```
33 |
34 | These commands will generate the corresponding targets
35 | * `spark-2.4/target/neo4j-connector-apache-spark_2.11_2.4-4.0.0.jar`
36 | * `spark-2.4/target/neo4j-connector-apache-spark_2.12_2.4-4.0.0.jar`
37 |
38 |
39 | ### Building for Spark 3.0
40 |
41 | You can build for Spark 3.0 by running
42 |
43 | ```
44 | ./mvnw clean package -P spark-3.0 -P scala-2.12
45 | ```
46 |
47 | This will generate `spark-3.0/target/neo4j-connector-apache-spark_2.12_3.0-4.0.0.jar`
48 |
49 | ## Integration with Apache Spark Applications
50 |
51 | **spark-shell, pyspark, or spark-submit**
52 |
53 | `$SPARK_HOME/bin/spark-shell --jars neo4j-connector-apache-spark_2.12_3.0-4.0.0.jar`
54 |
55 | `$SPARK_HOME/bin/spark-shell --packages neo4j-contrib:neo4j-connector-apache-spark_2.12_3.0:4.0.0`
56 |
57 | **sbt**
58 |
59 | If you use the [sbt-spark-package plugin](https://github.com/databricks/sbt-spark-package), in your sbt build file, add:
60 |
61 | ```scala spDependencies += "neo4j-contrib/neo4j-connector-apache-spark_2.12_3.0:4.0.0"```
62 |
63 | Otherwise,
64 |
65 | ```scala
66 | resolvers += "Spark Packages Repo" at "http://dl.bintray.com/spark-packages/maven"
67 | libraryDependencies += "neo4j-contrib" % "neo4j-connector-apache-spark_2.11_2.4" % "4.0.0"
68 | ```
69 |
70 | Or, for Spark 3.0
71 |
72 | ```scala
73 | resolvers += "Spark Packages Repo" at "http://dl.bintray.com/spark-packages/maven"
74 | libraryDependencies += "neo4j-contrib" % "neo4j-connector-apache-spark_2.12_3.0" % "4.0.0"
75 | ```
76 |
77 | **maven**
78 | In your pom.xml, add:
79 |
80 | ```xml
81 | <dependencies>
82 |   <dependency>
83 |     <groupId>neo4j-contrib</groupId>
84 |     <artifactId>neo4j-connector-apache-spark_2.11_2.4</artifactId>
85 |     <version>4.0.0</version>
86 |   </dependency>
87 | </dependencies>
88 |
89 | <repositories>
90 |   <repository>
91 |     <id>SparkPackagesRepo</id>
92 |     <url>http://dl.bintray.com/spark-packages/maven</url>
93 |   </repository>
94 | </repositories>
95 |
96 | ```
97 |
98 | In case of Spark 3.0
99 |
100 | ```xml
101 | <dependencies>
102 |   <dependency>
103 |     <groupId>neo4j-contrib</groupId>
104 |     <artifactId>neo4j-connector-apache-spark_2.12_3.0</artifactId>
105 |     <version>4.0.0</version>
106 |   </dependency>
107 | </dependencies>
108 |
109 | <repositories>
110 |   <repository>
111 |     <id>SparkPackagesRepo</id>
112 |     <url>http://dl.bintray.com/spark-packages/maven</url>
113 |   </repository>
114 | </repositories>
115 |
116 | ```
117 |
118 | For more information about the available versions, visit https://neo4j.com/developer/spark/overview/#_compatibility
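119 |
120 | ## Quick Usage Example
121 |
122 | Once the connector JAR (or package) is on the classpath as shown above, you can read from Neo4j directly. Below is a minimal Scala sketch for `spark-shell`, assuming a local instance at `bolt://localhost:7687` with authentication disabled:
123 |
124 | ```scala
125 | val df = spark.read.format("org.neo4j.spark.DataSource")
126 |   .option("url", "bolt://localhost:7687")
127 |   .option("query", "MATCH (n) RETURN count(n) AS total")
128 |   .load()
129 |
130 | df.show()
131 | ```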
--------------------------------------------------------------------------------
/doc/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | neo4j-spark-connector-docs
8 | 4.0.0
9 | neo4j-connector-apache-spark-doc
10 | Neo4j Connector for Apache Spark - Documentation
11 |
12 |
13 | neo4j-contrib
14 | neo4j-connector-apache-spark
15 | 4.0.0
16 |
17 |
18 |
19 | ${version}
20 |
21 |
22 |
23 |
24 |
25 | org.asciidoctor
26 | asciidoctor-maven-plugin
27 | 1.5.6
28 | false
29 |
30 | html5
31 | images
32 | ${basedir}/asciidoc
33 | index.adoc
34 | ${basedir}/target/docs/${docsversion}
35 |
36 | ${docsversion}
37 | ${project.version}
38 | coderay
39 | style
40 |
41 |
42 | asciidoctor-diagram
43 |
44 |
45 |
46 | ${basedir}/images
47 | ${basedir}/target/docs/${docsversion}/images
48 |
49 |
50 |
51 |
52 |
53 | generate-docs
54 | package
55 |
56 | process-asciidoc
57 |
58 |
59 |
60 |
61 |
62 | org.asciidoctor
63 | asciidoctorj-diagram
64 | 1.3.1
65 |
66 |
67 |
68 |
69 | org.codehaus.mojo
70 | build-helper-maven-plugin
71 | 3.0.0
72 |
73 |
74 | parse-version
75 |
76 | parse-version
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | before_cache:
2 | - sudo chown -R travis:travis $HOME/.m2
3 | # Ensure that jobs do not influence each other with installed Neo4j Spark Connector Jars
4 | - rm -rf $HOME/.m2/repository/org/neo4j-contrib/neo4j-spark-connector/
5 |
6 | cache:
7 | apt: true
8 | directories:
9 | - ${HOME}/.m2
10 | jobs:
11 | include:
12 | - name: "Test for Spark 2.4 and Scala 2.11"
13 | jdk: "openjdk8"
14 | language: scala
15 | scala:
16 | - 2.11.12
17 | dist: trusty
18 | install:
19 | - ./mvnw clean install -D skipTests -P scala-2.11 -P spark-2.4 --no-transfer-progress
20 | script:
21 | - echo '*** Executing tests with Spark 2.4, Scala 2.11 and Neo4j 3.5'
22 | - ./mvnw verify -P neo4j-3.5
23 | - echo '*** Executing tests with Spark 2.4, Scala 2.11 and Neo4j 4.0'
24 | - ./mvnw verify -P neo4j-4.0 --no-transfer-progress
25 | - echo '*** Executing tests with Spark 2.4, Scala 2.11 and Neo4j 4.1'
26 | - ./mvnw verify -P neo4j-4.1 --no-transfer-progress
27 | - echo '*** Executing tests with Spark 2.4, Scala 2.11 and Neo4j 4.2'
28 | - ./mvnw verify -P neo4j-4.2 --no-transfer-progress
29 | - name: "Test for Spark 2.4 and Scala 2.12"
30 | jdk: "openjdk8"
31 | language: scala
32 | scala:
33 | - 2.12.13
34 | dist: trusty
35 | install:
36 | - ./mvnw clean install -D skipTests -P scala-2.12 -P spark-2.4 --no-transfer-progress
37 | script:
38 | - echo '*** Executing tests with Spark 2.4, Scala 2.12 and Neo4j 3.5'
39 | - ./mvnw verify -P neo4j-3.5
40 | - echo '*** Executing tests with Spark 2.4, Scala 2.12 and Neo4j 4.0'
41 | - ./mvnw verify -P neo4j-4.0 --no-transfer-progress
42 | - echo '*** Executing tests with Spark 2.4, Scala 2.12 and Neo4j 4.1'
43 | - ./mvnw verify -P neo4j-4.1 --no-transfer-progress
44 | - echo '*** Executing tests with Spark 2.4, Scala 2.12 and Neo4j 4.2'
45 | - ./mvnw verify -P neo4j-4.2 --no-transfer-progress
46 | - name: "Test for Spark 3.0 and Scala 2.12"
47 | jdk: "openjdk8"
48 | dist: trusty
49 | language: scala
50 | scala:
51 | - 2.12.13
52 | install:
53 | - ./mvnw clean install -D skipTests -P spark-3.0 --no-transfer-progress
54 | script:
55 | - echo '*** Executing tests with Spark 3.0, Scala 2.12 and Neo4j 3.5'
56 | - ./mvnw verify -P neo4j-3.5
57 | - echo '*** Executing tests with Spark 3.0, Scala 2.12 and Neo4j 4.0'
58 | - ./mvnw verify -P neo4j-4.0 --no-transfer-progress
59 | - echo '*** Executing tests with Spark 3.0, Scala 2.12 and Neo4j 4.1'
60 | - ./mvnw verify -P neo4j-4.1 --no-transfer-progress
61 | - echo '*** Executing tests with Spark 3.0, Scala 2.12 and Neo4j 4.2'
62 | - ./mvnw verify -P neo4j-4.2 --no-transfer-progress
63 | # dfantuzzi: Spark package for Scala 2.13 is not yet published on maven https://mvnrepository.com/artifact/org.apache.spark/spark-core
64 | # - name: "Test for Scala 2.13"
65 | # jdk: "openjdk8"
66 | # dist: trusty
67 | # language: scala
68 | # scala:
69 | # - 2.13.4
70 | # install:
71 | # - ./mvnw clean install -D skipTests -P scala-2.13 --no-transfer-progress
72 | # script:
73 | # - echo '*** Executing tests with Scala 2.13 and Neo4j 3.5'
74 | # - ./mvnw verify -P neo4j-3.5
75 | # - echo '*** Executing tests with Scala 2.13 and Neo4j 4.0'
76 | # - ./mvnw verify -P neo4j-4.0 --no-transfer-progress
77 | # - echo '*** Executing tests with Scala 2.13 and Neo4j 4.1'
78 | # - ./mvnw verify -P neo4j-4.1 --no-transfer-progress
79 | # - echo '*** Executing tests with Scala 2.13 and Neo4j 4.2'
80 | # - ./mvnw verify -P neo4j-4.2 --no-transfer-progress
--------------------------------------------------------------------------------
/spark-3.0/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j41xTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.junit.Assert.assertEquals
4 | import org.junit.{Assume, BeforeClass, Test}
5 | import org.neo4j.driver.summary.ResultSummary
6 | import org.neo4j.driver.{Transaction, TransactionWork}
7 |
8 | object DataSourceReaderNeo4j41xTSE {
9 | @BeforeClass
10 | def checkNeo4jVersion() {
11 | val neo4jVersion = TestUtil.neo4jVersion()
12 | Assume.assumeTrue(!neo4jVersion.startsWith("3.5") && !neo4jVersion.startsWith("4.0"))
13 | }
14 | }
15 |
16 | class DataSourceReaderNeo4j41xTSE extends SparkConnectorScalaBaseTSE {
17 |
18 | @Test
19 | def testEmptyDataset(): Unit = {
20 | val df = ss.read
21 | .format(classOf[DataSource].getName)
22 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl)
23 | .option("query", "MATCH (e:ID_DO_NOT_EXIST) RETURN id(e) as f, 1 as g")
24 | .load
25 |
26 | assertEquals(0, df.count())
27 | assertEquals(Seq("f", "g"), df.columns.toSeq)
28 | }
29 |
30 | @Test
31 | def testColumnSorted(): Unit = {
32 | SparkConnectorScalaSuiteIT.session()
33 | .writeTransaction(
34 | new TransactionWork[ResultSummary] {
35 | override def execute(tx: Transaction): ResultSummary = tx.run("CREATE (i1:Instrument{name: 'Drums', id: 1}), (i2:Instrument{name: 'Guitar', id: 2})").consume()
36 | })
37 |
38 | val df = ss.read
39 | .format(classOf[DataSource].getName)
40 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl)
41 | .option("query", "MATCH (i:Instrument) RETURN id(i) as internal_id, i.id as id, i.name as name, i.name")
42 | .load
43 | .orderBy("id")
44 |
45 | assertEquals(1L, df.collectAsList().get(0).get(1))
46 | assertEquals("Drums", df.collectAsList().get(0).get(2))
47 | assertEquals(Seq("internal_id", "id", "name", "i.name"), df.columns.toSeq)
48 | }
49 |
50 | @Test
51 | def testComplexReturnStatement(): Unit = {
52 | val total = 100
53 | val fixtureQuery: String =
54 | s"""UNWIND range(1, $total) as id
55 | |CREATE (pr:Product {id: id * rand(), name: 'Product ' + id})
56 | |CREATE (pe:Person {id: id, fullName: 'Person ' + id})
57 | |CREATE (pe)-[:BOUGHT{when: rand(), quantity: rand() * 1000}]->(pr)
58 | |RETURN *
59 | """.stripMargin
60 |
61 | SparkConnectorScalaSuiteIT.session()
62 | .writeTransaction(
63 | new TransactionWork[ResultSummary] {
64 | override def execute(tx: Transaction): ResultSummary = tx.run(fixtureQuery).consume()
65 | })
66 |
67 | val df = ss.read.format(classOf[DataSource].getName)
68 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl)
69 | .option("query",
70 | """MATCH (p:Person)-[b:BOUGHT]->(pr:Product)
71 | |RETURN id(p) AS personId, id(pr) AS productId, {quantity: b.quantity, when: b.when} AS map, "some string" as someString, {anotherField: "201"} as map2""".stripMargin)
72 | .option("schema.strategy", "string")
73 | .load()
74 |
75 | assertEquals(Seq("personId", "productId", "map", "someString", "map2"), df.columns.toSeq)
76 | assertEquals(100, df.count())
77 | }
78 |
79 | @Test
80 | def testComplexReturnStatementNoValues(): Unit = {
81 | val df = ss.read.format(classOf[DataSource].getName)
82 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl)
83 | .option("query",
84 | """MATCH (p:Person)-[b:BOUGHT]->(pr:Product)
85 | |RETURN id(p) AS personId, id(pr) AS productId, {quantity: b.quantity, when: b.when} AS map, "some string" as someString, {anotherField: "201", and: 1} as map2""".stripMargin)
86 | .option("schema.strategy", "string")
87 | .load()
88 |
89 | assertEquals(Seq("personId", "productId", "map", "someString", "map2"), df.columns.toSeq)
90 | assertEquals(0, df.count())
91 | }
92 |
93 | }
94 |
--------------------------------------------------------------------------------
/spark-2.4/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j41xTSE.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark
2 |
3 | import org.apache.spark.sql.DataFrame
4 | import org.junit.Assert.assertEquals
5 | import org.junit.{Assume, BeforeClass, Test}
6 | import org.neo4j.driver.summary.ResultSummary
7 | import org.neo4j.driver.{SessionConfig, Transaction, TransactionWork}
8 |
9 | object DataSourceReaderNeo4j41xTSE {
10 | @BeforeClass
11 | def checkNeo4jVersion() {
12 | val neo4jVersion = TestUtil.neo4jVersion()
13 | Assume.assumeTrue(!neo4jVersion.startsWith("3.5") && !neo4jVersion.startsWith("4.0"))
14 | }
15 | }
16 |
17 | class DataSourceReaderNeo4j41xTSE extends SparkConnectorScalaBaseTSE {
18 |
19 | @Test
20 | def testEmptyDataset(): Unit = {
21 | val df = ss.read
22 | .format(classOf[DataSource].getName)
23 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl)
24 | .option("query", "MATCH (e:ID_DO_NOT_EXIST) RETURN id(e) as f, 1 as g")
25 | .load
26 |
27 | assertEquals(0, df.count())
28 | assertEquals(Seq("f", "g"), df.columns.toSeq)
29 | }
30 |
31 | @Test
32 | def testColumnSorted(): Unit = {
33 | SparkConnectorScalaSuiteIT.session()
34 | .writeTransaction(
35 | new TransactionWork[ResultSummary] {
36 | override def execute(tx: Transaction): ResultSummary = tx.run("CREATE (i1:Instrument{name: 'Drums', id: 1}), (i2:Instrument{name: 'Guitar', id: 2})").consume()
37 | })
38 |
39 | val df = ss.read
40 | .format(classOf[DataSource].getName)
41 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl)
42 | .option("query", "MATCH (i:Instrument) RETURN id(i) as internal_id, i.id as id, i.name as name, i.name")
43 | .load
44 | .orderBy("id")
45 |
46 | assertEquals(1L, df.collectAsList().get(0).get(1))
47 | assertEquals("Drums", df.collectAsList().get(0).get(2))
48 | assertEquals(Seq("internal_id", "id", "name", "i.name"), df.columns.toSeq)
49 | }
50 |
51 | @Test
52 | def testComplexReturnStatement(): Unit = {
53 | val total = 100
54 | val fixtureQuery: String =
55 | s"""UNWIND range(1, $total) as id
56 | |CREATE (pr:Product {id: id * rand(), name: 'Product ' + id})
57 | |CREATE (pe:Person {id: id, fullName: 'Person ' + id})
58 | |CREATE (pe)-[:BOUGHT{when: rand(), quantity: rand() * 1000}]->(pr)
59 | |RETURN *
60 | """.stripMargin
61 |
62 | SparkConnectorScalaSuiteIT.session()
63 | .writeTransaction(
64 | new TransactionWork[ResultSummary] {
65 | override def execute(tx: Transaction): ResultSummary = tx.run(fixtureQuery).consume()
66 | })
67 |
68 | val df = ss.read.format(classOf[DataSource].getName)
69 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl)
70 | .option("query",
71 | """MATCH (p:Person)-[b:BOUGHT]->(pr:Product)
72 | |RETURN id(p) AS personId, id(pr) AS productId, {quantity: b.quantity, when: b.when} AS map, "some string" as someString, {anotherField: "201"} as map2""".stripMargin)
73 | .option("schema.strategy", "string")
74 | .load()
75 |
76 | assertEquals(Seq("personId", "productId", "map", "someString", "map2"), df.columns.toSeq)
77 | assertEquals(100, df.count())
78 | }
79 |
80 | @Test
81 | def testComplexReturnStatementNoValues(): Unit = {
82 | val df = ss.read.format(classOf[DataSource].getName)
83 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl)
84 | .option("query",
85 | """MATCH (p:Person)-[b:BOUGHT]->(pr:Product)
86 | |RETURN id(p) AS personId, id(pr) AS productId, {quantity: b.quantity, when: b.when} AS map, "some string" as someString, {anotherField: "201", and: 1} as map2""".stripMargin)
87 | .option("schema.strategy", "string")
88 | .load()
89 |
90 | assertEquals(Seq("personId", "productId", "map", "someString", "map2"), df.columns.toSeq)
91 | assertEquals(0, df.count())
92 | }
93 |
94 | }
95 |
--------------------------------------------------------------------------------
/doc/javascript/mp-nav.js:
--------------------------------------------------------------------------------
1 | /**
2 | * JavaScript for navigation in multi-page editions of Neo4j documentation.
3 | */
4 |
5 | function isElementInViewport (el) {
6 | if (typeof jQuery === "function" && el instanceof jQuery) {
7 | el = el[0];
8 | }
9 | var rect = el.getBoundingClientRect();
10 | return (
11 | rect.top >= 0 &&
12 | rect.left >= 0 &&
13 | rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) && /*or $(window).height() */
14 | rect.right <= (window.innerWidth || document.documentElement.clientWidth) /*or $(window).width() */
15 | );
16 | }
17 |
18 | $(document).ready(function() {
19 | var $title = $(
20 | 'h1,h2,h3,h4'
21 | ).first();
22 | var $navtitle = $('.nav-title');
23 | var visible = isElementInViewport($title);
24 | if (visible) {
25 | $navtitle.hide();
26 | }
27 | $navtitle.removeClass('hidden');
28 |
29 | function showHide(nowVisible) {
30 | if ($(window).width() >= 768 && visible !== nowVisible) {
31 | $navtitle.fadeToggle();
32 | visible = !visible;
33 | }
34 | }
35 | var timeoutId = null;
36 | addEventListener("scroll", function() {
37 | if (timeoutId) clearTimeout(timeoutId);
38 | timeoutId = setTimeout(showHide, 200, isElementInViewport($title));
39 | }, true);
40 |
41 | setNavIconColor();
42 | });
43 |
44 | function setNavIconColor() {
45 | var color = null;
46 | $('.nav-previous > a, .nav-next > a').hover(function (){
47 |     var $me = $(this);
48 | $me.children('span.fa').css('border-color', $me.css('color'));
49 | }, function(){
50 | $(this).children('span.fa').css('border-color', "");
51 | });
52 | }
53 |
54 | // Highlight the current chapter/section in the TOC
55 | function highlightToc() {
56 | var toc = document.querySelector('nav.toc > ul.toc');
57 | var allAnchors = toc.getElementsByTagName('a');
58 | var thisAnchor;
59 | var urlDissimilarity = 1000;
60 | for (i=0; i < allAnchors.length; i++) {
61 | var candidate = allAnchors.item(i).href;
62 |     var test = document.URL.replace(candidate, '');
63 | // console.log('candidate:', candidate, 'test:', test, 'urlDissimilarity:', test.length);
64 | if (test.length < urlDissimilarity && test !== document.URL) {
65 | urlDissimilarity = test.length;
66 | thisAnchor = allAnchors.item(i);
67 | }
68 | };
69 |
70 | // console.log("[XXX] RESULT:", thisAnchor, "dissimilarity:", urlDissimilarity);
71 |
72 | if (thisAnchor !== undefined) {
73 | thisAnchor.parentElement.classList.add('active-nested-section');
74 | var topLevel = thisAnchor;
75 | while (topLevel.parentElement !== toc) {
76 | // console.log("traversing up:", topLevel);
77 | topLevel = topLevel.parentElement;
78 | }
79 | if (thisAnchor !== topLevel) {
80 | // console.log("highlighting:", topLevel);
81 | topLevel.classList.add('active-toplevel-section');
82 | }
83 | }
84 | }
85 |
86 | // Highlight the active publication in the docs library header
87 | function highlightLibraryHeader() {
88 | var thisName = window.docMeta.name
89 | var thisEntry;
90 | $('header > ul.documentation-library').children('li').children('a').each(
91 | function (key, value) {
92 | var href = $(this).attr('href');
93 | if (href.includes(thisName)) {
94 | $(this).css({
95 | color: '#428bca',
96 | backgroundColor: 'rgb(66, 139, 202, 0.05)',
97 | borderBottom: '2px solid #428bca',
98 | padding: '4px',
99 | marginBottom: '-6px'
100 | });
101 | }
102 | // console.log('href:', href, 'thisUrl:', thisUrl, 'thisName:', thisName);
103 | }
104 | );
105 | }
106 |
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/pages/quick-java-example.adoc:
--------------------------------------------------------------------------------
1 | = Quick Java Example
2 |
3 | In order to use the Neo4j Connector for Apache Spark in your Java application,
4 | you need to add the Spark Packages repository and the connector dependency.
5 |
6 | == Add the dependency
7 | === Maven
8 |
9 | [source,xml]
10 | ----
11 | <project>
12 |
13 |   ...
14 |
15 |   <dependencies>
16 |
17 |     ...
18 |
19 |     <dependency>
20 |       <groupId>neo4j-contrib</groupId>
21 |       <artifactId>neo4j-connector-apache-spark_${scala.version}_${spark.version}</artifactId>
22 |       <version>4.0.0</version>
23 |     </dependency>
24 |
25 |   </dependencies>
26 |
27 |   <repositories>
28 |
29 |     <repository>
30 |       <id>SparkPackagesRepo</id>
31 |       <url>http://dl.bintray.com/spark-packages/maven</url>
32 |     </repository>
33 |
34 |   </repositories>
35 |
36 | </project>
37 |
38 | ----
39 |
40 | === sbt
41 |
42 | [source,`build.sbt`]
43 | ----
44 | resolvers += "Spark Packages Repo" at "http://dl.bintray.com/spark-packages/maven"
45 | libraryDependencies += "neo4j-contrib" % "neo4j-connector-apache-spark_2.12_3.0" % "4.0.0"
46 | ----
47 |
48 | === Gradle
49 |
50 | [source,`build.gradle`]
51 | ----
52 |
53 | dependencies{
54 | // list of dependencies
55 |     compile "neo4j-contrib:neo4j-connector-apache-spark_2.12_3.0:4.0.0"
56 | }
57 |
58 | repositories {
59 | // list of other repositories
60 | sparkPackages {
61 | url "http://dl.bintray.com/spark-packages/maven"
62 | }
63 | }
64 | ----
65 |
66 | == Code
67 |
68 | Let's say you have a Neo4j instance with link:https://neo4j.com/developer/example-data/#built-in-examples[the movie graph] running on `localhost`.
69 |
70 | [source,java]
71 | ----
72 | import org.apache.spark.sql.Dataset;
73 | import org.apache.spark.sql.Row;
74 | import org.apache.spark.sql.SparkSession;
75 |
76 | public class SparkApp {
77 |
78 | public static void main(String[] args) {
79 | SparkSession spark = SparkSession
80 | .builder()
81 | .appName("Spark SQL Example")
82 | .config("spark.master", "local")
83 | .getOrCreate();
84 |
85 | Dataset ds = spark.read().format("org.neo4j.spark.DataSource")
86 | .option("url", "bolt://localhost:7687")
87 | .option("authentication.basic.username", "neo4j")
88 | .option("authentication.basic.password", "password")
89 | .option("labels", "Person")
90 | .load();
91 |
92 | ds.show();
93 | }
94 | }
95 | ----
96 |
97 | This code will produce the following output:
98 |
99 | [source,text]
100 | ----
101 | +----+--------+------------------+----+
102 | |<id>|<labels>|              name|born|
103 | +----+--------+------------------+----+
104 | | 1|[Person]| Keanu Reeves|1964|
105 | | 2|[Person]| Carrie-Anne Moss|1967|
106 | | 3|[Person]|Laurence Fishburne|1961|
107 | | 4|[Person]| Hugo Weaving|1960|
108 | | 5|[Person]| Andy Wachowski|1967|
109 | | 6|[Person]| Lana Wachowski|1965|
110 | | 7|[Person]| Joel Silver|1952|
111 | | 8|[Person]| Emil Eifrem|1978|
112 | | 12|[Person]| Charlize Theron|1975|
113 | | 13|[Person]| Al Pacino|1940|
114 | | 14|[Person]| Taylor Hackford|1944|
115 | | 16|[Person]| Tom Cruise|1962|
116 | | 17|[Person]| Jack Nicholson|1937|
117 | | 18|[Person]| Demi Moore|1962|
118 | | 19|[Person]| Kevin Bacon|1958|
119 | | 20|[Person]| Kiefer Sutherland|1966|
120 | | 21|[Person]| Noah Wyle|1971|
121 | | 22|[Person]| Cuba Gooding Jr.|1968|
122 | | 23|[Person]| Kevin Pollak|1957|
123 | | 24|[Person]| J.T. Walsh|1943|
124 | +----+--------+------------------+----+
125 | only showing top 20 rows
126 | ----
127 |
--------------------------------------------------------------------------------
/common/src/main/scala/org/neo4j/spark/util/Neo4jImplicits.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.util
2 |
3 | import javax.lang.model.SourceVersion
4 | import org.apache.spark.sql.types.{DataTypes, StructField, StructType}
5 | import org.neo4j.driver.types.{Entity, Node, Relationship}
6 | import org.neo4j.spark.service.SchemaService
7 | import org.apache.spark.sql.sources.{EqualNullSafe, EqualTo, Filter, GreaterThan, GreaterThanOrEqual, In, IsNotNull, IsNull, LessThan, LessThanOrEqual, Not, StringContains, StringEndsWith, StringStartsWith}
8 |
9 | import scala.collection.JavaConverters._
10 |
11 | object Neo4jImplicits {
12 |
13 | implicit class CypherImplicits(str: String) {
14 | private def isValidCypherIdentifier() = SourceVersion.isIdentifier(str) && !str.trim.startsWith("$")
15 |
16 | def quote(): String = if (!isValidCypherIdentifier() && !str.trim.startsWith("`") && !str.trim.endsWith("`")) s"`$str`" else str
17 |
18 | def unquote(): String = str.replaceAll("`", "");
19 |
20 | def removeAlias(): String = {
21 | val splatString = str.split('.')
22 |
23 | if (splatString.size > 1) {
24 | splatString.tail.mkString(".")
25 | }
26 | else {
27 | str
28 | }
29 | }
30 | }
31 |
32 | implicit class EntityImplicits(entity: Entity) {
33 | def toStruct(): StructType = {
34 | val fields = entity.asMap().asScala
35 | .groupBy(_._1)
36 | .map(t => {
37 | val value = t._2.head._2
38 | val cypherType = SchemaService.normalizedClassNameFromGraphEntity(value)
39 | StructField(t._1, SchemaService.cypherToSparkType(cypherType))
40 | })
41 | val entityFields = entity match {
42 | case node: Node => {
43 | Seq(StructField(Neo4jUtil.INTERNAL_ID_FIELD, DataTypes.LongType, nullable = false),
44 | StructField(Neo4jUtil.INTERNAL_LABELS_FIELD, DataTypes.createArrayType(DataTypes.StringType), nullable = true))
45 | }
46 | case relationship: Relationship => {
47 | Seq(StructField(Neo4jUtil.INTERNAL_REL_ID_FIELD, DataTypes.LongType, false),
48 | StructField(Neo4jUtil.INTERNAL_REL_TYPE_FIELD, DataTypes.StringType, false),
49 | StructField(Neo4jUtil.INTERNAL_REL_SOURCE_ID_FIELD, DataTypes.LongType, false),
50 | StructField(Neo4jUtil.INTERNAL_REL_TARGET_ID_FIELD, DataTypes.LongType, false))
51 | }
52 | }
53 |
54 | StructType(entityFields ++ fields)
55 | }
56 |
57 | def toMap(): java.util.Map[String, Any] = {
58 | val entityMap = entity.asMap().asScala
59 | val entityFields = entity match {
60 | case node: Node => {
61 | Map(Neo4jUtil.INTERNAL_ID_FIELD -> node.id(),
62 | Neo4jUtil.INTERNAL_LABELS_FIELD -> node.labels())
63 | }
64 | case relationship: Relationship => {
65 | Map(Neo4jUtil.INTERNAL_REL_ID_FIELD -> relationship.id(),
66 | Neo4jUtil.INTERNAL_REL_TYPE_FIELD -> relationship.`type`(),
67 | Neo4jUtil.INTERNAL_REL_SOURCE_ID_FIELD -> relationship.startNodeId(),
68 | Neo4jUtil.INTERNAL_REL_TARGET_ID_FIELD -> relationship.endNodeId())
69 | }
70 | }
71 | (entityFields ++ entityMap).asJava
72 | }
73 | }
74 |
75 | implicit class FilterImplicit(filter: Filter) {
76 | def getAttribute: Option[String] = Option(filter match {
77 | case eqns: EqualNullSafe => eqns.attribute
78 | case eq: EqualTo => eq.attribute
79 | case gt: GreaterThan => gt.attribute
80 | case gte: GreaterThanOrEqual => gte.attribute
81 | case lt: LessThan => lt.attribute
82 | case lte: LessThanOrEqual => lte.attribute
83 | case in: In => in.attribute
84 | case notNull: IsNotNull => notNull.attribute
85 | case isNull: IsNull => isNull.attribute
86 | case startWith: StringStartsWith => startWith.attribute
87 | case endsWith: StringEndsWith => endsWith.attribute
88 | case contains: StringContains => contains.attribute
89 | case not: Not => not.child.getAttribute.orNull
90 | case _ => null
91 | })
92 |
93 | def isAttribute(entityType: String): Boolean = {
94 | getAttribute.exists(_.contains(s"$entityType."))
95 | }
96 |
97 | def getAttributeWithoutEntityName: Option[String] = filter.getAttribute.map(_.unquote().split('.').tail.mkString("."))
98 | }
99 |
100 | implicit class StructTypeImplicit(structType: StructType) {
101 | def getFieldsName: Seq[String] = if (structType == null) {
102 | Seq.empty
103 | } else {
104 | structType.map(structField => structField.name)
105 | }
106 |
107 | def getMissingFields(fields: Set[String]): Set[String] = {
108 | val structFieldsNames = structType.getFieldsName
109 | fields.filterNot(structFieldsNames.contains(_))
110 | }
111 | }
112 |
113 | }
114 |
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/pages/gds.adoc:
--------------------------------------------------------------------------------
1 | = Using with Graph Data Science
2 |
3 | [abstract]
4 | --
5 | This chapter provides information on using the Neo4j Connector for Apache Spark with Neo4j's Graph Data Science Library.
6 | --
7 |
8 | link:https://neo4j.com/graph-data-science-library/[Neo4j's Graph Data Science (GDS) Library] lets data scientists benefit from powerful graph algorithms. It provides unsupervised machine learning methods and heuristics that learn and describe the topology of your graph. The GDS Library includes hardened graph algorithms with enterprise features, like deterministic seeding for consistent results and reproducible machine learning workflows.
9 |
10 | GDS Algorithms are bucketed into 5 "families":
11 |
12 | * _Community detection_ which detects group clusters and partition options
13 | * _Centrality_ which helps compute the importance of a node in a graph
14 | * _Heuristic Link Prediction_ which estimates the likelihood of nodes forming a relationship
15 | * _Similarity_ which evaluates how alike 2 nodes are
16 | * _Pathfinding & Search_ which finds optimal paths, evaluates route availability, and so on.
17 |
18 | == GDS Operates via Cypher
19 |
20 | All of the link:https://neo4j.com/docs/graph-data-science/current/[functionality of GDS] is used by issuing Cypher queries. As such, it is easily
21 | accessible via Spark, because the Neo4j Connector for Apache Spark can issue Cypher queries and read their results back. This combination means
22 | that you can use Neo4j & GDS as a graph co-processor in an existing ML workflow that you may implement in Apache Spark.
23 |
24 | == Example
25 |
26 | In the link:https://github.com/utnaf/spark-connector-notebooks[sample Zeppelin Notebook repository], there is a GDS example that can be run against
27 | a Neo4j Sandbox, showing how to use the two together.
28 |
29 | === Create a Virtual Graph in GDS Using Spark
30 |
31 | This is very simple, straightforward code; it just constructs the right Cypher statement to link:https://neo4j.com/docs/graph-data-science/current/common-usage/creating-graphs/[create a virtual graph in GDS], and returns the results.
32 |
33 | [source,python]
34 | ----
35 | %pyspark
36 | query = """
37 | CALL gds.graph.create('got-interactions', 'Person', {
38 | INTERACTS: {
39 | orientation: 'UNDIRECTED'
40 | }
41 | })
42 | YIELD graphName, nodeCount, relationshipCount, createMillis
43 | RETURN graphName, nodeCount, relationshipCount, createMillis
44 | """
45 |
46 | df = spark.read.format("org.neo4j.spark.DataSource") \
47 | .option("url", host) \
48 | .option("authentication.type", "basic") \
49 | .option("authentication.basic.username", user) \
50 | .option("authentication.basic.password", password) \
51 | .option("query", query) \
52 | .option("partitions", "1") \
53 | .load()
54 | ----
55 |
56 |
57 | [NOTE]
58 | **Ensure partitions is set to 1. You do not want to execute this query in parallel, only once.**
59 |
60 | [NOTE]
61 | **When you use stored procedures, you must include a RETURN clause**
62 |
63 | === Run a GDS Analysis and Stream the Results Back
64 |
65 | Running an analysis is just another Cypher query, executed as a Spark read from Neo4j.
66 |
67 | [source,python]
68 | ----
69 | %pyspark
70 |
71 | query = """
72 | CALL gds.pageRank.stream('got-interactions')
73 | YIELD nodeId, score
74 | RETURN gds.util.asNode(nodeId).name AS name, score
75 | """
76 |
77 | df = spark.read.format("org.neo4j.spark.DataSource") \
78 | .option("url", host) \
79 | .option("authentication.type", "basic") \
80 | .option("authentication.basic.username", user) \
81 | .option("authentication.basic.password", password) \
82 | .option("query", query) \
83 | .option("partitions", "1") \
84 | .load()
85 |
86 | df.show()
87 | ----
88 |
89 | [NOTE]
90 | **Ensure partitions is set to 1. The algorithm should only be executed once.**
91 |
92 | === Streaming versus Persisting GDS Results
93 |
94 | When link:https://neo4j.com/docs/graph-data-science/current/common-usage/running-algos/[running GDS algorithms], the library gives you the choice
95 | of either streaming the algorithm's results back to the caller or mutating the underlying graph. Using GDS together with Spark provides the
96 | additional option of transforming or otherwise post-processing a GDS result. Ultimately, either modality works with the Neo4j Connector for Apache
97 | Spark, and it is up to you to decide what is best for your use case.
98 |
99 | If you have an architecture where the GDS algorithm is being run on a read replica or separate stand-alone instance, it may be convenient to stream
100 | the results back (as you cannot write them to a read replica), and then use the connector's write functionality to take that stream of results and
101 | write them back to a _different Neo4j connection_, i.e. to a regular causal cluster.
102 |
103 |
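104 | For example, the write-back step described above might look like the following minimal Scala sketch (the same options apply from PySpark). The target URL, credentials, label, and the stand-in result DataFrame are illustrative assumptions only.
105 |
106 | [source,scala]
107 | ----
108 | import org.apache.spark.sql.{SaveMode, SparkSession}
109 |
110 | val spark = SparkSession.builder()
111 |   .appName("gds-write-back")
112 |   .master("local[*]")
113 |   .getOrCreate()
114 | import spark.implicits._
115 |
116 | // Stand-in for the streamed GDS results (name, score)
117 | val results = Seq(("Jon Snow", 3.14), ("Daenerys Targaryen", 2.71)).toDF("name", "score")
118 |
119 | // Write the results to a *different* Neo4j connection, e.g. a writable cluster
120 | // rather than the read replica the algorithm ran against.
121 | results.write.format("org.neo4j.spark.DataSource")
122 |   .option("url", "bolt://writable-neo4j-host:7687")
123 |   .option("authentication.basic.username", "neo4j")
124 |   .option("authentication.basic.password", "password")
125 |   .option("labels", "PageRankScore")
126 |   .mode(SaveMode.Append)
127 |   .save()
128 | ----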
--------------------------------------------------------------------------------
/doc/javascript/tabs-for-chunked.js:
--------------------------------------------------------------------------------
1 | function tabTheSource($content) {
2 | var storedLanguage = getCodeExampleLanguage();
3 | var LANGUAGES = {
4 | 'dotnet': 'C#',
5 | 'java': 'Java',
6 | 'javascript': 'JavaScript',
7 | 'python': 'Python'
8 | };
9 |   var $UL = $('<ul class="nav nav-tabs"/>');
10 |   var $LI = $('<li/>');
11 |   var $A = $('<a data-toggle="tab"/>');
12 |   var $WRAPPER = $('<div class="tab-content"/>');
13 | var snippets = [];
14 | var languageEventElements = {};
15 |
16 | var focusSelectedExample = function(e) {
17 | var target = $(e.target);
18 | var beforeTop = target.offset().top - $(window).scrollTop();
19 | setTimeout(function(){
20 | var newTop = target.offset().top - beforeTop;
21 | $('html,body').scrollTop(newTop);
22 | }, 1);
23 | }
24 |
25 | var selectTab = function (e) {
26 | var language = $(e.target).data('lang');
27 | var $elements = languageEventElements[language];
28 | for (var j = 0; j < $elements.length; j++) {
29 | $elements[j].tab('show');
30 | }
31 | if (storageAvailable('sessionStorage')) {
32 | sessionStorage.setItem('code_example_language', language);
33 | }
34 | }
35 |
36 | $('div.tabbed-example', $content).each(function () {
37 | var $exampleBlock = $(this);
38 | var title = $exampleBlock.children('div.example-title', this).first().text();
39 | var languages = [];
40 | var $languageBlocks = {};
41 | $(this).children('div.tabbed-example-contents').children('div.listingblock,div.informalexample[class*="include-with"]').each(function () {
42 | var $this = $(this);
43 | var language = undefined;
44 | if ($this.hasClass('listingblock')) {
45 | language = $('code', this).data('lang');
46 | } else {
47 | for (var key in LANGUAGES) {
48 | if ($this.hasClass('include-with-' + key)) {
49 | language = key;
50 | break;
51 | }
52 | }
53 | }
54 | languages.push(language);
55 | $languageBlocks[language] = $(this);
56 | });
57 | if (languages.length > 1) {
58 | snippets.push({
59 | '$exampleBlock': $exampleBlock,
60 | 'languages': languages,
61 | '$languageBlocks': $languageBlocks
62 | });
63 | }
64 | });
65 |
66 | var idNum = 0;
67 | for (var ix = 0; ix < snippets.length; ix++) {
68 | var snippet = snippets[ix];
69 | var languages = snippet.languages;
70 | languages.sort();
71 | var $languageBlocks = snippet.$languageBlocks;
72 | var $exampleBlock = snippet.$exampleBlock;
73 | var idBase = 'tabbed-example-' + idNum++;
74 | var $wrapper = $WRAPPER.clone();
75 | var $ul = $UL.clone();
76 |
77 | for (var i = 0; i < languages.length; i++) {
78 | var language = languages[i];
79 | var $content = $($languageBlocks[language]);
80 | var id;
81 | if ($content.attr('id')) {
82 | id = $content.attr('id');
83 | } else {
84 | id = idBase + '-' + language;
85 | $content.attr('id', id);
86 | }
87 | $content.addClass('tab-pane').css('position', 'relative');
88 | var $li = $LI.clone();
89 | var $a = $A.clone();
90 |
91 | $a.attr('href', '#' + id).text(LANGUAGES[language]).data('lang', language).on('shown.bs.tab', selectTab).on('click', focusSelectedExample);
92 |
93 | if (language in languageEventElements) {
94 | languageEventElements[language].push($a);
95 | } else {
96 | languageEventElements[language] = [$a];
97 | }
98 | $wrapper.append($content);
99 |
100 | if (storedLanguage) {
101 | if (language === storedLanguage) {
102 | $li.addClass('active');
103 | $content.addClass('active');
104 | }
105 | } else if (i === 0) {
106 | $li.addClass('active');
107 | $content.addClass('active');
108 | }
109 |
110 | $li.append($a);
111 | $ul.append($li);
112 | }
113 | $exampleBlock.children('div.example-title', this).first().after($ul);
114 | $exampleBlock.append($wrapper);
115 | }
116 | }
117 |
118 | function storageAvailable(type) {
119 | try {
120 | var storage = window[type];
121 | var x = '__storage_test__';
122 | storage.setItem(x, x);
123 | storage.removeItem(x);
124 | return true;
125 | }
126 | catch(e) {
127 | return false;
128 | }
129 | }
130 |
131 | function getCodeExampleLanguage() {
132 | return storageAvailable('sessionStorage') ? sessionStorage.getItem('code_example_language') || false : false;
133 | }
134 |
--------------------------------------------------------------------------------
/common/src/main/scala/org/neo4j/spark/writer/BaseDataWriter.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.writer
2 |
3 | import org.apache.spark.internal.Logging
4 | import org.apache.spark.sql.SaveMode
5 | import org.apache.spark.sql.catalyst.InternalRow
6 | import org.apache.spark.sql.types.StructType
7 | import org.neo4j.driver.exceptions.{ClientException, Neo4jException, ServiceUnavailableException, SessionExpiredException}
8 | import org.neo4j.driver.{Session, Transaction, Values}
9 | import org.neo4j.spark.service._
10 | import org.neo4j.spark.util.Neo4jUtil.closeSafety
11 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions, Neo4jUtil}
12 |
13 | import java.util
14 | import java.util.concurrent.CountDownLatch
15 | import scala.collection.JavaConverters._
16 |
17 | abstract class BaseDataWriter(jobId: String,
18 | partitionId: Int,
19 | structType: StructType,
20 | saveMode: SaveMode,
21 | options: Neo4jOptions,
22 | scriptResult: java.util.List[java.util.Map[String, AnyRef]]) extends Logging {
23 | private val driverCache: DriverCache = new DriverCache(options.connection, jobId)
24 |
25 | private var transaction: Transaction = _
26 | private var session: Session = _
27 |
28 | private val mappingService = new MappingService(new Neo4jWriteMappingStrategy(options), options)
29 |
30 | private val batch: util.List[java.util.Map[String, Object]] = new util.ArrayList[util.Map[String, Object]]()
31 |
32 | private val retries = new CountDownLatch(options.transactionMetadata.retries)
33 |
34 | val query: String = new Neo4jQueryService(options, new Neo4jQueryWriteStrategy(saveMode)).createQuery()
35 |
36 | def write(record: InternalRow): Unit = {
37 | batch.add(mappingService.convert(record, structType))
38 | if (batch.size() == options.transactionMetadata.batchSize) {
39 | writeBatch()
40 | }
41 | }
42 |
43 | private def writeBatch(): Unit = {
44 | try {
45 | if (session == null || !session.isOpen) {
46 | session = driverCache.getOrCreate().session(options.session.toNeo4jSession)
47 | }
48 | if (transaction == null || !transaction.isOpen) {
49 | transaction = session.beginTransaction()
50 | }
51 | log.info(
52 | s"""Writing a batch of ${batch.size()} elements to Neo4j,
53 | |for jobId=$jobId and partitionId=$partitionId
54 | |with query: $query
55 | |""".stripMargin)
56 | val result = transaction.run(query,
57 | Values.value(Map[String, AnyRef](Neo4jQueryStrategy.VARIABLE_EVENTS -> batch,
58 | Neo4jQueryStrategy.VARIABLE_SCRIPT_RESULT -> scriptResult).asJava))
59 | if (log.isDebugEnabled) {
60 | val summary = result.consume()
61 | val counters = summary.counters()
62 | log.debug(
63 | s"""Batch saved into Neo4j data with:
64 | | - nodes created: ${counters.nodesCreated()}
65 | | - nodes deleted: ${counters.nodesDeleted()}
66 | | - relationships created: ${counters.relationshipsCreated()}
67 | | - relationships deleted: ${counters.relationshipsDeleted()}
68 | | - properties set: ${counters.propertiesSet()}
69 | | - labels added: ${counters.labelsAdded()}
70 | | - labels removed: ${counters.labelsRemoved()}
71 | |""".stripMargin)
72 | }
73 | transaction.commit()
74 | closeSafety(transaction)
75 | batch.clear()
76 | } catch {
77 | case neo4jTransientException: Neo4jException =>
78 | val code = neo4jTransientException.code()
79 | if ((neo4jTransientException.isInstanceOf[SessionExpiredException] || neo4jTransientException.isInstanceOf[ServiceUnavailableException])
80 | && !(Neo4jUtil.unsupportedTransientCodes ++ options.transactionMetadata.failOnTransactionCodes).contains(code)
81 | && retries.getCount > 0) {
82 | retries.countDown()
83 | log.info(s"Matched Neo4j transient exception; next retry is ${options.transactionMetadata.retries - retries.getCount}")
84 | close()
85 | writeBatch()
86 | } else {
87 | logAndThrowException(neo4jTransientException)
88 | }
89 | case e: Exception => logAndThrowException(e)
90 | }
91 | Unit
92 | }
93 |
94 | private def logAndThrowException(e: Exception): Unit = {
95 | if (e.isInstanceOf[ClientException]) {
96 | log.error(s"Cannot commit the transaction because: ${e.getMessage}")
97 | }
98 | else {
99 | log.error("Cannot commit the transaction because of the following exception", e)
100 | }
101 |
102 | throw e
103 | }
104 |
105 | def commit(): Null = {
106 | writeBatch()
107 | close()
108 | null
109 | }
110 |
111 | def abort(): Unit = {
112 | if (transaction != null && transaction.isOpen) {
113 | try {
114 | transaction.rollback()
115 | } catch {
116 | case e: Throwable => log.warn("Cannot rollback the transaction because of the following exception", e)
117 | }
118 | }
119 | close()
120 | Unit
121 | }
122 |
123 | protected def close(): Unit = {
124 | closeSafety(transaction, log)
125 | closeSafety(session, log)
126 | }
127 | }
128 |
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/pages/faq.adoc:
--------------------------------------------------------------------------------
1 | [#faq]
2 | = Neo4j Connector for Apache Spark FAQ
3 |
4 | == How can I speed up writes to Neo4j?
5 |
6 | The Spark connector fundamentally writes data to Neo4j in batches. Neo4j is a transactional
7 | database, and so all modifications are made within a transaction. Those transactions in turn
8 | have overhead.
9 |
10 | The two simplest ways of increasing write performance are:
11 | * Increase the batch size (option `batch.size`); see the example below. The larger the batch, the fewer transactions are executed to write all of your data, and the less transactional overhead is incurred.
12 | * Ensure that your Neo4j instance has an ample free heap and a properly sized page cache. A small heap prevents you from committing large batches, which in turn slows the overall import.
13 |
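
As a minimal sketch (the URL and label are placeholders, and `df` is assumed to be the DataFrame being written), the batch size is raised with the `batch.size` option on the writer:

[source,scala]
----
// Larger batches mean fewer transactions and less per-transaction overhead.
df.write.format("org.neo4j.spark.DataSource")
  .mode(SaveMode.Append)
  .option("url", "bolt://localhost:7687")
  .option("labels", "Person")
  .option("batch.size", "20000")
  .save()
----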
14 | [NOTE]
15 | For best performance, make sure you are familiar with the material in the link:https://neo4j.com/developer/guide-performance-tuning/[Neo4j Performance Tuning Guide].
16 |
17 | It is important to keep in mind that Neo4j scales writes vertically and reads horizontally. In
18 | the link:https://neo4j.com/docs/operations-manual/current/clustering/introduction/[Causal Cluster Model], only the cluster leader (1 machine) may accept writes. For this reason, focus on getting the best hardware & performance on your cluster leader to maximize write throughput.
19 |
20 | == Where can I get help?
21 |
22 | link:https://community.neo4j.com/[The Neo4j Community] site is a great place to ask questions, talk with other users of the connector, and get help from Neo4j pros.
23 |
24 | == What is the license for this connector?
25 |
26 | The source code is offered under the terms of the Apache 2.0 open source license. You are free
27 | to download, modify, and redistribute the connector; however, Neo4j support applies only to official builds provided by Neo4j.
28 |
29 | == Is this software connected to Morpheus or Cypher for Apache Spark (CAPS)?
30 |
31 | No. The two projects share no code and take very different approaches. Cypher for Apache Spark/Morpheus took the approach of providing an interpreter
32 | that could execute Cypher queries within the Spark environment, and provided a native graph representation for Spark. By contrast, this connector does not provide that
33 | functionality, and focuses on reads and writes back and forth between Neo4j & Spark. Via this connector, all Cypher code is executed strictly within Neo4j. The Spark
34 | environment operates in terms of DataFrames as it always has, and this connector does not provide graph API primitives for Spark.
35 |
36 | == Can this connector be used for pre-processing of data and loading into Neo4j?
37 |
38 | Yes. This connector enables Spark to be used as an effective method of loading data directly into Neo4j. See link:architecture.adoc[the architecture section] for a detailed discussion of
39 | "Normalized Loading" vs. "Cypher Destructuring" and guidance on different approaches for how to do performant data loads into Neo4j.
40 |
41 | == My writes are failing due to Deadlock Exceptions
42 |
43 | In some cases, Neo4j will reject write transactions due to a deadlock exception that you may see in the stacktrace.
44 |
45 | link:https://neo4j.com/developer/kb/explanation-of-error-deadlockdetectedexception-forseticlient-0-cant-acquire-exclusivelock/[This Neo4j Knowledge Base entry] describes the issue.
46 |
47 | Typically this is caused by too much parallelism in writing to Neo4j. For example, when you
48 | write a relationship `(:A)-[:REL]->(:B)`, this creates a "lock" in the database on both nodes.
49 | If another thread is simultaneously attempting to write to those same nodes, deadlock
50 | exceptions can result and the transaction will fail.
51 |
52 | In general, the solution is to repartition the DataFrame prior to writing it to Neo4j, so that
53 | multiple partitioned writes do not lock the same nodes and relationships; see the sketch below.
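
A minimal sketch (placeholder URL and label, and `df` assumed to be the DataFrame being written): reducing the number of partitions before the write reduces the number of concurrent transactions, and coalescing to a single partition serializes the writes entirely:

[source,scala]
----
// Fewer partitions mean fewer concurrent transactions competing for the same locks.
df.coalesce(1)
  .write.format("org.neo4j.spark.DataSource")
  .mode(SaveMode.Append)
  .option("url", "bolt://localhost:7687")
  .option("labels", "Person")
  .save()
----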
54 |
55 | == I'm getting a cast error like UTF8String cannot be cast to Long. How do I solve it?
56 |
57 | You might be getting an error like:
58 |
59 | ```
60 | java.lang.ClassCastException: org.apache.spark.unsafe.types.UTF8String cannot be cast to java.lang.Long
61 | ```
62 |
63 | or similar, with different types.
64 |
65 | This is typically due to a field having different types across nodes with the same label.
66 | You can solve it by adding APOC to your Neo4j installation; this removes the error, but
67 | all the values for that field will be cast to String. This is because Spark is not schema-free
68 | and needs each column to always have the same type.
69 |
70 | You can read more <>.
71 |
72 | == The returned columns are not in the same order as I specified in the query
73 |
74 | Unfortunately this is a known issue that affects Neo4j 3.x and Neo4j 4.0.
75 | With Neo4j 4.1+ you get the same order as specified in the RETURN statement.
76 |
77 |
78 | == TableProvider implementation org.neo4j.spark.DataSource cannot be written with ErrorIfExists mode, please use Append or Overwrite modes instead.
79 |
80 | If you are getting this error while trying to write to Neo4j, be aware that the current version of the connector
81 | doesn't support *SaveMode.ErrorIfExists* on Spark 3.0,
82 | which is the default save mode.
83 | Please change the save mode to either `SaveMode.Append` or `SaveMode.Overwrite`.
84 |
85 | We are working to fully support all the save modes on Spark 3.0.
--------------------------------------------------------------------------------
/.mvn/wrapper/MavenWrapperDownloader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2007-present the original author or authors.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 | import java.net.*;
17 | import java.io.*;
18 | import java.nio.channels.*;
19 | import java.util.Properties;
20 |
21 | public class MavenWrapperDownloader {
22 |
23 | private static final String WRAPPER_VERSION = "0.5.6";
24 | /**
25 | * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided.
26 | */
27 | private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
28 | + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";
29 |
30 | /**
31 | * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to
32 | * use instead of the default one.
33 | */
34 | private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
35 | ".mvn/wrapper/maven-wrapper.properties";
36 |
37 | /**
38 | * Path where the maven-wrapper.jar will be saved to.
39 | */
40 | private static final String MAVEN_WRAPPER_JAR_PATH =
41 | ".mvn/wrapper/maven-wrapper.jar";
42 |
43 | /**
44 | * Name of the property which should be used to override the default download url for the wrapper.
45 | */
46 | private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";
47 |
48 | public static void main(String args[]) {
49 | System.out.println("- Downloader started");
50 | File baseDirectory = new File(args[0]);
51 | System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());
52 |
53 | // If the maven-wrapper.properties exists, read it and check if it contains a custom
54 | // wrapperUrl parameter.
55 | File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
56 | String url = DEFAULT_DOWNLOAD_URL;
57 | if(mavenWrapperPropertyFile.exists()) {
58 | FileInputStream mavenWrapperPropertyFileInputStream = null;
59 | try {
60 | mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile);
61 | Properties mavenWrapperProperties = new Properties();
62 | mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
63 | url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
64 | } catch (IOException e) {
65 | System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
66 | } finally {
67 | try {
68 | if(mavenWrapperPropertyFileInputStream != null) {
69 | mavenWrapperPropertyFileInputStream.close();
70 | }
71 | } catch (IOException e) {
72 | // Ignore ...
73 | }
74 | }
75 | }
76 | System.out.println("- Downloading from: " + url);
77 |
78 | File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
79 | if(!outputFile.getParentFile().exists()) {
80 | if(!outputFile.getParentFile().mkdirs()) {
81 | System.out.println(
82 | "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'");
83 | }
84 | }
85 | System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
86 | try {
87 | downloadFileFromURL(url, outputFile);
88 | System.out.println("Done");
89 | System.exit(0);
90 | } catch (Throwable e) {
91 | System.out.println("- Error downloading");
92 | e.printStackTrace();
93 | System.exit(1);
94 | }
95 | }
96 |
97 | private static void downloadFileFromURL(String urlString, File destination) throws Exception {
98 | if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
99 | String username = System.getenv("MVNW_USERNAME");
100 | char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
101 | Authenticator.setDefault(new Authenticator() {
102 | @Override
103 | protected PasswordAuthentication getPasswordAuthentication() {
104 | return new PasswordAuthentication(username, password);
105 | }
106 | });
107 | }
108 | URL website = new URL(urlString);
109 | ReadableByteChannel rbc;
110 | rbc = Channels.newChannel(website.openStream());
111 | FileOutputStream fos = new FileOutputStream(destination);
112 | fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
113 | fos.close();
114 | rbc.close();
115 | }
116 |
117 | }
118 |
--------------------------------------------------------------------------------
/test-support/src/main/java/org/neo4j/spark/Assert.java:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark;//
2 | // Source code recreated from a .class file by IntelliJ IDEA
3 | // (powered by Fernflower decompiler)
4 | //
5 |
6 | import java.util.Arrays;
7 | import java.util.concurrent.TimeUnit;
8 | import java.util.function.Function;
9 | import java.util.function.Supplier;
10 |
11 | import org.hamcrest.Description;
12 | import org.hamcrest.Matcher;
13 | import org.hamcrest.MatcherAssert;
14 | import org.hamcrest.StringDescription;
15 | import org.hamcrest.core.StringContains;
16 |
17 | public final class Assert {
18 | private Assert() {
19 | }
20 |
21 | public interface ThrowingSupplier<T, E extends Exception> {
22 | T get() throws E;
23 |
24 | static <TYPE> ThrowingSupplier<TYPE, RuntimeException> throwingSupplier(final Supplier<TYPE> supplier) {
25 | return new ThrowingSupplier<TYPE, RuntimeException>() {
26 | public TYPE get() {
27 | return supplier.get();
28 | }
29 |
30 | public String toString() {
31 | return supplier.toString();
32 | }
33 | };
34 | }
35 | }
36 |
37 | public interface ThrowingAction<E extends Exception> {
38 | void apply() throws E;
39 |
40 | static <E extends Exception> ThrowingAction<E> noop() {
41 | return () -> {
42 | };
43 | }
44 | }
45 |
46 | public static <E extends Exception> void assertException(ThrowingAction<E> f, Class<?> typeOfException) {
47 | assertException(f, typeOfException, (String) null);
48 | }
49 |
50 | public static <E extends Exception> void assertException(ThrowingAction<E> f, Class<?> typeOfException, String partOfErrorMessage) {
51 | try {
52 | f.apply();
53 | org.junit.Assert.fail("Expected exception of type " + typeOfException + ", but no exception was thrown");
54 | } catch (Exception var4) {
55 | if (typeOfException.isInstance(var4)) {
56 | if (partOfErrorMessage != null) {
57 | MatcherAssert.assertThat(var4.getMessage(), StringContains.containsString(partOfErrorMessage));
58 | }
59 | } else {
60 | org.junit.Assert.fail("Got unexpected exception " + var4.getClass() + "\nExpected: " + typeOfException);
61 | }
62 | }
63 |
64 | }
65 |
66 | public static <T, E extends Exception> void assertEventually(ThrowingSupplier<T, E> actual, Matcher<? super T> matcher, long timeout, TimeUnit timeUnit) throws E, InterruptedException {
67 | assertEventually((ignored) -> {
68 | return "";
69 | }, actual, matcher, timeout, timeUnit);
70 | }
71 |
72 | public static <T, E extends Exception> void assertEventually(String reason, ThrowingSupplier<T, E> actual, Matcher<? super T> matcher, long timeout, TimeUnit timeUnit) throws E, InterruptedException {
73 | assertEventually((ignored) -> {
74 | return reason;
75 | }, actual, matcher, timeout, timeUnit);
76 | }
77 |
78 | public static <T, E extends Exception> void assertEventually(Function<T, String> reason, ThrowingSupplier<T, E> actual, Matcher<? super T> matcher, long timeout, TimeUnit timeUnit) throws E, InterruptedException {
79 | long endTimeMillis = System.currentTimeMillis() + timeUnit.toMillis(timeout);
80 |
81 | while (true) {
82 | long sampleTime = System.currentTimeMillis();
83 | T last = actual.get();
84 | boolean matched = matcher.matches(last);
85 | if (matched || sampleTime > endTimeMillis) {
86 | if (!matched) {
87 | Description description = new StringDescription();
88 | description.appendText((String) reason.apply(last)).appendText("\nExpected: ").appendDescriptionOf(matcher).appendText("\n but: ");
89 | matcher.describeMismatch(last, description);
90 | throw new AssertionError("Timeout hit (" + timeout + " " + timeUnit.toString().toLowerCase() + ") while waiting for condition to match: " + description.toString());
91 | } else {
92 | return;
93 | }
94 | }
95 |
96 | Thread.sleep(100L);
97 | }
98 | }
99 |
100 | private static AssertionError newAssertionError(String message, Object expected, Object actual) {
101 | return new AssertionError((message != null && !message.isEmpty() ? message + "\n" : "") + "Expected: " + prettyPrint(expected) + ", actual: " + prettyPrint(actual));
102 | }
103 |
104 | private static String prettyPrint(Object o) {
105 | if (o == null) {
106 | return "null";
107 | }
108 |
109 | Class<?> clazz = o.getClass();
110 | if (clazz.isArray()) {
111 | if (clazz == byte[].class) {
112 | return Arrays.toString((byte[]) o);
113 | } else if (clazz == short[].class) {
114 | return Arrays.toString((short[]) o);
115 | } else if (clazz == int[].class) {
116 | return Arrays.toString((int[]) o);
117 | } else if (clazz == long[].class) {
118 | return Arrays.toString((long[]) o);
119 | } else if (clazz == float[].class) {
120 | return Arrays.toString((float[]) o);
121 | } else if (clazz == double[].class) {
122 | return Arrays.toString((double[]) o);
123 | } else if (clazz == char[].class) {
124 | return Arrays.toString((char[]) o);
125 | } else if (clazz == boolean[].class) {
126 | return Arrays.toString((boolean[]) o);
127 | } else {
128 | return Arrays.deepToString((Object[]) o);
129 | }
130 | } else {
131 | return String.valueOf(o);
132 | }
133 | }
134 | }
135 |
136 |
--------------------------------------------------------------------------------
/doc/docs/modules/ROOT/pages/configuration.adoc:
--------------------------------------------------------------------------------
1 | [#options]
2 | = Connector Options & Configuration
3 |
4 | When using the connector, any valid Neo4j driver option can be set using the `option` method in
5 | Spark, like so:
6 |
7 | [source,scala]
8 | ----
9 | import org.apache.spark.sql.{SaveMode, SparkSession}
10 |
11 | val spark = SparkSession.builder().getOrCreate()
12 |
13 | val df = spark.read.format("org.neo4j.spark.DataSource")
14 | .option("url", "bolt://localhost:7687")
15 | .option("authentication.type", "basic")
16 | .option("authentication.basic.username", "myuser")
17 | .option("authentication.basic.password", "neo4jpassword")
18 | .option("labels", "Person")
19 | .load()
20 | ----
21 |
22 | == Neo4j Driver Options
23 |
24 | Under the covers, the Spark connector uses the link:https://neo4j.com/docs/driver-manual/current/get-started/#driver-get-started-about[official Neo4j Java Driver]. As such, in many situations you'll want control over the driver options to account for your production deployment of Neo4j and how to communicate with it. This is done using the `option` method shown in the example above.
25 |
26 | The following table captures the most common configuration settings to use with the Neo4j driver. For full
27 | documentation on all possible configuration options for Neo4j drivers, see the link:https://neo4j.com/docs/driver-manual/current/client-applications/#driver-configuration[Neo4j Drivers Manual].
28 |
29 | .List of available options
30 | |===
31 | |Setting Name |Description |Default Value |Required
32 |
33 | 4+|*Driver Options*
34 |
35 | |`url`
36 | |The url of the Neo4j instance to connect to
37 | |_(none)_
38 | |Yes
39 |
40 | |`authentication.type`
41 | |The authentication method to be used: `none`, `basic`, `kerberos`, `custom`.
42 | More info link:https://neo4j.com/docs/driver-manual/4.1/client-applications/#driver-authentication[here, window=_blank]
43 | |`basic`
44 | |No
45 |
46 | |`authentication.basic.username`
47 | |Username to use for basic authentication type
48 | |_(Neo4j Driver default)_
49 | |No
50 |
51 | |`authentication.basic.password`
52 | |Password to use for basic authentication type
53 | |_(Neo4j Driver default)_
54 | |No
55 |
56 | |`authentication.kerberos.ticket`
57 | |Kerberos Auth Ticket
58 | |_(Neo4j Driver default)_
59 | |No
60 |
61 | |`authentication.custom.principal`
62 | |This is used to identify who this token represents
63 | |_(Neo4j Driver default)_
64 | |No
65 |
66 | |`authentication.custom.credentials`
67 | |These are the credentials authenticating the principal
68 | |_(Neo4j Driver default)_
69 | |No
70 |
71 | |`authentication.custom.realm`
72 | |This is the "realm" string, specifying the authentication provider
73 | |_(Neo4j Driver default)_
74 | |No
75 |
76 | |`encryption.enabled`
77 | |Specify if encryption should be enabled.
78 | This setting is ignored if you use a URI scheme with `+s` or `+ssc`
79 | |`false`
80 | |No
81 |
82 | |`encryption.trust.strategy`
83 | |Set the certificate trust strategy; it is ignored if the connection URI uses `+s` or `+ssc` as suffix.
84 | Available values are: `TRUST_SYSTEM_CA_SIGNED_CERTIFICATES`, `TRUST_CUSTOM_CA_SIGNED_CERTIFICATES`, `TRUST_ALL_CERTIFICATES`
85 | |_(Neo4j Driver default)_
86 | |No
87 |
88 | |`encryption.ca.certificate.path`
89 | |Set certificate path for `TRUST_CUSTOM_CA_SIGNED_CERTIFICATES` trust strategy
90 | |_(Neo4j Driver default)_
91 | |No
92 |
93 | |`connection.max.lifetime.msecs`
94 | |Connection lifetime in milliseconds
95 | |_(Neo4j Driver default)_
96 | |No
97 |
98 | |`connection.liveness.timeout.msecs`
99 | |Liveness check timeout in milliseconds
100 | |_(Neo4j Driver default)_
101 | |No
102 |
103 | |`connection.acquisition.timeout.msecs`
104 | |Connection acquisition timeout in milliseconds
105 | |_(Neo4j Driver default)_
106 | |No
107 |
108 | |`connection.timeout.msecs`
109 | |Connection timeout in milliseconds
110 | |_(Neo4j Driver default)_
111 | |No
112 |
113 | 4+|*Session Options*
114 |
115 | |`database`
116 | |Database name to connect to.
117 | The driver also allows you to define the database in the URL;
118 | if you set this option, it takes priority over the one defined in the URL.
119 | |_(Neo4j Driver default)_
120 | |No
121 |
122 | |`access.mode`
123 | |Possible values are: `read`, `write`.
124 | Used only when you're pulling data from Neo4j.
125 | In a cluster environment, with `read` the connector
126 | routes the requests to the followers, otherwise to the leader.
127 | |`read`
128 | |No
129 | |===
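
For example, here is a minimal sketch of passing driver-level settings alongside the read options; all values shown are placeholders:

[source,scala]
----
// Driver settings are passed with .option(...) just like any other connector option.
val df = spark.read.format("org.neo4j.spark.DataSource")
  .option("url", "bolt://localhost:7687")
  .option("authentication.type", "basic")
  .option("authentication.basic.username", "neo4j")
  .option("authentication.basic.password", "password")
  .option("encryption.enabled", "true")
  .option("connection.timeout.msecs", "10000")
  .option("database", "neo4j")
  .option("access.mode", "read")
  .option("labels", "Person")
  .load()
----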
130 |
131 | == Multiple connections
132 |
133 | The Neo4j Connector for Apache Spark allows you to use multiple connections in a single Spark session.
134 | For example, you can read data from one database and write it to another database in the same session.
135 |
136 | .Reading from a database and writing to a different one
137 | [source,scala]
138 | ----
139 | import org.apache.spark.sql.{SaveMode, SparkSession}
140 |
141 | val spark = SparkSession.builder().getOrCreate()
142 |
143 | val df = spark.read.format("org.neo4j.spark.DataSource")
144 | .option("url", "bolt://first.host.com:7687")
145 | .option("labels", "Person")
146 | .load()
147 |
148 | df.write.format("org.neo4j.spark.DataSource")
149 | .mode(SaveMode.ErrorIfExists)
150 | .option("url", "bolt://second.host.com:7687")
151 | .option("labels", "Person")
152 | .save()
153 | ----
154 |
155 | Another use case for multiple connections is merging two data sources.
156 |
157 | .Merge data between two databases
158 | [source,scala]
159 | ----
160 | import org.apache.spark.sql.{SaveMode, SparkSession}
161 |
162 | val spark = SparkSession.builder().getOrCreate()
163 |
164 | val dfOne = spark.read.format("org.neo4j.spark.DataSource")
165 | .option("url", "bolt://first.host.com:7687")
166 | .option("labels", "Person")
167 | .load()
168 |
169 | val dfTwo = spark.read.format("org.neo4j.spark.DataSource")
170 | .option("url", "bolt://second.host.com:7687")
171 | .option("labels", "Person")
172 | .load()
173 |
174 | val dfJoin = dfOne.join(dfTwo, dfOne("name") === dfTwo("name"))
175 | ----
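
As a follow-up sketch (the choice of target database and of `name` as the node key are assumptions for illustration), the joined result could then be written back through yet another connection:

[source,scala]
----
// Persist the people found in both databases into the first one,
// using "name" as the node key to avoid creating duplicates.
dfJoin.select(dfOne("name").as("name"))
  .write.format("org.neo4j.spark.DataSource")
  .mode(SaveMode.Overwrite)
  .option("url", "bolt://first.host.com:7687")
  .option("labels", "Person")
  .option("node.keys", "name")
  .save()
----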
--------------------------------------------------------------------------------
/doc/gradlew:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 |
3 | ##############################################################################
4 | ##
5 | ## Gradle start up script for UN*X
6 | ##
7 | ##############################################################################
8 |
9 | # Attempt to set APP_HOME
10 | # Resolve links: $0 may be a link
11 | PRG="$0"
12 | # Need this for relative symlinks.
13 | while [ -h "$PRG" ] ; do
14 | ls=`ls -ld "$PRG"`
15 | link=`expr "$ls" : '.*-> \(.*\)$'`
16 | if expr "$link" : '/.*' > /dev/null; then
17 | PRG="$link"
18 | else
19 | PRG=`dirname "$PRG"`"/$link"
20 | fi
21 | done
22 | SAVED="`pwd`"
23 | cd "`dirname \"$PRG\"`/" >/dev/null
24 | APP_HOME="`pwd -P`"
25 | cd "$SAVED" >/dev/null
26 |
27 | APP_NAME="Gradle"
28 | APP_BASE_NAME=`basename "$0"`
29 |
30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31 | DEFAULT_JVM_OPTS=""
32 |
33 | # Use the maximum available, or set MAX_FD != -1 to use that value.
34 | MAX_FD="maximum"
35 |
36 | warn ( ) {
37 | echo "$*"
38 | }
39 |
40 | die ( ) {
41 | echo
42 | echo "$*"
43 | echo
44 | exit 1
45 | }
46 |
47 | # OS specific support (must be 'true' or 'false').
48 | cygwin=false
49 | msys=false
50 | darwin=false
51 | nonstop=false
52 | case "`uname`" in
53 | CYGWIN* )
54 | cygwin=true
55 | ;;
56 | Darwin* )
57 | darwin=true
58 | ;;
59 | MINGW* )
60 | msys=true
61 | ;;
62 | NONSTOP* )
63 | nonstop=true
64 | ;;
65 | esac
66 |
67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
68 |
69 | # Determine the Java command to use to start the JVM.
70 | if [ -n "$JAVA_HOME" ] ; then
71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
72 | # IBM's JDK on AIX uses strange locations for the executables
73 | JAVACMD="$JAVA_HOME/jre/sh/java"
74 | else
75 | JAVACMD="$JAVA_HOME/bin/java"
76 | fi
77 | if [ ! -x "$JAVACMD" ] ; then
78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
79 |
80 | Please set the JAVA_HOME variable in your environment to match the
81 | location of your Java installation."
82 | fi
83 | else
84 | JAVACMD="java"
85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
86 |
87 | Please set the JAVA_HOME variable in your environment to match the
88 | location of your Java installation."
89 | fi
90 |
91 | # Increase the maximum file descriptors if we can.
92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
93 | MAX_FD_LIMIT=`ulimit -H -n`
94 | if [ $? -eq 0 ] ; then
95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
96 | MAX_FD="$MAX_FD_LIMIT"
97 | fi
98 | ulimit -n $MAX_FD
99 | if [ $? -ne 0 ] ; then
100 | warn "Could not set maximum file descriptor limit: $MAX_FD"
101 | fi
102 | else
103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
104 | fi
105 | fi
106 |
107 | # For Darwin, add options to specify how the application appears in the dock
108 | if $darwin; then
109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
110 | fi
111 |
112 | # For Cygwin, switch paths to Windows format before running java
113 | if $cygwin ; then
114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"`
115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116 | JAVACMD=`cygpath --unix "$JAVACMD"`
117 |
118 | # We build the pattern for arguments to be converted via cygpath
119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120 | SEP=""
121 | for dir in $ROOTDIRSRAW ; do
122 | ROOTDIRS="$ROOTDIRS$SEP$dir"
123 | SEP="|"
124 | done
125 | OURCYGPATTERN="(^($ROOTDIRS))"
126 | # Add a user-defined pattern to the cygpath arguments
127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129 | fi
130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh
131 | i=0
132 | for arg in "$@" ; do
133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135 |
136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138 | else
139 | eval `echo args$i`="\"$arg\""
140 | fi
141 | i=$((i+1))
142 | done
143 | case $i in
144 | (0) set -- ;;
145 | (1) set -- "$args0" ;;
146 | (2) set -- "$args0" "$args1" ;;
147 | (3) set -- "$args0" "$args1" "$args2" ;;
148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154 | esac
155 | fi
156 |
157 | # Escape application args
158 | save ( ) {
159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
160 | echo " "
161 | }
162 | APP_ARGS=$(save "$@")
163 |
164 | # Collect all arguments for the java command, following the shell quoting and substitution rules
165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
166 |
167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
169 | cd "$(dirname "$0")"
170 | fi
171 |
172 | exec "$JAVACMD" "$@"
173 |
--------------------------------------------------------------------------------
/mvnw.cmd:
--------------------------------------------------------------------------------
1 | @REM ----------------------------------------------------------------------------
2 | @REM Licensed to the Apache Software Foundation (ASF) under one
3 | @REM or more contributor license agreements. See the NOTICE file
4 | @REM distributed with this work for additional information
5 | @REM regarding copyright ownership. The ASF licenses this file
6 | @REM to you under the Apache License, Version 2.0 (the
7 | @REM "License"); you may not use this file except in compliance
8 | @REM with the License. You may obtain a copy of the License at
9 | @REM
10 | @REM http://www.apache.org/licenses/LICENSE-2.0
11 | @REM
12 | @REM Unless required by applicable law or agreed to in writing,
13 | @REM software distributed under the License is distributed on an
14 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | @REM KIND, either express or implied. See the License for the
16 | @REM specific language governing permissions and limitations
17 | @REM under the License.
18 | @REM ----------------------------------------------------------------------------
19 |
20 | @REM ----------------------------------------------------------------------------
21 | @REM Maven Start Up Batch script
22 | @REM
23 | @REM Required ENV vars:
24 | @REM JAVA_HOME - location of a JDK home dir
25 | @REM
26 | @REM Optional ENV vars
27 | @REM M2_HOME - location of maven2's installed home dir
28 | @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
29 | @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending
30 | @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
31 | @REM e.g. to debug Maven itself, use
32 | @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
33 | @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
34 | @REM ----------------------------------------------------------------------------
35 |
36 | @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
37 | @echo off
38 | @REM set title of command window
39 | title %0
40 | @REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
41 | @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO%
42 |
43 | @REM set %HOME% to equivalent of $HOME
44 | if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")
45 |
46 | @REM Execute a user defined script before this one
47 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
48 | @REM check for pre script, once with legacy .bat ending and once with .cmd ending
49 | if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat"
50 | if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd"
51 | :skipRcPre
52 |
53 | @setlocal
54 |
55 | set ERROR_CODE=0
56 |
57 | @REM To isolate internal variables from possible post scripts, we use another setlocal
58 | @setlocal
59 |
60 | @REM ==== START VALIDATION ====
61 | if not "%JAVA_HOME%" == "" goto OkJHome
62 |
63 | echo.
64 | echo Error: JAVA_HOME not found in your environment. >&2
65 | echo Please set the JAVA_HOME variable in your environment to match the >&2
66 | echo location of your Java installation. >&2
67 | echo.
68 | goto error
69 |
70 | :OkJHome
71 | if exist "%JAVA_HOME%\bin\java.exe" goto init
72 |
73 | echo.
74 | echo Error: JAVA_HOME is set to an invalid directory. >&2
75 | echo JAVA_HOME = "%JAVA_HOME%" >&2
76 | echo Please set the JAVA_HOME variable in your environment to match the >&2
77 | echo location of your Java installation. >&2
78 | echo.
79 | goto error
80 |
81 | @REM ==== END VALIDATION ====
82 |
83 | :init
84 |
85 | @REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
86 | @REM Fallback to current working directory if not found.
87 |
88 | set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
89 | IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir
90 |
91 | set EXEC_DIR=%CD%
92 | set WDIR=%EXEC_DIR%
93 | :findBaseDir
94 | IF EXIST "%WDIR%"\.mvn goto baseDirFound
95 | cd ..
96 | IF "%WDIR%"=="%CD%" goto baseDirNotFound
97 | set WDIR=%CD%
98 | goto findBaseDir
99 |
100 | :baseDirFound
101 | set MAVEN_PROJECTBASEDIR=%WDIR%
102 | cd "%EXEC_DIR%"
103 | goto endDetectBaseDir
104 |
105 | :baseDirNotFound
106 | set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
107 | cd "%EXEC_DIR%"
108 |
109 | :endDetectBaseDir
110 |
111 | IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig
112 |
113 | @setlocal EnableExtensions EnableDelayedExpansion
114 | for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
115 | @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%
116 |
117 | :endReadAdditionalConfig
118 |
119 | SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"
120 | set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar"
121 | set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
122 |
123 | set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
124 |
125 | FOR /F "tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
126 | IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B
127 | )
128 |
129 | @REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
130 | @REM This allows using the maven wrapper in projects that prohibit checking in binary data.
131 | if exist %WRAPPER_JAR% (
132 | if "%MVNW_VERBOSE%" == "true" (
133 | echo Found %WRAPPER_JAR%
134 | )
135 | ) else (
136 | if not "%MVNW_REPOURL%" == "" (
137 | SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
138 | )
139 | if "%MVNW_VERBOSE%" == "true" (
140 | echo Couldn't find %WRAPPER_JAR%, downloading it ...
141 | echo Downloading from: %DOWNLOAD_URL%
142 | )
143 |
144 | powershell -Command "&{"^
145 | "$webclient = new-object System.Net.WebClient;"^
146 | "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^
147 | "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^
148 | "}"^
149 | "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^
150 | "}"
151 | if "%MVNW_VERBOSE%" == "true" (
152 | echo Finished downloading %WRAPPER_JAR%
153 | )
154 | )
155 | @REM End of extension
156 |
157 | @REM Provide a "standardized" way to retrieve the CLI args that will
158 | @REM work with both Windows and non-Windows executions.
159 | set MAVEN_CMD_LINE_ARGS=%*
160 |
161 | %MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
162 | if ERRORLEVEL 1 goto error
163 | goto end
164 |
165 | :error
166 | set ERROR_CODE=1
167 |
168 | :end
169 | @endlocal & set ERROR_CODE=%ERROR_CODE%
170 |
171 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost
172 | @REM check for post script, once with legacy .bat ending and once with .cmd ending
173 | if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat"
174 | if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd"
175 | :skipRcPost
176 |
177 | @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
178 | if "%MAVEN_BATCH_PAUSE%" == "on" pause
179 |
180 | if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE%
181 |
182 | exit /B %ERROR_CODE%
183 |
--------------------------------------------------------------------------------
/common/src/test/scala/org/neo4j/spark/util/Neo4jOptionsTest.scala:
--------------------------------------------------------------------------------
1 | package org.neo4j.spark.util
2 |
3 | import org.junit.Assert._
4 | import org.junit.Test
5 | import org.neo4j.driver.AccessMode
6 |
7 | import scala.collection.JavaConverters._
8 |
9 | class Neo4jOptionsTest {
10 |
11 | import org.junit.Rule
12 | import org.junit.rules.ExpectedException
13 |
14 | val _expectedException: ExpectedException = ExpectedException.none
15 |
16 | @Rule
17 | def exceptionRule: ExpectedException = _expectedException
18 |
19 | @Test
20 | def testUrlIsRequired(): Unit = {
21 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
22 | options.put(QueryType.QUERY.toString.toLowerCase, "Person")
23 |
24 | _expectedException.expect(classOf[IllegalArgumentException])
25 | _expectedException.expectMessage("Parameter 'url' is required")
26 |
27 | new Neo4jOptions(options)
28 | }
29 |
30 | @Test
31 | def testRelationshipTableName(): Unit = {
32 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
33 | options.put(Neo4jOptions.URL, "bolt://localhost")
34 | options.put(QueryType.RELATIONSHIP.toString.toLowerCase, "KNOWS")
35 | options.put(Neo4jOptions.RELATIONSHIP_SOURCE_LABELS, "Person")
36 | options.put(Neo4jOptions.RELATIONSHIP_TARGET_LABELS, "Answer")
37 |
38 | val neo4jOptions = new Neo4jOptions(options)
39 |
40 | assertEquals("table_Person_KNOWS_Answer", neo4jOptions.getTableName)
41 | }
42 |
43 | @Test
44 | def testLabelsTableName(): Unit = {
45 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
46 | options.put(Neo4jOptions.URL, "bolt://localhost")
47 | options.put("labels", "Person:Admin")
48 |
49 | val neo4jOptions = new Neo4jOptions(options)
50 |
51 | assertEquals("table_Person-Admin", neo4jOptions.getTableName)
52 | }
53 |
54 | @Test
55 | def testRelationshipNodeModesAreCaseInsensitive(): Unit = {
56 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
57 | options.put(Neo4jOptions.URL, "bolt://localhost")
58 | options.put(QueryType.RELATIONSHIP.toString.toLowerCase, "KNOWS")
59 | options.put(Neo4jOptions.RELATIONSHIP_SAVE_STRATEGY, "nAtIve")
60 | options.put(Neo4jOptions.RELATIONSHIP_SOURCE_SAVE_MODE, "Errorifexists")
61 | options.put(Neo4jOptions.RELATIONSHIP_TARGET_SAVE_MODE, "overwrite")
62 |
63 | val neo4jOptions = new Neo4jOptions(options)
64 |
65 | assertEquals(RelationshipSaveStrategy.NATIVE, neo4jOptions.relationshipMetadata.saveStrategy)
66 | assertEquals(NodeSaveMode.ErrorIfExists, neo4jOptions.relationshipMetadata.sourceSaveMode)
67 | assertEquals(NodeSaveMode.Overwrite, neo4jOptions.relationshipMetadata.targetSaveMode)
68 | }
69 |
70 | @Test
71 | def testRelationshipWriteStrategyIsNotPresentShouldThrowException(): Unit = {
72 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
73 | options.put(Neo4jOptions.URL, "bolt://localhost")
74 | options.put(QueryType.LABELS.toString.toLowerCase, "PERSON")
75 | options.put("relationship.save.strategy", "nope")
76 |
77 | _expectedException.expect(classOf[NoSuchElementException])
78 | _expectedException.expectMessage("No value found for 'NOPE'")
79 |
80 | new Neo4jOptions(options)
81 | }
82 |
83 | @Test
84 | def testQueryShouldHaveQueryType(): Unit = {
85 | val query: String = "MATCH n RETURN n"
86 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
87 | options.put(Neo4jOptions.URL, "bolt://localhost")
88 | options.put(QueryType.QUERY.toString.toLowerCase, query)
89 |
90 | val neo4jOptions: Neo4jOptions = new Neo4jOptions(options)
91 |
92 | assertEquals(QueryType.QUERY, neo4jOptions.query.queryType)
93 | assertEquals(query, neo4jOptions.query.value)
94 | }
95 |
96 | @Test
97 | def testNodeShouldHaveLabelType(): Unit = {
98 | val label: String = "Person"
99 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
100 | options.put(Neo4jOptions.URL, "bolt://localhost")
101 | options.put(QueryType.LABELS.toString.toLowerCase, label)
102 |
103 | val neo4jOptions: Neo4jOptions = new Neo4jOptions(options)
104 |
105 | assertEquals(QueryType.LABELS, neo4jOptions.query.queryType)
106 | assertEquals(label, neo4jOptions.query.value)
107 | }
108 |
109 | @Test
110 | def testRelationshipShouldHaveRelationshipType(): Unit = {
111 | val relationship: String = "KNOWS"
112 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
113 | options.put(Neo4jOptions.URL, "bolt://localhost")
114 | options.put(QueryType.RELATIONSHIP.toString.toLowerCase, relationship)
115 |
116 | val neo4jOptions: Neo4jOptions = new Neo4jOptions(options)
117 |
118 | assertEquals(QueryType.RELATIONSHIP, neo4jOptions.query.queryType)
119 | assertEquals(relationship, neo4jOptions.query.value)
120 | }
121 |
122 | @Test
123 | def testPushDownColumnIsDisabled(): Unit = {
124 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
125 | options.put(Neo4jOptions.URL, "bolt://localhost")
126 | options.put("pushdown.columns.enabled", "false")
127 |
128 | val neo4jOptions: Neo4jOptions = new Neo4jOptions(options)
129 |
130 | assertFalse(neo4jOptions.pushdownColumnsEnabled)
131 | }
132 |
133 | @Test
134 | def testDriverDefaults(): Unit = {
135 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
136 | options.put(Neo4jOptions.URL, "bolt://localhost")
137 | options.put(QueryType.QUERY.toString.toLowerCase, "MATCH n RETURN n")
138 |
139 | val neo4jOptions: Neo4jOptions = new Neo4jOptions(options)
140 |
141 | assertEquals("", neo4jOptions.session.database)
142 | assertEquals(AccessMode.READ, neo4jOptions.session.accessMode)
143 |
144 | assertEquals("basic", neo4jOptions.connection.auth)
145 | assertEquals("", neo4jOptions.connection.username)
146 | assertEquals("", neo4jOptions.connection.password)
147 | assertEquals(false, neo4jOptions.connection.encryption)
148 |
149 | assertEquals(None, neo4jOptions.connection.trustStrategy)
150 |
151 | assertEquals("", neo4jOptions.connection.certificatePath)
152 | assertEquals("", neo4jOptions.connection.ticket)
153 | assertEquals("", neo4jOptions.connection.principal)
154 | assertEquals("", neo4jOptions.connection.credentials)
155 | assertEquals("", neo4jOptions.connection.realm)
156 | assertEquals("", neo4jOptions.connection.schema)
157 |
158 | assertEquals(-1, neo4jOptions.connection.lifetime)
159 | assertEquals(-1, neo4jOptions.connection.acquisitionTimeout)
160 | assertEquals(-1, neo4jOptions.connection.connectionTimeout)
161 | assertEquals(-1, neo4jOptions.connection.livenessCheckTimeout)
162 | assertEquals(RelationshipSaveStrategy.NATIVE, neo4jOptions.relationshipMetadata.saveStrategy)
163 |
164 | assertTrue(neo4jOptions.pushdownFiltersEnabled)
165 | }
166 |
167 | @Test
168 | def testApocConfiguration(): Unit = {
169 | val options: java.util.Map[String, String] = new java.util.HashMap[String, String]()
170 | options.put("apoc.meta.nodeTypeProperties", """{"nodeLabels": ["Label"], "mandatory": false}""")
171 | options.put(Neo4jOptions.URL, "bolt://localhost")
172 |
173 | val neo4jOptions: Neo4jOptions = new Neo4jOptions(options)
174 |
175 | val expected = Map("apoc.meta.nodeTypeProperties"-> Map(
176 | "nodeLabels" -> Seq("Label").asJava,
177 | "mandatory" -> false
178 | ))
179 |
180 | assertEquals(neo4jOptions.apocConfig.procedureConfigMap, expected)
181 | }
182 | }
183 |
--------------------------------------------------------------------------------