├── common ├── src │ ├── main │ │ ├── resources │ │ │ ├── neo4j-spark-connector.properties │ │ │ └── META-INF.services │ │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ │ └── scala │ │ │ └── org │ │ │ └── neo4j │ │ │ └── spark │ │ │ ├── util │ │ │ ├── ValidationUtil.scala │ │ │ ├── DriverCache.scala │ │ │ └── Neo4jImplicits.scala │ │ │ ├── reader │ │ │ └── BasePartitionReader.scala │ │ │ └── writer │ │ │ └── BaseDataWriter.scala │ └── test │ │ ├── resources │ │ └── neo4j-spark-connector.properties │ │ └── scala │ │ └── org │ │ └── neo4j │ │ └── spark │ │ ├── util │ │ ├── Neo4jUtilTest.scala │ │ ├── Neo4jImplicitsTest.scala │ │ └── Neo4jOptionsTest.scala │ │ ├── SparkConnectorScalaBaseTSE.scala │ │ ├── SparkConnectorScalaBaseWithApocTSE.scala │ │ ├── SparkConnectorScalaSuiteIT.scala │ │ └── SparkConnectorScalaSuiteWithApocIT.scala └── pom.xml ├── spark-2.4 ├── src │ ├── main │ │ ├── resources │ │ │ ├── neo4j-spark-connector.properties │ │ │ └── META-INF.services │ │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ │ └── scala │ │ │ └── org │ │ │ └── neo4j │ │ │ └── spark │ │ │ ├── writer │ │ │ ├── Neo4jDataWriter.scala │ │ │ ├── Neo4jDataWriterFactory.scala │ │ │ └── Neo4jDataSourceWriter.scala │ │ │ ├── DataSource.scala │ │ │ └── reader │ │ │ ├── Neo4jInputPartitionReader.scala │ │ │ └── Neo4jDataSourceReader.scala │ └── test │ │ ├── java │ │ └── org │ │ │ └── neo4j │ │ │ └── spark │ │ │ └── SparkConnectorSuiteIT.java │ │ └── scala │ │ └── org │ │ └── neo4j │ │ └── spark │ │ ├── DataSourceReaderNeo4j41xWithApocTSE.scala │ │ ├── DataSourceReaderNeo4j35xTSE.scala │ │ ├── DataSourceReaderNeo4j4xWithApocTSE.scala │ │ ├── SparkConnectorScalaBaseTSE.scala │ │ ├── SparkConnectorScalaBaseWithApocTSE.scala │ │ ├── SparkConnectorScalaSuiteWithApocIT.scala │ │ ├── SparkConnectorScalaSuiteIT.scala │ │ └── DataSourceReaderNeo4j41xTSE.scala └── pom.xml ├── spark-3.0 ├── src │ ├── main │ │ ├── resources │ │ │ ├── neo4j-spark-connector.properties │ │ │ └── META-INF.services │ │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister │ │ └── scala │ │ │ └── org │ │ │ └── neo4j │ │ │ └── spark │ │ │ ├── writer │ │ │ ├── Neo4jDataWriter.scala │ │ │ ├── Neo4jDataWriterFactory.scala │ │ │ ├── Neo4jBatchWriter.scala │ │ │ └── Neo4jWriterBuilder.scala │ │ │ ├── reader │ │ │ ├── Neo4jPartitionReader.scala │ │ │ ├── SimplePartitionReaderFactory.scala │ │ │ ├── SimpleScanBuilder.scala │ │ │ └── SimpleScan.scala │ │ │ ├── Neo4jTable.scala │ │ │ └── DataSource.scala │ └── test │ │ ├── resources │ │ └── neo4j-spark-connector.properties │ │ ├── java │ │ └── org │ │ │ └── neo4j │ │ │ └── spark │ │ │ └── SparkConnectorSuiteIT.java │ │ └── scala │ │ └── org │ │ └── neo4j │ │ └── spark │ │ ├── DataSourceReaderNeo4j41xWithApocTSE.scala │ │ ├── DataSourceReaderNeo4j35xTSE.scala │ │ ├── DataSourceReaderNeo4j4xWithApocTSE.scala │ │ ├── SparkConnectorScalaBaseTSE.scala │ │ ├── SparkConnectorScalaBaseWithApocTSE.scala │ │ ├── SparkConnectorScalaSuiteWithApocIT.scala │ │ ├── SparkConnectorScalaSuiteIT.scala │ │ └── DataSourceReaderNeo4j41xTSE.scala └── pom.xml ├── .mvn └── wrapper │ ├── maven-wrapper.jar │ ├── maven-wrapper.properties │ └── MavenWrapperDownloader.java ├── test-support ├── src │ └── main │ │ ├── resources │ │ └── neo4j-spark-connector.properties │ │ ├── scala │ │ └── org │ │ │ └── neo4j │ │ │ ├── spark │ │ │ └── TestUtil.scala │ │ │ └── Neo4jContainerExtension.scala │ │ └── java │ │ └── org │ │ └── neo4j │ │ └── spark │ │ └── Assert.java └── pom.xml ├── doc ├── docbook │ ├── catalog │ │ ├── 
CatalogManager.properties │ │ └── catalog.xml │ └── content-map.xml ├── devenv.local.template ├── server.js ├── docs │ ├── antora.yml │ └── modules │ │ └── ROOT │ │ ├── nav.adoc │ │ └── pages │ │ ├── aura.adoc │ │ ├── index.adoc │ │ ├── neo4j-cluster.adoc │ │ ├── graphkeys.adoc │ │ ├── python.adoc │ │ ├── types.adoc │ │ ├── overview.adoc │ │ ├── quick-java-example.adoc │ │ ├── gds.adoc │ │ ├── faq.adoc │ │ └── configuration.adoc ├── devenv ├── asciidoc │ ├── cloud │ │ └── index.adoc │ ├── overview │ │ └── index.adoc │ ├── faq │ │ └── index.adoc │ ├── quickstart │ │ └── index.adoc │ ├── index.adoc │ └── neo4j-cluster │ │ └── index.adoc ├── javascript │ ├── datatable.js │ ├── version.js │ ├── colorize.js │ ├── versionswitcher.js │ ├── mp-nav.js │ └── tabs-for-chunked.js ├── docs.yml ├── settings.gradle ├── README.md ├── package.json ├── gradlew.bat ├── css │ └── extra.css ├── pom.xml └── gradlew ├── .gitignore ├── spark-packages.sh ├── README.md ├── .travis.yml └── mvnw.cmd /common/src/main/resources/neo4j-spark-connector.properties: -------------------------------------------------------------------------------- 1 | version=${project.version} -------------------------------------------------------------------------------- /spark-2.4/src/main/resources/neo4j-spark-connector.properties: -------------------------------------------------------------------------------- 1 | version=${project.version} -------------------------------------------------------------------------------- /spark-3.0/src/main/resources/neo4j-spark-connector.properties: -------------------------------------------------------------------------------- 1 | version=${project.version} -------------------------------------------------------------------------------- /common/src/main/resources/META-INF.services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | org.neo4j.spark.DataSource -------------------------------------------------------------------------------- /spark-2.4/src/main/resources/META-INF.services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | org.neo4j.spark.DataSource -------------------------------------------------------------------------------- /spark-3.0/src/main/resources/META-INF.services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | org.neo4j.spark.DataSource -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alexott/neo4j-spark-connector/4.0/.mvn/wrapper/maven-wrapper.jar -------------------------------------------------------------------------------- /common/src/test/resources/neo4j-spark-connector.properties: -------------------------------------------------------------------------------- 1 | neo4j.version=${neo4j.version} 2 | neo4j.experimental=${neo4j.experimental} -------------------------------------------------------------------------------- /spark-3.0/src/test/resources/neo4j-spark-connector.properties: -------------------------------------------------------------------------------- 1 | neo4j.version=${neo4j.version} 2 | neo4j.experimental=${neo4j.experimental} -------------------------------------------------------------------------------- 
/test-support/src/main/resources/neo4j-spark-connector.properties: -------------------------------------------------------------------------------- 1 | neo4j.version=${neo4j.version} 2 | neo4j.experimental=${neo4j.experimental} -------------------------------------------------------------------------------- /doc/docbook/catalog/CatalogManager.properties: -------------------------------------------------------------------------------- 1 | catalogs=catalog.xml 2 | relative-catalogs=false 3 | static-catalog=yes 4 | catalog-class-name=org.apache.xml.resolver.Resolver 5 | verbosity=1 6 | -------------------------------------------------------------------------------- /doc/devenv.local.template: -------------------------------------------------------------------------------- 1 | # URI is necessary, empty user name and password are fine if accessing public artifacts. 2 | ARTIFACTORY_URI= 3 | ARTIFACTORY_USERNAME= 4 | ARTIFACTORY_PASSWORD= 5 | 6 | # vim: set sw=2 ts=2 ft=sh: 7 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.zip 2 | wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar 3 | -------------------------------------------------------------------------------- /doc/server.js: -------------------------------------------------------------------------------- 1 | const express = require('express') 2 | 3 | const app = express() 4 | app.use(express.static('./build/site')) 5 | 6 | app.get('/', (req, res) => res.redirect('/spark')) 7 | 8 | app.listen(8000, () => console.log('📘 http://localhost:8000')) 9 | -------------------------------------------------------------------------------- /doc/docs/antora.yml: -------------------------------------------------------------------------------- 1 | name: spark 2 | version: master 3 | title: Neo4j Spark Connector 4 | start_page: ROOT:index.adoc 5 | nav: 6 | - modules/ROOT/nav.adoc 7 | 8 | asciidoc: 9 | attributes: 10 | theme: docs 11 | connector-version: 4.0.0 12 | copyright: Neo4j Inc. 13 | -------------------------------------------------------------------------------- /common/src/test/scala/org/neo4j/spark/util/Neo4jUtilTest.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.util 2 | 3 | import org.junit.Test 4 | 5 | class Neo4jUtilTest { 6 | 7 | @Test 8 | def testSafetyCloseShouldNotFailWithNull(): Unit = { 9 | Neo4jUtil.closeSafety(null) 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /doc/devenv: -------------------------------------------------------------------------------- 1 | # source me 2 | if [ -f ./devenv.local ]; then 3 | . ./devenv.local 4 | 5 | export ARTIFACTORY_URI 6 | export ARTIFACTORY_USERNAME 7 | export ARTIFACTORY_PASSWORD 8 | 9 | else 10 | echo "Couldn't find ./devenv.local." 
11 | fi 12 | 13 | # vim: set sw=2 ts=2 ft=sh: 14 | -------------------------------------------------------------------------------- /doc/asciidoc/cloud/index.adoc: -------------------------------------------------------------------------------- 1 | [#cloud] 2 | == Using with Managed Cloud Spark 3 | 4 | 5 | ifdef::env-docs[] 6 | [abstract] 7 | -- 8 | This chapter provides an introduction to the Neo4j Connector for Apache Spark with cloud managed sparks 9 | -- 10 | endif::env-docs[] 11 | 12 | (Notes here) 13 | -------------------------------------------------------------------------------- /spark-2.4/src/test/java/org/neo4j/spark/SparkConnectorSuiteIT.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark; 2 | 3 | import org.junit.runner.RunWith; 4 | import org.junit.runners.Suite; 5 | 6 | @RunWith(Suite.class) 7 | @Suite.SuiteClasses({ 8 | DataSourceReaderTypesTSE.class 9 | }) 10 | public class SparkConnectorSuiteIT extends SparkConnectorScalaSuiteIT { 11 | } 12 | -------------------------------------------------------------------------------- /spark-3.0/src/test/java/org/neo4j/spark/SparkConnectorSuiteIT.java: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark; 2 | 3 | import org.junit.runner.RunWith; 4 | import org.junit.runners.Suite; 5 | 6 | @RunWith(Suite.class) 7 | @Suite.SuiteClasses({ 8 | DataSourceReaderTypesTSE.class 9 | }) 10 | public class SparkConnectorSuiteIT extends SparkConnectorScalaSuiteIT { 11 | } 12 | -------------------------------------------------------------------------------- /doc/docbook/catalog/catalog.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /doc/javascript/datatable.js: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | 4 | 6 | */ 7 | $(document).ready(function() { 8 | $('#table-all table').DataTable({"aLengthMenu": [[20,40, 80, -1], [20,40, 80, "All"]],"pageLength": 20}); 9 | } ); 10 | -------------------------------------------------------------------------------- /doc/asciidoc/overview/index.adoc: -------------------------------------------------------------------------------- 1 | 2 | == Project Overview 3 | 4 | ifdef::env-docs[] 5 | [abstract] 6 | -- 7 | This chapter provides an introduction to the Neo4j Connector for Apache Spark. 
8 | -- 9 | endif::env-docs[] 10 | 11 | Overview here 12 | 13 | Neo4j Streams can run in two modes: 14 | 15 | * as a Neo4j plugin: 16 | 17 | ** Runmode1 18 | ** Runmode2 19 | 20 | [NOTE] 21 | **Be aware of stuff here.** 22 | -------------------------------------------------------------------------------- /doc/docs.yml: -------------------------------------------------------------------------------- 1 | site: 2 | title: Neo4j Connector for Apache Spark User Guide 3 | url: /neo4j-spark-docs 4 | content: 5 | sources: 6 | - url: ../ 7 | branches: HEAD 8 | start_path: doc/docs 9 | ui: 10 | bundle: 11 | url: https://s3-eu-west-1.amazonaws.com/static-content.neo4j.com/build/ui-bundle.zip 12 | snapshot: true 13 | asciidoc: 14 | attributes: 15 | page-theme: docs 16 | page-cdn: /_/ -------------------------------------------------------------------------------- /doc/settings.gradle: -------------------------------------------------------------------------------- 1 | pluginManagement { 2 | repositories { 3 | // mavenLocal() 4 | maven { 5 | url "https://neo.jfrog.io/neo/docs-maven" // System.env.ARTIFACTORY_URI 6 | /* 7 | credentials { 8 | username System.env.ARTIFACTORY_USERNAME 9 | password System.env.ARTIFACTORY_PASSWORD 10 | } 11 | */ 12 | } 13 | gradlePluginPortal() 14 | 15 | } 16 | } -------------------------------------------------------------------------------- /doc/asciidoc/faq/index.adoc: -------------------------------------------------------------------------------- 1 | [#faq] 2 | == Neo4j Connector for Apache Spark FAQ 3 | 4 | ifdef::env-docs[] 5 | [abstract] 6 | -- 7 | This chapter answers frequently asked questions 8 | -- 9 | endif::env-docs[] 10 | 11 | === Source Code License 12 | 13 | The source code to the Neo4j Connector for Apache Spark is available under the terms of the Apache License, version 2.0. See the LICENSE.txt file in 14 | the source code repository for full terms and conditions. 
15 | 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | neo4j-home 2 | .gradle 3 | gradle/ 4 | build/ 5 | *~ 6 | \#* 7 | target 8 | out 9 | .project 10 | .classpath 11 | .settings 12 | .externalToolBuilders/ 13 | .scala_dependencies 14 | .factorypath 15 | .cache 16 | .cache-main 17 | .cache-tests 18 | *.iws 19 | *.ipr 20 | *.iml 21 | .idea 22 | .DS_Store 23 | .shell_history 24 | .mailmap 25 | .java-version 26 | .cache-main 27 | .cache-tests 28 | Thumbs.db 29 | .cache-main 30 | .cache-tests 31 | docs/guides 32 | doc/node 33 | doc/node_modules 34 | doc/package-lock.json 35 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | # Local Development 2 | 3 | In order to locally preview the Neo4j Connector for Apache Spark documentation built with Antora do the following steps: 4 | 5 | - open a terminal window and be sure to be at the root of the project 6 | - run the following command: `cd doc` 7 | - run the following command: `npm install && npm start` 8 | - browse to [localhost:8000](http://localhost:8000) 9 | 10 | Now everytime you change one of your `.adoc` files antora will rebuild everything, 11 | and you just need to refresh your page on [localhost:8000](http://localhost:8000) 12 | -------------------------------------------------------------------------------- /test-support/src/main/scala/org/neo4j/spark/TestUtil.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import java.util.Properties 4 | 5 | object TestUtil { 6 | 7 | private val properties = new Properties() 8 | properties.load(Thread.currentThread().getContextClassLoader().getResourceAsStream("neo4j-spark-connector.properties")) 9 | 10 | def isTravis(): Boolean = Option(System.getenv("TRAVIS")).getOrElse("false").toBoolean 11 | 12 | def neo4jVersion(): String = properties.getProperty("neo4j.version") 13 | 14 | def experimental(): Boolean = properties.getProperty("neo4j.experimental", "false").toBoolean 15 | 16 | } 17 | -------------------------------------------------------------------------------- /spark-packages.sh: -------------------------------------------------------------------------------- 1 | if [[ $# -lt 2 ]] ; then 2 | echo "Usage ./spark-packages.sh " 3 | exit 1 4 | fi 5 | 6 | ARTIFACT=neo4j-connector-apache-spark_$2 7 | VERSION=$1 8 | ./mvnw clean install -Pscala-$2 -DskipTests 9 | cat << EOF > target/$ARTIFACT-$VERSION.pom 10 | 11 | 4.0.0 12 | neo4j-contrib 13 | $ARTIFACT 14 | $VERSION 15 | 16 | EOF 17 | cp pom.xml target/$ARTIFACT-$VERSION.pom 18 | cp target/$ARTIFACT-$VERSION.jar target/$ARTIFACT-$VERSION.jar 19 | zip -jv target/$ARTIFACT-$VERSION.zip target/$ARTIFACT-$VERSION.pom target/$ARTIFACT-$VERSION.jar 20 | xdg-open target -------------------------------------------------------------------------------- /doc/docs/modules/ROOT/nav.adoc: -------------------------------------------------------------------------------- 1 | * xref::overview.adoc[Project overview] 2 | * xref::quickstart.adoc[Quick Start] 3 | * xref::configuration.adoc[Neo4j Driver Configuration] 4 | * xref::reading.adoc[Reading from Neo4j] 5 | * xref::writing.adoc[Writing to Neo4j] 6 | * xref::python.adoc[Using with Pyspark / Python] 7 | * xref::gds.adoc[Using with Graph Data Science] 8 | * 
xref::neo4j-cluster.adoc[Using with Neo4j Causal Cluster] 9 | * xref::aura.adoc[Using with Neo4j Aura] 10 | * xref::quick-java-example.adoc[Quick Java Example] 11 | * xref::architecture.adoc[Architecture Guidance for Implementing] 12 | * xref::types.adoc[Neo4j-Spark Data Types Reference] 13 | * xref::faq.adoc[Neo4j Connector for Apache Spark FAQ] 14 | -------------------------------------------------------------------------------- /spark-2.4/src/main/scala/org/neo4j/spark/writer/Neo4jDataWriter.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.writer 2 | 3 | import org.apache.spark.sql.SaveMode 4 | import org.apache.spark.sql.catalyst.InternalRow 5 | import org.apache.spark.sql.sources.v2.writer.DataWriter 6 | import org.apache.spark.sql.types.StructType 7 | import org.neo4j.spark.util.Neo4jOptions 8 | 9 | class Neo4jDataWriter(jobId: String, 10 | partitionId: Int, 11 | structType: StructType, 12 | saveMode: SaveMode, 13 | options: Neo4jOptions, 14 | scriptResult: java.util.List[java.util.Map[String, AnyRef]]) 15 | extends BaseDataWriter(jobId, partitionId, structType, saveMode, options, scriptResult) 16 | with DataWriter[InternalRow] -------------------------------------------------------------------------------- /doc/asciidoc/quickstart/index.adoc: -------------------------------------------------------------------------------- 1 | 2 | == Quick Start 3 | 4 | ifdef::env-docs[] 5 | [abstract] 6 | -- 7 | Get started fast for common scenarios, using Neo4j Streams plugin or Kafka Connect plugin 8 | -- 9 | endif::env-docs[] 10 | 11 | === Neo4j Connector for Apache Spark Plugin 12 | 13 | ==== Install the Plugin 14 | 15 | * Download the latest release jar from https://github.com/neo4j-contrib/neo4j-spark-connector/releases 16 | * (Installation instructions here) 17 | 18 | ==== Spark Settings 19 | 20 | (Setting detail here) 21 | 22 | .Most Common Needed Configuration Settings 23 | |=== 24 | |Setting Name |Description |Default Value 25 | 26 | |SETTING A 27 | |description. 
28 | |500 29 | 30 | |SETTING B 31 | |DESCRIPTION 32 | |33554432 33 | 34 | |SETTING C 35 | |DESCRIPTION 36 | |16384 37 | |=== 38 | -------------------------------------------------------------------------------- /spark-3.0/src/main/scala/org/neo4j/spark/writer/Neo4jDataWriter.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.writer 2 | 3 | import org.apache.spark.sql.SaveMode 4 | import org.apache.spark.sql.catalyst.InternalRow 5 | import org.apache.spark.sql.connector.write.DataWriter 6 | import org.apache.spark.sql.types.StructType 7 | import org.neo4j.spark.util.Neo4jOptions 8 | 9 | class Neo4jDataWriter(jobId: String, 10 | partitionId: Int, 11 | structType: StructType, 12 | saveMode: SaveMode, 13 | options: Neo4jOptions, 14 | scriptResult: java.util.List[java.util.Map[String, AnyRef]]) 15 | extends BaseDataWriter(jobId, partitionId, structType, saveMode, options, scriptResult) 16 | with DataWriter[InternalRow] 17 | 18 | -------------------------------------------------------------------------------- /doc/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@neo4j/docs", 3 | "version": "1.0.0", 4 | "description": "Neo4j Docs builder", 5 | "main": "index.js", 6 | "watch": { 7 | "build:docs": { 8 | "patterns": [ 9 | "docs" 10 | ], 11 | "extensions": "adoc" 12 | } 13 | }, 14 | "scripts": { 15 | "server": "forever start server.js", 16 | "start": "npm run server && npm-watch", 17 | "stop": "forever stop server.js", 18 | "build:docs": "antora --fetch --stacktrace docs.yml" 19 | }, 20 | "license": "ISC", 21 | "dependencies": { 22 | "@antora/cli": "^2.3.3", 23 | "@antora/site-generator-default": "^2.3.3", 24 | "cheerio": "^1.0.0-rc.3" 25 | }, 26 | "devDependencies": { 27 | "express": "^4.17.1", 28 | "npm-watch": "^0.7.0", 29 | "forever": "^3.0.2" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /doc/javascript/version.js: -------------------------------------------------------------------------------- 1 | window.docMeta = (function () { 2 | var version = '4.0'; 3 | var name = 'Streams'; 4 | var href = window.location.href; 5 | var len = href.indexOf('/' + version) != -1 ? 
href.indexOf('/' + version) : href.length -1; 6 | return { 7 | name: name, 8 | version: version, 9 | availableDocVersions: ["3.4", "3.5", "4.0"], 10 | thisPubBaseUri: href.substring(0,len) + '/' + version, 11 | unversionedDocBaseUri: href.substring(0, len) + '/', 12 | commonDocsBaseUri: href.substring(0, href.indexOf(name) - 1) 13 | } 14 | })(); 15 | 16 | (function () { 17 | var baseUri = window.docMeta.unversionedDocBaseUri; // + window.location.pathname.split(window.docMeta.name + '/')[1].split('/')[0] + '/'; 18 | var docPath = window.location.href.replace(baseUri, ''); 19 | window.neo4jPageId = docPath; 20 | })(); 21 | // vim: set sw=2 ts=2: 22 | -------------------------------------------------------------------------------- /spark-2.4/src/main/scala/org/neo4j/spark/writer/Neo4jDataWriterFactory.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.writer 2 | 3 | import org.apache.spark.sql.SaveMode 4 | import org.apache.spark.sql.catalyst.InternalRow 5 | import org.apache.spark.sql.sources.v2.writer.{DataWriter, DataWriterFactory} 6 | import org.apache.spark.sql.types.StructType 7 | import org.neo4j.spark.util.Neo4jOptions 8 | 9 | class Neo4jDataWriterFactory(jobId: String, 10 | structType: StructType, 11 | saveMode: SaveMode, 12 | options: Neo4jOptions, 13 | scriptResult: java.util.List[java.util.Map[String, AnyRef]]) extends DataWriterFactory[InternalRow] { 14 | override def createDataWriter(partitionId: Int, taskId: Long, epochId: Long): DataWriter[InternalRow] = new Neo4jDataWriter(jobId, partitionId, structType, saveMode, options, scriptResult) 15 | } -------------------------------------------------------------------------------- /doc/docs/modules/ROOT/pages/aura.adoc: -------------------------------------------------------------------------------- 1 | 2 | [#aura] 3 | = Using with Neo4j Aura 4 | 5 | [abstract] 6 | -- 7 | This chapter describes considerations around using Neo4j Connector for Apache Spark with link:https://neo4j.com/cloud/aura/[Neo4j Aura]. 8 | -- 9 | 10 | == Overview 11 | 12 | link:https://neo4j.com/cloud/aura/[Neo4j Aura] is a fully managed database as a service providing Neo4j. 13 | 14 | == Remote Clients 15 | 16 | Sometimes there will be remote applications that talk to Neo4j via official drivers, that want to use 17 | streams functionality. Best practices in these cases are: 18 | 19 | * Always use a `neo4j+s://` driver URI when communicating with the cluster in the client application. The optimal 20 | driver URI will be provided by Aura itself when you create a database 21 | * In Aura Enterprise consider creating a separate username/password for Spark access; avoid running all processes through the default 22 | `neo4j` account. 
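As a minimal sketch of the two recommendations above, the following reads the `Person` label from an Aura instance through the connector's `org.neo4j.spark.DataSource` format. The Aura URI, the dedicated username, and the environment variable are placeholders, and the `authentication.basic.*` option names are assumed to follow the connector's configuration page.

[source,scala]
----
import org.apache.spark.sql.SparkSession

object AuraReadExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("neo4j-aura-read")
      .master("local[*]")
      .getOrCreate()

    // The neo4j+s:// URI shown in the Aura console already enables TLS and routing.
    val df = spark.read
      .format("org.neo4j.spark.DataSource")
      .option("url", "neo4j+s://xxxxxxxx.databases.neo4j.io")    // placeholder Aura URI
      .option("authentication.basic.username", "spark_reader")   // dedicated account instead of `neo4j`
      .option("authentication.basic.password", sys.env("AURA_PASSWORD"))
      .option("labels", "Person")
      .load()

    df.show()
  }
}
----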
23 | -------------------------------------------------------------------------------- /spark-3.0/src/main/scala/org/neo4j/spark/writer/Neo4jDataWriterFactory.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.writer 2 | 3 | import org.apache.spark.sql.SaveMode 4 | import org.apache.spark.sql.catalyst.InternalRow 5 | import org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory, PhysicalWriteInfo} 6 | import org.apache.spark.sql.types.StructType 7 | import org.neo4j.spark.util.Neo4jOptions 8 | 9 | class Neo4jDataWriterFactory(jobId: String, 10 | structType: StructType, 11 | saveMode: SaveMode, 12 | options: Neo4jOptions, 13 | scriptResult: java.util.List[java.util.Map[String, AnyRef]]) extends DataWriterFactory { 14 | override def createWriter(partitionId: Int, taskId:Long): DataWriter[InternalRow] = new Neo4jDataWriter( 15 | jobId, 16 | partitionId, 17 | structType, 18 | saveMode, 19 | options, 20 | scriptResult 21 | ) 22 | } 23 | -------------------------------------------------------------------------------- /doc/asciidoc/index.adoc: -------------------------------------------------------------------------------- 1 | 2 | = Neo4j Connector for Apache Spark v{docs-version} 3 | :toc: 4 | :toclevels: 5 5 | :sectids: 6 | :sectlinks: 7 | :sectnums: 8 | :sectnumlevels: 5 9 | :env-docs: true 10 | 11 | ifdef::backend-html5[(C) {copyright}] 12 | 13 | License: link:{common-license-page-uri}[Creative Commons 4.0] 14 | 15 | [abstract] 16 | -- 17 | This is the user guide for Neo4j Connector for Apache Spark version {docs-version}, authored by the Neo4j Labs Team. 18 | -- 19 | 20 | The guide covers the following areas: 21 | 22 | * <<_project_overview>> -- Project overview 23 | * <<_quick_start>> -- Get Started Fast with the most Common Scenarios 24 | * <> -- Using with Neo4j Causal Cluster 25 | * <> -- Configuring a connection to a Confluent Cloud 26 | * <> -- Neo4j Streams FAQ 27 | 28 | 29 | include::overview/index.adoc[] 30 | 31 | include::quickstart/index.adoc[] 32 | 33 | include::neo4j-cluster/index.adoc[] 34 | 35 | include::cloud/index.adoc[] 36 | 37 | include::faq/index.adoc[] -------------------------------------------------------------------------------- /common/src/main/scala/org/neo4j/spark/util/ValidationUtil.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.util 2 | 3 | object ValidationUtil { 4 | 5 | def isNotEmpty(str: String, message: String) = if (str.isEmpty) { 6 | throw new IllegalArgumentException(message) 7 | } 8 | 9 | def isNotBlank(str: String, message: String) = if (str.trim.isEmpty) { 10 | throw new IllegalArgumentException(message) 11 | } 12 | 13 | def isNotEmpty(seq: Seq[_], message: String) = if (seq.isEmpty) { 14 | throw new IllegalArgumentException(message) 15 | } 16 | 17 | def isNotEmpty(map: Map[_, _], message: String) = if (map.isEmpty) { 18 | throw new IllegalArgumentException(message) 19 | } 20 | 21 | def isTrue(boolean: Boolean, message: String) = if (!boolean) { 22 | throw new IllegalArgumentException(message) 23 | } 24 | 25 | def isFalse(boolean: Boolean, message: String) = if (boolean) { 26 | throw new IllegalArgumentException(message) 27 | } 28 | 29 | def isNotValid(message: String) = throw new IllegalArgumentException(message) 30 | } 31 | -------------------------------------------------------------------------------- /doc/docs/modules/ROOT/pages/index.adoc: 
-------------------------------------------------------------------------------- 1 | 2 | = Neo4j Connector for Apache Spark v{connector-version} 3 | 4 | ifdef::backend-html5[(C) {copyright}] 5 | 6 | License: link:{attachmentsdir}/LICENSE.txt[Creative Commons 4.0] 7 | 8 | [abstract] 9 | -- 10 | This is the user guide for Neo4j Connector for Apache Spark version {connector-version}, authored by the Neo4j Labs Team. 11 | -- 12 | 13 | The guide covers the following areas: 14 | 15 | * xref::overview.adoc[Project overview] 16 | * xref::quickstart.adoc[Quick Start] 17 | * xref::configuration.adoc[Neo4j Driver Configuration] 18 | * xref::reading.adoc[Reading from Neo4j] 19 | * xref::writing.adoc[Writing to Neo4j] 20 | * xref::python.adoc[Using with Pyspark / Python] 21 | * xref::gds.adoc[Using with Graph Data Science] 22 | * xref::neo4j-cluster.adoc[Using with Neo4j Causal Cluster] 23 | * xref::aura.adoc[Using with Neo4j Aura] 24 | * xref::architecture.adoc[Architecture Guidance for Implementing] 25 | * xref::types.adoc[Neo4j-Spark Data Types Reference] 26 | * xref::faq.adoc[Neo4j Connector for Apache Spark FAQ] 27 | -------------------------------------------------------------------------------- /spark-2.4/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j41xWithApocTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import org.junit.Assert.assertEquals 4 | import org.junit.{Assume, BeforeClass, Test} 5 | 6 | object DataSourceReaderNeo4j41xWithApocTSE { 7 | @BeforeClass 8 | def checkNeo4jVersion() { 9 | val neo4jVersion = TestUtil.neo4jVersion() 10 | Assume.assumeTrue(!neo4jVersion.startsWith("3.5") && !neo4jVersion.startsWith("4.0")) 11 | } 12 | } 13 | 14 | class DataSourceReaderNeo4j41xWithApocTSE extends SparkConnectorScalaBaseWithApocTSE { 15 | 16 | @Test 17 | def testReturnProcedure(): Unit = { 18 | val query = 19 | """RETURN apoc.convert.toSet([1,1,3]) AS foo, 'bar' AS bar 20 | |""".stripMargin 21 | 22 | val df = ss.read.format(classOf[DataSource].getName) 23 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl) 24 | .option("partitions", 1) 25 | .option("query", query) 26 | .load 27 | 28 | assertEquals(Seq("foo", "bar"), df.columns.toSeq) // ordering should be preserved 29 | assertEquals(1, df.count()) 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /spark-3.0/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j41xWithApocTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import org.junit.Assert.assertEquals 4 | import org.junit.{Assume, BeforeClass, Test} 5 | 6 | object DataSourceReaderNeo4j41xWithApocTSE { 7 | @BeforeClass 8 | def checkNeo4jVersion() { 9 | val neo4jVersion = TestUtil.neo4jVersion() 10 | Assume.assumeTrue(!neo4jVersion.startsWith("3.5") && !neo4jVersion.startsWith("4.0")) 11 | } 12 | } 13 | 14 | class DataSourceReaderNeo4j41xWithApocTSE extends SparkConnectorScalaBaseWithApocTSE { 15 | 16 | @Test 17 | def testReturnProcedure(): Unit = { 18 | val query = 19 | """RETURN apoc.convert.toSet([1,1,3]) AS foo, 'bar' AS bar 20 | |""".stripMargin 21 | 22 | val df = ss.read.format(classOf[DataSource].getName) 23 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl) 24 | .option("partitions", 1) 25 | .option("query", query) 26 | .load 27 | 28 | assertEquals(Seq("foo", "bar"), df.columns.toSeq) // ordering should be preserved 29 | 
assertEquals(1, df.count()) 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /spark-3.0/src/main/scala/org/neo4j/spark/reader/Neo4jPartitionReader.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.reader 2 | 3 | import org.apache.spark.internal.Logging 4 | import org.apache.spark.sql.catalyst.InternalRow 5 | import org.apache.spark.sql.connector.read.PartitionReader 6 | import org.apache.spark.sql.sources.Filter 7 | import org.apache.spark.sql.types.StructType 8 | import org.neo4j.spark.service.PartitionSkipLimit 9 | import org.neo4j.spark.util.Neo4jOptions 10 | 11 | class Neo4jPartitionReader(private val options: Neo4jOptions, 12 | private val filters: Array[Filter], 13 | private val schema: StructType, 14 | private val jobId: String, 15 | private val partitionSkipLimit: PartitionSkipLimit, 16 | private val scriptResult: java.util.List[java.util.Map[String, AnyRef]], 17 | private val requiredColumns: StructType) 18 | extends BasePartitionReader(options, filters, schema, jobId, partitionSkipLimit, scriptResult, requiredColumns) 19 | with PartitionReader[InternalRow] -------------------------------------------------------------------------------- /common/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | neo4j-connector-apache-spark_${scala.binary.version}_common 8 | neo4j-connector-apache-spark-common 9 | Common Services for Neo4j Connector for Apache Spark using the binary Bolt Driver 10 | 11 | 4.0.0 12 | jar 13 | 14 | 15 | neo4j-contrib 16 | neo4j-connector-apache-spark 17 | 4.0.0 18 | 19 | 20 | 21 | 22 | neo4j-contrib 23 | neo4j-connector-apache-spark_${scala.binary.version}_test-support 24 | 4.0.0 25 | test 26 | 27 | 28 | -------------------------------------------------------------------------------- /doc/docs/modules/ROOT/pages/neo4j-cluster.adoc: -------------------------------------------------------------------------------- 1 | 2 | [#neo4j_causal_cluster] 3 | = Using with Neo4j Causal Cluster 4 | 5 | [abstract] 6 | -- 7 | This chapter describes considerations around using Neo4j Connector for Apache Spark with Neo4j Enterprise Causal Cluster. 8 | -- 9 | 10 | == Overview 11 | 12 | link:https://neo4j.com/docs/operations-manual/current/clustering/[Neo4j Clustering] is a feature available in 13 | Enterprise Edition which allows high availability of the database through having multiple database members. 14 | 15 | Neo4j Enterprise uses a link:https://neo4j.com/docs/operations-manual/current/clustering/introduction/#causal-clustering-introduction-operational[LEADER/FOLLOWER] 16 | operational view, where writes are always processed by the leader, while reads can be serviced by either followers, 17 | or optionally be read replicas, which maintain a copy of the database and serve to scale out read operations 18 | horizontally. 19 | 20 | == Remote Clients 21 | 22 | Sometimes there will be remote applications that talk to Neo4j via official drivers, that want to use 23 | streams functionality. Best practices in these cases are: 24 | 25 | * Always use a `neo4j+s://` driver URI when communicating with the cluster in the client application. 
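As an illustration of the recommendation above, the sketch below appends a small DataFrame as `:Person` nodes through a routing (`neo4j+s://`) URI, so the driver underneath the connector sends the write to the current leader. The host and credentials are placeholders, and the `authentication.basic.*` option names are assumed from the connector's configuration page.

[source,scala]
----
import org.apache.spark.sql.{SaveMode, SparkSession}

object ClusterWriteExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("neo4j-cluster-write")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    val people = Seq("John Doe", "Jane Doe").toDF("name")

    // With a routing URI the write transaction is directed to the cluster leader.
    people.write
      .format("org.neo4j.spark.DataSource")
      .mode(SaveMode.Append)
      .option("url", "neo4j+s://my-cluster.example.com")         // placeholder routing URI
      .option("authentication.basic.username", "spark_writer")   // placeholder credentials
      .option("authentication.basic.password", sys.env("NEO4J_PASSWORD"))
      .option("labels", ":Person")
      .save()
  }
}
----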
-------------------------------------------------------------------------------- /spark-3.0/src/main/scala/org/neo4j/spark/reader/SimplePartitionReaderFactory.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.reader 2 | 3 | import org.apache.spark.sql.catalyst.InternalRow 4 | import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} 5 | import org.apache.spark.sql.sources.Filter 6 | import org.apache.spark.sql.types.StructType 7 | import org.neo4j.spark.service.PartitionSkipLimit 8 | import org.neo4j.spark.util.Neo4jOptions 9 | 10 | class SimplePartitionReaderFactory(private val neo4jOptions: Neo4jOptions, 11 | private val filters: Array[Filter], 12 | private val schema: StructType, 13 | private val jobId: String, 14 | private val scriptResult: java.util.List[java.util.Map[String, AnyRef]], 15 | private val requiredColumns: StructType) extends PartitionReaderFactory { 16 | override def createReader(partition: InputPartition): PartitionReader[InternalRow] = new Neo4jPartitionReader( 17 | neo4jOptions, 18 | filters, 19 | schema, 20 | jobId, 21 | partition.asInstanceOf[Neo4jPartition].partitionSkipLimit, 22 | scriptResult, 23 | requiredColumns 24 | ) 25 | } 26 | -------------------------------------------------------------------------------- /spark-2.4/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j35xTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import org.apache.spark.SparkException 4 | import org.junit.Assert.{assertTrue, fail} 5 | import org.junit.{Assume, BeforeClass, Test} 6 | import org.neo4j.driver.exceptions.ClientException 7 | 8 | object DataSourceReaderNeo4j35xTSE { 9 | @BeforeClass 10 | def checkNeo4jVersion() { 11 | Assume.assumeTrue(TestUtil.neo4jVersion().startsWith("3.5")) 12 | } 13 | } 14 | 15 | class DataSourceReaderNeo4j35xTSE extends SparkConnectorScalaBaseTSE { 16 | @Test 17 | def testShouldThrowClearErrorIfADbIsSpecified(): Unit = { 18 | try { 19 | ss.read.format(classOf[DataSource].getName) 20 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl) 21 | .option("database", "db1") 22 | .option("labels", "MATCH (h:Household) RETURN id(h)") 23 | .load() 24 | .show() 25 | } 26 | catch { 27 | case clientException: ClientException => { 28 | assertTrue(clientException.getMessage.equals( 29 | "Database name parameter for selecting database is not supported in Bolt Protocol Version 3.0. 
Database name: 'db1'" 30 | )) 31 | } 32 | case generic => fail(s"should be thrown a ${classOf[SparkException].getName}, got ${generic.getClass} instead") 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /spark-3.0/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j35xTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import org.apache.spark.SparkException 4 | import org.junit.Assert.{assertTrue, fail} 5 | import org.junit.{Assume, BeforeClass, Test} 6 | import org.neo4j.driver.exceptions.ClientException 7 | 8 | object DataSourceReaderNeo4j35xTSE { 9 | @BeforeClass 10 | def checkNeo4jVersion() { 11 | Assume.assumeTrue(TestUtil.neo4jVersion().startsWith("3.5")) 12 | } 13 | } 14 | 15 | class DataSourceReaderNeo4j35xTSE extends SparkConnectorScalaBaseTSE { 16 | @Test 17 | def testShouldThrowClearErrorIfADbIsSpecified(): Unit = { 18 | try { 19 | ss.read.format(classOf[DataSource].getName) 20 | .option("url", SparkConnectorScalaSuiteIT.server.getBoltUrl) 21 | .option("database", "db1") 22 | .option("labels", "MATCH (h:Household) RETURN id(h)") 23 | .load() 24 | .show() 25 | } 26 | catch { 27 | case clientException: ClientException => { 28 | assertTrue(clientException.getMessage.equals( 29 | "Database name parameter for selecting database is not supported in Bolt Protocol Version 3.0. Database name: 'db1'" 30 | )) 31 | } 32 | case generic => fail(s"should be thrown a ${classOf[SparkException].getName}, got ${generic.getClass} instead") 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /common/src/main/scala/org/neo4j/spark/util/DriverCache.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.util 2 | 3 | import org.neo4j.driver.{Driver, GraphDatabase} 4 | import org.neo4j.spark.util.DriverCache.{cache, jobIdCache} 5 | 6 | import java.util.concurrent.ConcurrentHashMap 7 | import java.util.{Collections, function} 8 | 9 | object DriverCache { 10 | private val cache: ConcurrentHashMap[Neo4jDriverOptions, Driver] = new ConcurrentHashMap[Neo4jDriverOptions, Driver] 11 | private val jobIdCache = Collections.newSetFromMap[String](new ConcurrentHashMap[String, java.lang.Boolean]()) 12 | } 13 | 14 | class DriverCache(private val options: Neo4jDriverOptions, private val jobId: String) extends Serializable with AutoCloseable { 15 | def getOrCreate(): Driver = { 16 | this.synchronized { 17 | jobIdCache.add(jobId) 18 | cache.computeIfAbsent(options, new function.Function[Neo4jDriverOptions, Driver] { 19 | override def apply(t: Neo4jDriverOptions): Driver = GraphDatabase.driver(t.url, t.toNeo4jAuth, t.toDriverConfig) 20 | }) 21 | } 22 | } 23 | 24 | def close(): Unit = { 25 | this.synchronized { 26 | jobIdCache.remove(jobId) 27 | if(jobIdCache.isEmpty) { 28 | val driver = cache.remove(options) 29 | if (driver != null) { 30 | Neo4jUtil.closeSafety(driver) 31 | } 32 | } 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /spark-3.0/src/main/scala/org/neo4j/spark/writer/Neo4jBatchWriter.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.writer 2 | 3 | import org.apache.spark.sql.SaveMode 4 | import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, PhysicalWriteInfo, WriterCommitMessage} 5 | import 
org.apache.spark.sql.types.StructType 6 | import org.neo4j.spark.service.SchemaService 7 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions} 8 | 9 | class Neo4jBatchWriter(jobId: String, 10 | structType: StructType, 11 | saveMode: SaveMode, 12 | neo4jOptions: Neo4jOptions) extends BatchWrite{ 13 | override def createBatchWriterFactory(physicalWriteInfo: PhysicalWriteInfo): DataWriterFactory = { 14 | val schemaService = new SchemaService(neo4jOptions, driverCache) 15 | schemaService.createOptimizations() 16 | val scriptResult = schemaService.execute(neo4jOptions.script) 17 | schemaService.close() 18 | 19 | new Neo4jDataWriterFactory( 20 | jobId, 21 | structType, 22 | saveMode, 23 | neo4jOptions, 24 | scriptResult 25 | ) 26 | } 27 | 28 | private val driverCache = new DriverCache(neo4jOptions.connection, jobId) 29 | 30 | override def commit(messages: Array[WriterCommitMessage]): Unit = { 31 | driverCache.close() 32 | } 33 | 34 | override def abort(messages: Array[WriterCommitMessage]): Unit = { 35 | driverCache.close() 36 | } 37 | } -------------------------------------------------------------------------------- /spark-3.0/src/main/scala/org/neo4j/spark/reader/SimpleScanBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.reader 2 | 3 | import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns} 4 | import org.apache.spark.sql.sources.Filter 5 | import org.apache.spark.sql.types.StructType 6 | import org.neo4j.spark.util.{Neo4jOptions} 7 | 8 | class SimpleScanBuilder(neo4jOptions: Neo4jOptions, jobId: String, schema: StructType) extends ScanBuilder 9 | with SupportsPushDownFilters 10 | with SupportsPushDownRequiredColumns { 11 | 12 | private var filters: Array[Filter] = Array[Filter]() 13 | 14 | private var requiredColumns: StructType = new StructType() 15 | 16 | override def build(): Scan = { 17 | new SimpleScan(neo4jOptions, jobId, schema, filters, requiredColumns) 18 | } 19 | 20 | override def pushFilters(filtersArray: Array[Filter]): Array[Filter] = { 21 | if (neo4jOptions.pushdownFiltersEnabled) { 22 | filters = filtersArray 23 | } 24 | 25 | filtersArray 26 | } 27 | 28 | override def pushedFilters(): Array[Filter] = filters 29 | 30 | override def pruneColumns(requiredSchema: StructType): Unit = { 31 | requiredColumns = if ( 32 | !neo4jOptions.pushdownColumnsEnabled || neo4jOptions.relationshipMetadata.nodeMap 33 | || requiredSchema == schema 34 | ) { 35 | new StructType() 36 | } else { 37 | requiredSchema 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /doc/docbook/content-map.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /spark-2.4/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | neo4j-connector-apache-spark_${scala.binary.version}_2.4 8 | neo4j-connector-apache-spark-2.4 9 | Spark 2.4 for Neo4j Connector for Apache Spark using the binary Bolt Driver 10 | 11 | 4.0.0 12 | jar 13 | 14 | 15 | neo4j-contrib 16 | neo4j-connector-apache-spark 17 | 4.0.0 18 | 19 | 20 | 21 | 2.4.5 22 | 23 | 
24 | 25 | 26 | neo4j-contrib 27 | neo4j-connector-apache-spark_${scala.binary.version}_common 28 | 4.0.0 29 | 30 | 31 | neo4j-contrib 32 | neo4j-connector-apache-spark_${scala.binary.version}_test-support 33 | 4.0.0 34 | test 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /spark-3.0/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | neo4j-connector-apache-spark_${scala.binary.version}_3.0 8 | neo4j-connector-apache-spark-3.0 9 | Spark 3.0 for Neo4j Connector for Apache Spark using the binary Bolt Driver 10 | 11 | 4.0.0 12 | jar 13 | 14 | 15 | neo4j-contrib 16 | neo4j-connector-apache-spark 17 | 4.0.0 18 | 19 | 20 | 21 | 3.0.1 22 | 23 | 24 | 25 | 26 | neo4j-contrib 27 | neo4j-connector-apache-spark_${scala.binary.version}_common 28 | 4.0.0 29 | 30 | 31 | neo4j-contrib 32 | neo4j-connector-apache-spark_${scala.binary.version}_test-support 33 | 4.0.0 34 | test 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /doc/asciidoc/neo4j-cluster/index.adoc: -------------------------------------------------------------------------------- 1 | 2 | [#neo4j_causal_cluster] 3 | == Using with Neo4j Causal Cluster 4 | 5 | ifdef::env-docs[] 6 | [abstract] 7 | -- 8 | This chapter describes considerations around using Neo4j Connector for Apache Spark with Neo4j Enterprise Causal Cluster. 9 | -- 10 | endif::env-docs[] 11 | 12 | === Overview 13 | 14 | link:https://neo4j.com/docs/operations-manual/current/clustering/[Neo4j Clustering] is a feature available in 15 | Enterprise Edition which allows high availability of the database through having multiple database members. 16 | 17 | Neo4j Enterprise uses a link:https://neo4j.com/docs/operations-manual/current/clustering/introduction/#causal-clustering-introduction-operational[LEADER/FOLLOWER] 18 | operational view, where writes are always processed by the leader, while reads can be serviced by either followers, 19 | or optionally be read replicas, which maintain a copy of the database and serve to scale out read operations 20 | horizontally. 21 | 22 | === Remote Clients 23 | 24 | Sometimes there will be remote applications that talk to Neo4j via official drivers, that want to use 25 | streams functionality. Best practices in these cases are: 26 | 27 | * Always use a `neo4j+s://` driver URI when communicating with the cluster in the client application. 28 | * Use link:https://neo4j.com/docs/driver-manual/current/sessions-transactions/#driver-transactions[Explicit Write Transactions] in 29 | your client application when using procedure calls such as `CALL streams.consume` to ensure that the routing 30 | driver routes the query to the leader. 
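The sketch below shows that pattern with the official driver directly, using the same `writeTransaction`/`TransactionWork` API that the test suites in this repository use. The cluster URI, credentials, and topic name are placeholders, and `streams.consume` is only the example procedure named above.

[source,scala]
----
import org.neo4j.driver.summary.ResultSummary
import org.neo4j.driver.{AuthTokens, GraphDatabase, Transaction, TransactionWork}

object ExplicitWriteTxExample {
  def main(args: Array[String]): Unit = {
    // neo4j+s:// gives TLS plus cluster routing; URI and credentials are placeholders.
    val driver = GraphDatabase.driver(
      "neo4j+s://my-cluster.example.com",
      AuthTokens.basic("client_user", sys.env("NEO4J_PASSWORD")))
    val session = driver.session()
    try {
      // Wrapping the procedure call in an explicit write transaction makes the routing
      // driver send it to the leader, even though the Cypher text does not look like a write.
      session.writeTransaction(new TransactionWork[ResultSummary] {
        override def execute(tx: Transaction): ResultSummary =
          tx.run("CALL streams.consume('my-topic')").consume()
      })
    } finally {
      session.close()
      driver.close()
    }
  }
}
----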
-------------------------------------------------------------------------------- /spark-2.4/src/main/scala/org/neo4j/spark/DataSource.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import java.util.{Optional, UUID} 4 | import org.apache.spark.sql.SaveMode 5 | import org.apache.spark.sql.sources.DataSourceRegister 6 | import org.apache.spark.sql.sources.v2.writer.DataSourceWriter 7 | import org.apache.spark.sql.sources.v2.{DataSourceOptions, DataSourceV2, ReadSupport, WriteSupport} 8 | import org.apache.spark.sql.types.StructType 9 | import org.neo4j.spark.reader.Neo4jDataSourceReader 10 | import org.neo4j.spark.util.Neo4jOptions 11 | import org.neo4j.spark.writer.Neo4jDataSourceWriter 12 | 13 | class DataSource extends DataSourceV2 with ReadSupport with DataSourceRegister with WriteSupport { 14 | 15 | private val jobId: String = UUID.randomUUID().toString 16 | 17 | def createReader(options: DataSourceOptions) = new Neo4jDataSourceReader(options, jobId) 18 | 19 | override def shortName: String = "neo4j" 20 | 21 | override def createWriter(jobId: String, 22 | structType: StructType, 23 | saveMode: SaveMode, 24 | options: DataSourceOptions): Optional[DataSourceWriter] = 25 | if (Neo4jOptions.SUPPORTED_SAVE_MODES.contains(saveMode)) { 26 | Optional.of(new Neo4jDataSourceWriter(jobId, structType, saveMode, options)) 27 | } else { 28 | throw new IllegalArgumentException( 29 | s"""Unsupported SaveMode. 30 | |You provided $saveMode, supported are: 31 | |${Neo4jOptions.SUPPORTED_SAVE_MODES.mkString(",")} 32 | |""".stripMargin) 33 | } 34 | } -------------------------------------------------------------------------------- /spark-3.0/src/main/scala/org/neo4j/spark/writer/Neo4jWriterBuilder.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.writer 2 | 3 | import org.apache.spark.sql.SaveMode 4 | import org.apache.spark.sql.connector.write.{BatchWrite, SupportsOverwrite, SupportsTruncate, WriteBuilder} 5 | import org.apache.spark.sql.sources.Filter 6 | import org.apache.spark.sql.types.StructType 7 | import org.neo4j.driver.AccessMode 8 | import org.neo4j.spark.util.{Neo4jOptions, NodeSaveMode, ValidationUtil, Validations} 9 | 10 | class Neo4jWriterBuilder(jobId: String, 11 | structType: StructType, 12 | saveMode: SaveMode, 13 | neo4jOptions: Neo4jOptions) extends WriteBuilder 14 | with SupportsOverwrite 15 | with SupportsTruncate { 16 | 17 | def validOptions(): Neo4jOptions = { 18 | neo4jOptions.validate(neo4jOptions => 19 | Validations.writer(neo4jOptions, jobId, saveMode, (o: Neo4jOptions) => { 20 | ValidationUtil.isFalse( 21 | o.relationshipMetadata.sourceSaveMode.equals(NodeSaveMode.ErrorIfExists) 22 | && o.relationshipMetadata.targetSaveMode.equals(NodeSaveMode.ErrorIfExists), 23 | "Save mode 'ErrorIfExists' is not supported on Spark 3.0, use 'Append' instead.") 24 | })) 25 | } 26 | 27 | override def buildForBatch(): BatchWrite = new Neo4jBatchWriter(jobId, 28 | structType, 29 | saveMode, 30 | validOptions() 31 | ) 32 | 33 | override def overwrite(filters: Array[Filter]): WriteBuilder = { 34 | new Neo4jWriterBuilder(jobId, structType, SaveMode.Overwrite, neo4jOptions) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /doc/javascript/colorize.js: -------------------------------------------------------------------------------- 1 | // CodeMirror, copyright (c) by Marijn Haverbeke and others 2 | // Distributed 
under an MIT license: http://codemirror.net/LICENSE 3 | // Modified by the Neo4j team. 4 | 5 | "use strict"; 6 | 7 | CodeMirror.colorize = (function() { 8 | 9 | var isBlock = /^(p|li|div|h\\d|pre|blockquote|td)$/; 10 | 11 | function textContent(node, out) { 12 | if (node.nodeType == 3) return out.push(node.nodeValue); 13 | for (var ch = node.firstChild; ch; ch = ch.nextSibling) { 14 | textContent(ch, out); 15 | if (isBlock.test(node.nodeType)) out.push("\n"); 16 | } 17 | } 18 | 19 | return function() { 20 | var collection = document.body.getElementsByTagName("code"); 21 | 22 | for (var i = 0; i < collection.length; ++i) { 23 | var theme = " cm-s-default"; 24 | var node = collection[i]; 25 | var mode = node.getAttribute("data-lang"); 26 | if (!mode) continue; 27 | if (mode === "cypher") { 28 | theme = " cm-s-neo"; 29 | } else if (mode === "cypher-noexec") { 30 | mode = "cypher"; 31 | theme = " cm-s-neo"; 32 | } else if (mode === "java") { 33 | mode = "text/x-java"; 34 | } else if (mode === "csharp") { 35 | mode = "text/x-csharp"; 36 | } else if (mode === "sql") { 37 | mode = "text/x-sql"; 38 | } else if (mode === "properties") { 39 | mode = "text/x-properties"; 40 | } else if (mode === "json") { 41 | mode = "application/json"; 42 | } 43 | 44 | var text = []; 45 | textContent(node, text); 46 | node.innerHTML = ""; 47 | CodeMirror.runMode(text.join(""), mode, node); 48 | 49 | node.className += theme; 50 | } 51 | }; 52 | })(); 53 | -------------------------------------------------------------------------------- /spark-2.4/src/main/scala/org/neo4j/spark/writer/Neo4jDataSourceWriter.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.writer 2 | 3 | import org.apache.spark.sql.SaveMode 4 | import org.apache.spark.sql.catalyst.InternalRow 5 | import org.apache.spark.sql.sources.v2.DataSourceOptions 6 | import org.apache.spark.sql.sources.v2.writer.{DataSourceWriter, DataWriterFactory, WriterCommitMessage} 7 | import org.apache.spark.sql.types.StructType 8 | import org.neo4j.driver.AccessMode 9 | import org.neo4j.spark.service.SchemaService 10 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions, NodeSaveMode, ValidationUtil, Validations} 11 | 12 | class Neo4jDataSourceWriter(jobId: String, 13 | structType: StructType, 14 | saveMode: SaveMode, 15 | options: DataSourceOptions) extends DataSourceWriter { 16 | 17 | private val optionsMap = options.asMap() 18 | optionsMap.put(Neo4jOptions.ACCESS_MODE, AccessMode.WRITE.toString) 19 | 20 | private val neo4jOptions: Neo4jOptions = new Neo4jOptions(optionsMap) 21 | .validate((neo4jOptions: Neo4jOptions) => Validations.writer(neo4jOptions, jobId, saveMode, _ => Unit)) 22 | 23 | private val driverCache = new DriverCache(neo4jOptions.connection, jobId) 24 | 25 | override def createWriterFactory(): DataWriterFactory[InternalRow] = { 26 | val schemaService = new SchemaService(neo4jOptions, driverCache) 27 | schemaService.createOptimizations() 28 | val scriptResult = schemaService.execute(neo4jOptions.script) 29 | schemaService.close() 30 | new Neo4jDataWriterFactory(jobId, structType, saveMode, neo4jOptions, scriptResult) 31 | } 32 | 33 | override def commit(messages: Array[WriterCommitMessage]): Unit = { 34 | driverCache.close() 35 | } 36 | 37 | override def abort(messages: Array[WriterCommitMessage]): Unit = { 38 | driverCache.close() 39 | } 40 | } -------------------------------------------------------------------------------- /test-support/pom.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | neo4j-connector-apache-spark_${scala.binary.version}_test-support 8 | neo4j-connector-apache-spark-test-support 9 | Test Utilities for Neo4j Connector for Apache Spark using the binary Bolt Driver 10 | 11 | 4.0.0 12 | jar 13 | 14 | 15 | neo4j-contrib 16 | neo4j-connector-apache-spark 17 | 4.0.0 18 | 19 | 20 | 21 | 22 | 23 | junit 24 | junit 25 | 4.13.1 26 | 27 | 28 | 29 | org.hamcrest 30 | hamcrest-library 31 | 1.3 32 | 33 | 34 | 35 | org.testcontainers 36 | testcontainers 37 | ${testcontainers.version} 38 | 39 | 40 | 41 | org.testcontainers 42 | neo4j 43 | ${testcontainers.version} 44 | 45 | 46 | 47 | com.fasterxml.jackson.core 48 | jackson-annotations 49 | 2.12.0 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /spark-3.0/src/main/scala/org/neo4j/spark/Neo4jTable.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import org.apache.spark.internal.Logging 4 | import org.apache.spark.sql.SaveMode 5 | import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability} 6 | import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} 7 | import org.apache.spark.sql.types.StructType 8 | import org.apache.spark.sql.util.CaseInsensitiveStringMap 9 | import org.neo4j.driver.AccessMode 10 | import org.neo4j.spark.reader.SimpleScanBuilder 11 | import org.neo4j.spark.util.{Neo4jOptions, Validations} 12 | import org.neo4j.spark.writer.Neo4jWriterBuilder 13 | 14 | import scala.collection.JavaConverters._ 15 | 16 | class Neo4jTable(schema: StructType, options: java.util.Map[String, String], jobId: String) extends Table 17 | with SupportsRead 18 | with SupportsWrite 19 | with Logging { 20 | 21 | private val neo4jOptions = new Neo4jOptions(options) 22 | 23 | override def name(): String = neo4jOptions.getTableName 24 | 25 | override def schema(): StructType = schema 26 | 27 | override def capabilities(): java.util.Set[TableCapability] = Set( 28 | TableCapability.BATCH_READ, 29 | TableCapability.BATCH_WRITE, 30 | TableCapability.ACCEPT_ANY_SCHEMA, 31 | TableCapability.OVERWRITE_BY_FILTER, 32 | TableCapability.OVERWRITE_DYNAMIC 33 | ).asJava 34 | 35 | override def newScanBuilder(options: CaseInsensitiveStringMap): SimpleScanBuilder = { 36 | val validOptions = neo4jOptions.validate(neo4jOptions => Validations.read(neo4jOptions, jobId)) 37 | new SimpleScanBuilder(validOptions, jobId, schema()) 38 | } 39 | 40 | override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { 41 | val mapOptions = new java.util.HashMap[String, String](options) 42 | mapOptions.put(Neo4jOptions.ACCESS_MODE, AccessMode.WRITE.toString) 43 | val writeNeo4jOptions = new Neo4jOptions(mapOptions) 44 | new Neo4jWriterBuilder(jobId, info.schema(), SaveMode.Append, writeNeo4jOptions) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /spark-2.4/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j4xWithApocTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import org.junit.Assert.assertEquals 4 | import org.junit.{Assume, BeforeClass, Test} 5 | import org.neo4j.driver.summary.ResultSummary 6 | import org.neo4j.driver.{SessionConfig, Transaction, TransactionWork} 7 | 8 | object DataSourceReaderNeo4j4xWithApocTSE { 9 | @BeforeClass 
10 | def checkNeo4jVersion() { 11 | Assume.assumeFalse(TestUtil.neo4jVersion().startsWith("3.5")) 12 | } 13 | } 14 | 15 | class DataSourceReaderNeo4j4xWithApocTSE extends SparkConnectorScalaBaseWithApocTSE { 16 | 17 | @Test 18 | def testMultiDbJoin(): Unit = { 19 | SparkConnectorScalaSuiteWithApocIT.driver.session(SessionConfig.forDatabase("db1")) 20 | .writeTransaction( 21 | new TransactionWork[ResultSummary] { 22 | override def execute(tx: Transaction): ResultSummary = tx.run( 23 | """ 24 | CREATE (p1:Person:Customer {name: 'John Doe'}), 25 | (p2:Person:Customer {name: 'Mark Brown'}), 26 | (p3:Person:Customer {name: 'Cindy White'}) 27 | """).consume() 28 | }) 29 | 30 | SparkConnectorScalaSuiteWithApocIT.driver.session(SessionConfig.forDatabase("db2")) 31 | .writeTransaction( 32 | new TransactionWork[ResultSummary] { 33 | override def execute(tx: Transaction): ResultSummary = tx.run( 34 | """ 35 | CREATE (p1:Person:Employee {name: 'Jane Doe'}), 36 | (p2:Person:Employee {name: 'John Doe'}) 37 | """).consume() 38 | }) 39 | 40 | val df1 = ss.read.format(classOf[DataSource].getName) 41 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl) 42 | .option("database", "db1") 43 | .option("labels", "Person") 44 | .load() 45 | 46 | val df2 = ss.read.format(classOf[DataSource].getName) 47 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl) 48 | .option("database", "db2") 49 | .option("labels", "Person") 50 | .load() 51 | 52 | assertEquals(3, df1.count()) 53 | assertEquals(2, df2.count()) 54 | 55 | val dfJoin = df1.join(df2, df1("name") === df2("name")) 56 | assertEquals(1, dfJoin.count()) 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /spark-3.0/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4j4xWithApocTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import org.junit.Assert.assertEquals 4 | import org.junit.{Assume, BeforeClass, Test} 5 | import org.neo4j.driver.summary.ResultSummary 6 | import org.neo4j.driver.{SessionConfig, Transaction, TransactionWork} 7 | 8 | object DataSourceReaderNeo4j4xWithApocTSE { 9 | @BeforeClass 10 | def checkNeo4jVersion() { 11 | Assume.assumeFalse(TestUtil.neo4jVersion().startsWith("3.5")) 12 | } 13 | } 14 | 15 | class DataSourceReaderNeo4j4xWithApocTSE extends SparkConnectorScalaBaseWithApocTSE { 16 | 17 | @Test 18 | def testMultiDbJoin(): Unit = { 19 | SparkConnectorScalaSuiteWithApocIT.driver.session(SessionConfig.forDatabase("db1")) 20 | .writeTransaction( 21 | new TransactionWork[ResultSummary] { 22 | override def execute(tx: Transaction): ResultSummary = tx.run( 23 | """ 24 | CREATE (p1:Person:Customer {name: 'John Doe'}), 25 | (p2:Person:Customer {name: 'Mark Brown'}), 26 | (p3:Person:Customer {name: 'Cindy White'}) 27 | """).consume() 28 | }) 29 | 30 | SparkConnectorScalaSuiteWithApocIT.driver.session(SessionConfig.forDatabase("db2")) 31 | .writeTransaction( 32 | new TransactionWork[ResultSummary] { 33 | override def execute(tx: Transaction): ResultSummary = tx.run( 34 | """ 35 | CREATE (p1:Person:Employee {name: 'Jane Doe'}), 36 | (p2:Person:Employee {name: 'John Doe'}) 37 | """).consume() 38 | }) 39 | 40 | val df1 = ss.read.format(classOf[DataSource].getName) 41 | .option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl) 42 | .option("database", "db1") 43 | .option("labels", "Person") 44 | .load() 45 | 46 | val df2 = ss.read.format(classOf[DataSource].getName) 47 | 
.option("url", SparkConnectorScalaSuiteWithApocIT.server.getBoltUrl) 48 | .option("database", "db2") 49 | .option("labels", "Person") 50 | .load() 51 | 52 | assertEquals(3, df1.count()) 53 | assertEquals(2, df2.count()) 54 | 55 | val dfJoin = df1.join(df2, df1("name") === df2("name")) 56 | assertEquals(1, dfJoin.count()) 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /common/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import java.util.concurrent.TimeUnit 4 | import org.apache.spark.sql.SparkSession 5 | import org.apache.spark.SparkConf 6 | import org.hamcrest.Matchers 7 | import org.junit._ 8 | import org.junit.rules.TestName 9 | import org.neo4j.driver.summary.ResultSummary 10 | import org.neo4j.driver.{Transaction, TransactionWork} 11 | 12 | object SparkConnectorScalaBaseTSE { 13 | 14 | private var startedFromSuite = true 15 | 16 | @BeforeClass 17 | def setUpContainer() = { 18 | if (!SparkConnectorScalaSuiteIT.server.isRunning) { 19 | startedFromSuite = false 20 | SparkConnectorScalaSuiteIT.setUpContainer() 21 | } 22 | } 23 | 24 | @AfterClass 25 | def tearDownContainer() = { 26 | if (!startedFromSuite) { 27 | SparkConnectorScalaSuiteIT.tearDownContainer() 28 | } 29 | } 30 | 31 | } 32 | 33 | class SparkConnectorScalaBaseTSE { 34 | 35 | val conf: SparkConf = SparkConnectorScalaSuiteIT.conf 36 | val ss: SparkSession = SparkConnectorScalaSuiteIT.ss 37 | 38 | val _testName: TestName = new TestName 39 | 40 | @Rule 41 | def testName = _testName 42 | 43 | @Before 44 | def before() { 45 | SparkConnectorScalaSuiteIT.session() 46 | .writeTransaction(new TransactionWork[ResultSummary] { 47 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume() 48 | }) 49 | } 50 | 51 | @After 52 | def after() { 53 | if (!TestUtil.isTravis()) { 54 | try { 55 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] { 56 | override def get(): Boolean = { 57 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections 58 | SparkConnectorScalaSuiteIT.connections == afterConnections 59 | } 60 | }, Matchers.equalTo(true), 30, TimeUnit.SECONDS) 61 | } finally { 62 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections 63 | if (SparkConnectorScalaSuiteIT.connections != afterConnections) { // just for debug purposes 64 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteIT.connections}, after: $afterConnections") 65 | } 66 | } 67 | } 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /spark-3.0/src/main/scala/org/neo4j/spark/DataSource.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import java.util.UUID 4 | import org.apache.spark.sql.connector.catalog.{Table, TableProvider} 5 | import org.apache.spark.sql.connector.expressions.Transform 6 | import org.apache.spark.sql.sources.DataSourceRegister 7 | import org.apache.spark.sql.types.StructType 8 | import org.apache.spark.sql.util.CaseInsensitiveStringMap 9 | import org.neo4j.spark.service.SchemaService 10 | import org.neo4j.spark.util.Validations.validateConnection 11 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions} 12 | 13 | class DataSource extends TableProvider 14 | with DataSourceRegister { 15 | 16 | 
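// Unique id for this DataSource instance: it keys the DriverCache so the same Neo4j driver can be shared by schema inference, reads, and writes, and released when they finish.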
private val jobId: String = UUID.randomUUID().toString 17 | 18 | private var schema: StructType = null 19 | 20 | private var neo4jOptions: Neo4jOptions = null 21 | 22 | private def callSchemaService[T](neo4jOptions: Neo4jOptions, function: SchemaService => T): T = { 23 | val driverCache = new DriverCache(neo4jOptions.connection, jobId) 24 | val schemaService = new SchemaService(neo4jOptions, driverCache) 25 | try { 26 | validateConnection(driverCache.getOrCreate().session(neo4jOptions.session.toNeo4jSession)) 27 | function(schemaService) 28 | } catch { 29 | case e: Throwable => 30 | throw e 31 | } finally { 32 | schemaService.close() 33 | driverCache.close() 34 | } 35 | } 36 | 37 | override def inferSchema(caseInsensitiveStringMap: CaseInsensitiveStringMap): StructType = { 38 | if (schema == null) { 39 | schema = callSchemaService(getNeo4jOptions(caseInsensitiveStringMap), { schemaService => schemaService.struct() }) 40 | } 41 | 42 | schema 43 | } 44 | 45 | private def getNeo4jOptions(caseInsensitiveStringMap: CaseInsensitiveStringMap) = { 46 | if(neo4jOptions == null) { 47 | neo4jOptions = new Neo4jOptions(caseInsensitiveStringMap.asCaseSensitiveMap()) 48 | } 49 | 50 | neo4jOptions 51 | } 52 | 53 | override def getTable(structType: StructType, transforms: Array[Transform], map: java.util.Map[String, String]): Table = { 54 | val caseInsensitiveStringMapNeo4jOptions = new CaseInsensitiveStringMap(map); 55 | new Neo4jTable(inferSchema(caseInsensitiveStringMapNeo4jOptions), map, jobId) 56 | } 57 | 58 | override def shortName(): String = "neo4j" 59 | } 60 | -------------------------------------------------------------------------------- /spark-3.0/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import java.util.concurrent.TimeUnit 4 | import org.apache.spark.sql.SparkSession 5 | import org.apache.spark.SparkConf 6 | import org.hamcrest.Matchers 7 | import org.junit._ 8 | import org.junit.rules.TestName 9 | import org.neo4j.driver.summary.ResultSummary 10 | import org.neo4j.driver.{Transaction, TransactionWork} 11 | 12 | object SparkConnectorScalaBaseTSE { 13 | 14 | private var startedFromSuite = true 15 | 16 | @BeforeClass 17 | def setUpContainer() = { 18 | if (!SparkConnectorScalaSuiteIT.server.isRunning) { 19 | startedFromSuite = false 20 | SparkConnectorScalaSuiteIT.setUpContainer() 21 | } 22 | } 23 | 24 | @AfterClass 25 | def tearDownContainer() = { 26 | if (!startedFromSuite) { 27 | SparkConnectorScalaSuiteIT.tearDownContainer() 28 | } 29 | } 30 | 31 | } 32 | 33 | class SparkConnectorScalaBaseTSE { 34 | 35 | val conf: SparkConf = SparkConnectorScalaSuiteIT.conf 36 | val ss: SparkSession = SparkConnectorScalaSuiteIT.ss 37 | 38 | val _testName: TestName = new TestName 39 | 40 | @Rule 41 | def testName = _testName 42 | 43 | @Before 44 | def before() { 45 | SparkConnectorScalaSuiteIT.session() 46 | .writeTransaction(new TransactionWork[ResultSummary] { 47 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume() 48 | }) 49 | } 50 | 51 | @After 52 | def after() { 53 | if (!TestUtil.isTravis()) { 54 | try { 55 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] { 56 | override def get(): Boolean = { 57 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections 58 | SparkConnectorScalaSuiteIT.connections == afterConnections 59 | } 60 | }, Matchers.equalTo(true), 30, TimeUnit.SECONDS) 
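// If the active-connection count never settles back to the baseline, the finally block below prints the before/after values purely as a debugging aid.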
61 | } finally { 62 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections 63 | if (SparkConnectorScalaSuiteIT.connections != afterConnections) { // just for debug purposes 64 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteIT.connections}, after: $afterConnections") 65 | } 66 | } 67 | } 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /spark-2.4/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import org.apache.spark.sql.SparkSession 6 | import org.apache.spark.SparkConf 7 | import org.hamcrest.Matchers 8 | import org.junit._ 9 | import org.junit.rules.TestName 10 | import org.neo4j.driver.summary.ResultSummary 11 | import org.neo4j.driver.{Transaction, TransactionWork} 12 | 13 | object SparkConnectorScalaBaseTSE { 14 | 15 | private var startedFromSuite = true 16 | 17 | @BeforeClass 18 | def setUpContainer() = { 19 | if (!SparkConnectorScalaSuiteIT.server.isRunning) { 20 | startedFromSuite = false 21 | SparkConnectorScalaSuiteIT.setUpContainer() 22 | } 23 | } 24 | 25 | @AfterClass 26 | def tearDownContainer() = { 27 | if (!startedFromSuite) { 28 | SparkConnectorScalaSuiteIT.tearDownContainer() 29 | } 30 | } 31 | 32 | } 33 | 34 | class SparkConnectorScalaBaseTSE { 35 | 36 | val conf: SparkConf = SparkConnectorScalaSuiteIT.conf 37 | val ss: SparkSession = SparkConnectorScalaSuiteIT.ss 38 | 39 | val _testName: TestName = new TestName 40 | 41 | @Rule 42 | def testName = _testName 43 | 44 | @Before 45 | def before() { 46 | SparkConnectorScalaSuiteIT.session() 47 | .writeTransaction(new TransactionWork[ResultSummary] { 48 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume() 49 | }) 50 | } 51 | 52 | @After 53 | def after() { 54 | if (!TestUtil.isTravis()) { 55 | try { 56 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] { 57 | override def get(): Boolean = { 58 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections 59 | SparkConnectorScalaSuiteIT.connections == afterConnections 60 | } 61 | }, Matchers.equalTo(true), 30, TimeUnit.SECONDS) 62 | } finally { 63 | val afterConnections = SparkConnectorScalaSuiteIT.getActiveConnections 64 | if (SparkConnectorScalaSuiteIT.connections != afterConnections) { // just for debug purposes 65 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteIT.connections}, after: $afterConnections") 66 | } 67 | } 68 | } 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /common/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseWithApocTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import java.util.concurrent.TimeUnit 4 | import org.apache.spark.sql.SparkSession 5 | import org.apache.spark.SparkConf 6 | import org.hamcrest.Matchers 7 | import org.junit._ 8 | import org.junit.rules.TestName 9 | import org.neo4j.driver.summary.ResultSummary 10 | import org.neo4j.driver.{Transaction, TransactionWork} 11 | 12 | object SparkConnectorScalaBaseWithApocTSE { 13 | 14 | private var startedFromSuite = true 15 | 16 | @BeforeClass 17 | def setUpContainer() = { 18 | if (!SparkConnectorScalaSuiteWithApocIT.server.isRunning) 
{ 19 | startedFromSuite = false 20 | SparkConnectorScalaSuiteWithApocIT.setUpContainer() 21 | } 22 | } 23 | 24 | @AfterClass 25 | def tearDownContainer() = { 26 | if (!startedFromSuite) { 27 | SparkConnectorScalaSuiteWithApocIT.tearDownContainer() 28 | } 29 | } 30 | 31 | } 32 | 33 | class SparkConnectorScalaBaseWithApocTSE { 34 | 35 | val conf: SparkConf = SparkConnectorScalaSuiteWithApocIT.conf 36 | val ss: SparkSession = SparkConnectorScalaSuiteWithApocIT.ss 37 | 38 | val _testName: TestName = new TestName 39 | 40 | @Rule 41 | def testName = _testName 42 | 43 | @Before 44 | def before() { 45 | SparkConnectorScalaSuiteWithApocIT.session() 46 | .writeTransaction(new TransactionWork[ResultSummary] { 47 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume() 48 | }) 49 | } 50 | 51 | @After 52 | def after() { 53 | if (!TestUtil.isTravis()) { 54 | try { 55 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] { 56 | override def get(): Boolean = { 57 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections 58 | SparkConnectorScalaSuiteWithApocIT.connections == afterConnections 59 | } 60 | }, Matchers.equalTo(true), 45, TimeUnit.SECONDS) 61 | } finally { 62 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections 63 | if (SparkConnectorScalaSuiteWithApocIT.connections != afterConnections) { // just for debug purposes 64 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteWithApocIT.connections}, after: $afterConnections") 65 | } 66 | } 67 | } 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /spark-3.0/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseWithApocTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import java.util.concurrent.TimeUnit 4 | import org.apache.spark.sql.SparkSession 5 | import org.apache.spark.SparkConf 6 | import org.hamcrest.Matchers 7 | import org.junit._ 8 | import org.junit.rules.TestName 9 | import org.neo4j.driver.summary.ResultSummary 10 | import org.neo4j.driver.{Transaction, TransactionWork} 11 | 12 | object SparkConnectorScalaBaseWithApocTSE { 13 | 14 | private var startedFromSuite = true 15 | 16 | @BeforeClass 17 | def setUpContainer() = { 18 | if (!SparkConnectorScalaSuiteWithApocIT.server.isRunning) { 19 | startedFromSuite = false 20 | SparkConnectorScalaSuiteWithApocIT.setUpContainer() 21 | } 22 | } 23 | 24 | @AfterClass 25 | def tearDownContainer() = { 26 | if (!startedFromSuite) { 27 | SparkConnectorScalaSuiteWithApocIT.tearDownContainer() 28 | } 29 | } 30 | 31 | } 32 | 33 | class SparkConnectorScalaBaseWithApocTSE { 34 | 35 | val conf: SparkConf = SparkConnectorScalaSuiteWithApocIT.conf 36 | val ss: SparkSession = SparkConnectorScalaSuiteWithApocIT.ss 37 | 38 | val _testName: TestName = new TestName 39 | 40 | @Rule 41 | def testName = _testName 42 | 43 | @Before 44 | def before() { 45 | SparkConnectorScalaSuiteWithApocIT.session() 46 | .writeTransaction(new TransactionWork[ResultSummary] { 47 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume() 48 | }) 49 | } 50 | 51 | @After 52 | def after() { 53 | if (!TestUtil.isTravis()) { 54 | try { 55 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] { 56 | override def get(): Boolean = { 57 | val afterConnections = 
SparkConnectorScalaSuiteWithApocIT.getActiveConnections 58 | SparkConnectorScalaSuiteWithApocIT.connections == afterConnections 59 | } 60 | }, Matchers.equalTo(true), 45, TimeUnit.SECONDS) 61 | } finally { 62 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections 63 | if (SparkConnectorScalaSuiteWithApocIT.connections != afterConnections) { // just for debug purposes 64 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteWithApocIT.connections}, after: $afterConnections") 65 | } 66 | } 67 | } 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /spark-2.4/src/test/scala/org/neo4j/spark/SparkConnectorScalaBaseWithApocTSE.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import org.apache.spark.sql.SparkSession 6 | import org.apache.spark.SparkConf 7 | import org.hamcrest.Matchers 8 | import org.junit._ 9 | import org.junit.rules.TestName 10 | import org.neo4j.driver.summary.ResultSummary 11 | import org.neo4j.driver.{Transaction, TransactionWork} 12 | 13 | object SparkConnectorScalaBaseWithApocTSE { 14 | 15 | private var startedFromSuite = true 16 | 17 | @BeforeClass 18 | def setUpContainer() = { 19 | if (!SparkConnectorScalaSuiteWithApocIT.server.isRunning) { 20 | startedFromSuite = false 21 | SparkConnectorScalaSuiteWithApocIT.setUpContainer() 22 | } 23 | } 24 | 25 | @AfterClass 26 | def tearDownContainer() = { 27 | if (!startedFromSuite) { 28 | SparkConnectorScalaSuiteWithApocIT.tearDownContainer() 29 | } 30 | } 31 | 32 | } 33 | 34 | class SparkConnectorScalaBaseWithApocTSE { 35 | 36 | val conf: SparkConf = SparkConnectorScalaSuiteWithApocIT.conf 37 | val ss: SparkSession = SparkConnectorScalaSuiteWithApocIT.ss 38 | 39 | val _testName: TestName = new TestName 40 | 41 | @Rule 42 | def testName = _testName 43 | 44 | @Before 45 | def before() { 46 | SparkConnectorScalaSuiteWithApocIT.session() 47 | .writeTransaction(new TransactionWork[ResultSummary] { 48 | override def execute(tx: Transaction): ResultSummary = tx.run("MATCH (n) DETACH DELETE n").consume() 49 | }) 50 | } 51 | 52 | @After 53 | def after() { 54 | if (!TestUtil.isTravis()) { 55 | try { 56 | Assert.assertEventually(new Assert.ThrowingSupplier[Boolean, Exception] { 57 | override def get(): Boolean = { 58 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections 59 | SparkConnectorScalaSuiteWithApocIT.connections == afterConnections 60 | } 61 | }, Matchers.equalTo(true), 45, TimeUnit.SECONDS) 62 | } finally { 63 | val afterConnections = SparkConnectorScalaSuiteWithApocIT.getActiveConnections 64 | if (SparkConnectorScalaSuiteWithApocIT.connections != afterConnections) { // just for debug purposes 65 | println(s"For test ${testName.getMethodName} => connections before: ${SparkConnectorScalaSuiteWithApocIT.connections}, after: $afterConnections") 66 | } 67 | } 68 | } 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /doc/docs/modules/ROOT/pages/graphkeys.adoc: -------------------------------------------------------------------------------- 1 | [#graphkeys] 2 | = Graph Keys 3 | 4 | When using the connector to write data, it's necessary to indicate which elements of the dataframe correspond to 5 | the identifying properties / keys of the node that you're writing. 
6 | 7 | In the link:writing.html[Writing] section, the following options were discussed, applying to the "Keys" strategy. 8 | 9 | * `node.keys` 10 | * `relationship.source.node.keys` 11 | * `relationship.target.node.keys` 12 | 13 | The following sections describe how to use key mappings to express the connection between the DataFrame format and the desired graph schema; a relationship example is sketched at the end of this page. 14 | 15 | == Graph Key Format 16 | 17 | Each of these fields is a comma-separated list of keys, such as `field1,field2`. In turn, each of the 18 | keys can contain a _mapping_ from a DataFrame attribute to a node property, such as `EventID:id`. 19 | 20 | This mapping is always expressed in the order `DataFrameID:NodeID`, and allows the DataFrame column name 21 | and the Neo4j node property name to differ. 22 | 23 | == Simple Example 24 | 25 | The most common case is to simply provide the name of a single attribute in the DataFrame; 26 | the node will receive a property of the same name. 27 | 28 | ``` 29 | my_person_dataframe.write 30 | .format("org.neo4j.spark.DataSource") 31 | .mode(SaveMode.Overwrite) 32 | .option("url", "bolt://localhost:7687") 33 | .option("labels", ":Person") 34 | .option("node.keys", "id") 35 | .save() 36 | ``` 37 | 38 | == Complex Example 39 | 40 | Now let's say that we want to write a dataframe of "Location" nodes. Imagine we have a dataframe 41 | that looks like this: 42 | 43 | ``` 44 | LocationName,LocationType 45 | USA,Country 46 | Richmond,City 47 | ``` 48 | 49 | Further, let's assume that we need a compound key (both attributes must be used to uniquely identify a node) 50 | and that we want to use simpler names on node properties, so that we end up with Neo4j nodes like this: 51 | 52 | ``` 53 | (:Location { name: 'USA', type: 'Country' }) 54 | (:Location { name: 'Richmond', type: 'City' }) 55 | ``` 56 | 57 | To do this, we would use the Graph Key expression `"LocationName:name,LocationType:type"`: 58 | 59 | ``` 60 | locations_dataframe.write 61 | .format("org.neo4j.spark.DataSource") 62 | .mode(SaveMode.Overwrite) 63 | .option("url", "bolt://localhost:7687") 64 | .option("labels", ":Location") 65 | .option("node.keys", "LocationName:name,LocationType:type") 66 | .save() 67 | ```
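== Relationship Keys Example

The same `DataFrameID:NodeID` mapping format applies to `relationship.source.node.keys` and `relationship.target.node.keys`. The listing below is only a sketch: the dataframe (`orders_dataframe`), its columns (`customer_id`, `product_id`), and the labels and relationship type are made-up names, and the other `relationship.*` options shown are the ones described in the link:writing.html[Writing] section rather than on this page; depending on your use case you may also need the source/target save-mode options documented there.

```
orders_dataframe.write
.format("org.neo4j.spark.DataSource")
.mode(SaveMode.Overwrite)
.option("url", "bolt://localhost:7687")
.option("relationship", "BOUGHT")
.option("relationship.save.strategy", "keys")
.option("relationship.source.labels", ":Customer")
.option("relationship.source.node.keys", "customer_id:id")
.option("relationship.target.labels", ":Product")
.option("relationship.target.node.keys", "product_id:id")
.save()
```

Here `customer_id:id` maps the `customer_id` column onto the `id` property that identifies the source `:Customer` node, and `product_id:id` does the same for the target `:Product` node.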
-------------------------------------------------------------------------------- /doc/docs/modules/ROOT/pages/python.adoc: -------------------------------------------------------------------------------- 1 | = Using with PySpark / Python 2 | 3 | [abstract] 4 | -- 5 | This chapter provides information on using the Neo4j Connector for Apache Spark with Python 6 | -- 7 | 8 | This connector uses the link:https://jaceklaskowski.gitbooks.io/mastering-spark-sql/content/spark-sql-data-source-api-v2.html[DataSource V2 API] in 9 | Spark. 10 | 11 | With a properly configured PySpark interpreter, you should be able to use Python to call the connector and do any or all of your Spark 12 | work. 13 | 14 | Here, we present examples of what the API looks like in Scala versus Python, to help you adapt any code examples you might have and get 15 | started quickly. 16 | 17 | This first listing is a simple program that reads all "Person" nodes out of a Neo4j instance into a dataframe, in Scala. 18 | 19 | [source,scala] 20 | ---- 21 | import org.apache.spark.sql.{SaveMode, SparkSession} 22 | 23 | val spark = SparkSession.builder().getOrCreate() 24 | 25 | spark.read.format("org.neo4j.spark.DataSource") 26 | .option("url", "bolt://localhost:7687") 27 | .option("labels", "Person:Customer:Confirmed") 28 | .load() 29 | ---- 30 | 31 | Here is the same program in Python: 32 | 33 | [source,python] 34 | ---- 35 | spark.read.format("org.neo4j.spark.DataSource") \ 36 | .option("url", "bolt://localhost:7687") \ 37 | .option("labels", "Person:Customer:Confirmed") \ 38 | .load() 39 | ---- 40 | 41 | For the most part, the API is the same, and we are only adapting the syntax for Python, by adding backslashes to allow line continuation 42 | and to avoid running into Python's indentation rules. 43 | 44 | == API Differences 45 | 46 | Some common API constants may need to be referred to as strings in the PySpark API. Consider these two examples in Scala & Python, 47 | focusing on the `SaveMode`. 48 | 49 | [source,scala] 50 | ---- 51 | import org.apache.spark.sql.{SaveMode, SparkSession} 52 | 53 | df.write 54 | .format("org.neo4j.spark.DataSource") 55 | .mode(SaveMode.ErrorIfExists) 56 | .option("url", "bolt://localhost:7687") 57 | .option("labels", ":Person") 58 | .save() 59 | ---- 60 | 61 | The same program in Python is very similar, again just with language syntax differences, but note that `SaveMode` is passed as a plain string: 62 | 63 | [source,python] 64 | ---- 65 | df.write \ 66 | .format("org.neo4j.spark.DataSource") \ 67 | .mode("ErrorIfExists") \ 68 | .option("url", "bolt://localhost:7687") \ 69 | .option("labels", ":Person") \ 70 | .save() 71 | ---- 72 | -------------------------------------------------------------------------------- /doc/gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation.
45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /spark-3.0/src/main/scala/org/neo4j/spark/reader/SimpleScan.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.reader 2 | 3 | import org.apache.spark.sql.connector.read.{Batch, InputPartition, PartitionReaderFactory, Scan} 4 | import org.apache.spark.sql.sources.Filter 5 | import org.apache.spark.sql.types.StructType 6 | import org.neo4j.spark.service.{PartitionSkipLimit, SchemaService} 7 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions} 8 | 9 | import scala.collection.JavaConverters.seqAsJavaListConverter 10 | 11 | case class Neo4jPartition(partitionSkipLimit: PartitionSkipLimit) extends InputPartition 12 | 13 | class SimpleScan( 14 | neo4jOptions: Neo4jOptions, 15 | jobId: String, 16 | schema: StructType, 17 | filters: Array[Filter], 18 | requiredColumns: StructType 19 | ) extends Scan with Batch { 20 | 21 | override def toBatch: Batch = this 22 | 23 | var scriptResult: java.util.List[java.util.Map[String, AnyRef]] = _ 24 | 25 | private def callSchemaService[T](function: SchemaService => T): T = { 26 | val driverCache = new DriverCache(neo4jOptions.connection, jobId) 27 | val schemaService = new SchemaService(neo4jOptions, driverCache) 28 | var hasError = false 29 | try { 30 | function(schemaService) 31 | } catch { 32 | case e: Throwable => 33 | hasError = true 34 | throw e 35 | } finally { 36 | schemaService.close() 37 | if (hasError) { 38 | driverCache.close() 39 | } 40 | } 41 | } 42 | 43 | private def createPartitions() = { 44 | // we get the skip/limit for each partition and execute the "script" 45 | val (partitionSkipLimitList, scriptResult) = callSchemaService { schemaService => 46 | (schemaService.skipLimitFromPartition(), schemaService.execute(neo4jOptions.script)) 47 | } 48 | // we generate a partition for each element 49 | this.scriptResult = scriptResult 50 | partitionSkipLimitList 51 | .map(partitionSkipLimit => Neo4jPartition(partitionSkipLimit)) 52 | } 53 | 54 | override def planInputPartitions(): Array[InputPartition] = { 55 | val neo4jPartitions: Seq[Neo4jPartition] = createPartitions() 56 | neo4jPartitions.toArray 57 | } 58 | 59 | override def createReaderFactory(): PartitionReaderFactory = { 60 | new SimplePartitionReaderFactory( 61 | neo4jOptions, filters, schema, jobId, scriptResult, requiredColumns 62 | ) 63 | } 64 | 65 | override 
def readSchema(): StructType = schema 66 | } 67 | -------------------------------------------------------------------------------- /common/src/main/scala/org/neo4j/spark/reader/BasePartitionReader.scala: -------------------------------------------------------------------------------- 1 | package org.neo4j.spark.reader 2 | 3 | import org.apache.spark.internal.Logging 4 | import org.apache.spark.sql.catalyst.InternalRow 5 | import org.apache.spark.sql.sources.Filter 6 | import org.apache.spark.sql.types.StructType 7 | import org.neo4j.driver.{Record, Session, Transaction, Values} 8 | import org.neo4j.spark.service.{MappingService, Neo4jQueryReadStrategy, Neo4jQueryService, Neo4jQueryStrategy, Neo4jReadMappingStrategy, PartitionSkipLimit} 9 | import org.neo4j.spark.util.{DriverCache, Neo4jOptions, Neo4jUtil} 10 | import org.neo4j.spark.util.Neo4jImplicits.StructTypeImplicit 11 | 12 | import scala.collection.JavaConverters._ 13 | 14 | abstract class BasePartitionReader(private val options: Neo4jOptions, 15 | private val filters: Array[Filter], 16 | private val schema: StructType, 17 | private val jobId: String, 18 | private val partitionSkipLimit: PartitionSkipLimit, 19 | private val scriptResult: java.util.List[java.util.Map[String, AnyRef]], 20 | private val requiredColumns: StructType) extends Logging { 21 | private var result: Iterator[Record] = _ 22 | private var session: Session = _ 23 | private var transaction: Transaction = _ 24 | private val driverCache: DriverCache = new DriverCache(options.connection, 25 | if (partitionSkipLimit.partitionNumber > 0) s"$jobId-${partitionSkipLimit.partitionNumber}" else jobId) 26 | 27 | private val query: String = new Neo4jQueryService(options, new Neo4jQueryReadStrategy(filters, partitionSkipLimit, requiredColumns.getFieldsName)) 28 | .createQuery() 29 | 30 | private val mappingService = new MappingService(new Neo4jReadMappingStrategy(options, requiredColumns), options) 31 | 32 | def next: Boolean = { 33 | if (result == null) { 34 | session = driverCache.getOrCreate().session(options.session.toNeo4jSession) 35 | transaction = session.beginTransaction() 36 | log.info(s"Running the following query on Neo4j: $query") 37 | result = transaction.run(query, Values 38 | .value(Map[String, AnyRef](Neo4jQueryStrategy.VARIABLE_SCRIPT_RESULT -> scriptResult).asJava)) 39 | .asScala 40 | } 41 | 42 | result.hasNext 43 | } 44 | 45 | def get: InternalRow = mappingService.convert(result.next(), schema) 46 | 47 | def close(): Unit = { 48 | Neo4jUtil.closeSafety(transaction, log) 49 | Neo4jUtil.closeSafety(session, log) 50 | driverCache.close() 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /doc/javascript/versionswitcher.js: -------------------------------------------------------------------------------- 1 | jQuery( window ).load( function() { 2 | var location = window.location; 3 | versionSwitcher( jQuery ); 4 | } ); 5 | 6 | /** 7 | * Utility to browse different versions of the documentation. Requires the versions.js file loaded, which lists the 8 | * available (relevant) versions of a particular publication. 
9 | */ 10 | function versionSwitcher( $ ) 11 | { 12 | $('.searchbox').hide(); 13 | var MAX_STABLE_COUNT = 2; 14 | var DOCS_BASE_URL = window.docMeta.commonDocsBaseUri; 15 | var THIS_DOC_BASE_URI = window.docMeta.unversionedDocBaseUri; 16 | 17 | var currentVersion = window.docMeta.version; 18 | var currentPage = window.neo4jPageId; 19 | 20 | // TODO re-enable loadVersions(); 21 | 22 | /** 23 | * Load an array of version into a div element and check if the current page actually exists in these versions. 24 | * Non-existing entries will be unlinked. Current version will be marked as such. 25 | */ 26 | function loadVersions() { 27 | var $navHeader = $( 'header' ); 28 | var $additionalVersions = $( '