├── .github └── workflows │ ├── ISSUE_TEMPLATE.md │ ├── PULL_REQUEST_TEMPLATE.md │ └── maven.yml ├── .gitignore ├── .scalafmt.conf ├── .travis.yml ├── LICENSES ├── Apache-2.0.txt └── CC-1.0.txt ├── README.md ├── example ├── .gitignore ├── pom.xml └── src │ └── main │ ├── resources │ ├── data.csv │ ├── edge │ ├── log4j.properties │ └── vertex │ └── scala │ └── com │ └── vesoft │ └── nebula │ └── examples │ └── connector │ ├── NebulaSparkReaderExample.scala │ └── NebulaSparkWriterExample.scala ├── nebula-algorithm ├── README-CN.md ├── README.md ├── pom.xml └── src │ ├── main │ ├── resources │ │ ├── application.conf │ │ └── edge │ └── scala │ │ └── com │ │ └── vesoft │ │ └── nebula │ │ └── algorithm │ │ ├── Main.scala │ │ ├── config │ │ ├── AlgoConfig.scala │ │ ├── Configs.scala │ │ ├── NebulaConfig.scala │ │ └── SparkConfig.scala │ │ ├── lib │ │ ├── BetweennessCentralityAlgo.scala │ │ ├── ConnectedComponentsAlgo.scala │ │ ├── DegreeStaticAlgo.scala │ │ ├── GraphTriangleCountAlgo.scala │ │ ├── KCoreAlgo.scala │ │ ├── LabelPropagationAlgo.scala │ │ ├── LouvainAlgo.scala │ │ ├── PageRankAlgo.scala │ │ ├── ShortestPathAlgo.scala │ │ ├── StronglyConnectedComponentsAlgo.scala │ │ └── TriangleCountAlgo.scala │ │ ├── reader │ │ └── DataReader.scala │ │ ├── utils │ │ └── NebulaUtil.scala │ │ └── writer │ │ └── AlgoWriter.scala │ └── test │ ├── resources │ ├── application.conf │ ├── edge.csv │ └── edge_noWeight.csv │ └── scala │ └── com │ └── vesoft │ └── nebula │ └── algorithm │ ├── config │ └── ConfigSuite.scala │ ├── data │ └── MockNebulaData.scala │ ├── lib │ ├── BetweennessAlgoSuite.scala │ ├── CcAlgoSuite.scala │ ├── DegreeStaticAlgoSuite.scala │ ├── KCoreAlgoSuite.scala │ ├── LabelPropagationAlgoSuite.scala │ ├── LouvainAlgoSuite.scala │ ├── PageRankAlgoSuite.scala │ ├── SCCAlgoSuite.scala │ ├── ShortestPathAlgoSuite.scala │ └── TrangleCountSuite.scala │ └── utils │ └── NebulaUtilSuite.scala ├── nebula-exchange ├── .gitignore ├── README-CN.md ├── README.md ├── pom.xml └── src │ ├── main │ ├── resources │ │ └── application.conf │ └── scala │ │ └── com │ │ └── vesoft │ │ └── nebula │ │ └── exchange │ │ ├── CheckPointHandler.scala │ │ ├── ErrorHandler.scala │ │ ├── Exchange.scala │ │ ├── GraphProvider.scala │ │ ├── MetaProvider.scala │ │ ├── config │ │ ├── Configs.scala │ │ ├── SchemaConfigs.scala │ │ ├── SinkConfigs.scala │ │ └── SourceConfigs.scala │ │ ├── package.scala │ │ ├── processor │ │ ├── EdgeProcessor.scala │ │ ├── Processor.scala │ │ ├── ReloadProcessor.scala │ │ └── VerticesProcessor.scala │ │ ├── reader │ │ ├── FileBaseReader.scala │ │ ├── Reader.scala │ │ ├── ServerBaseReader.scala │ │ └── StreamingBaseReader.scala │ │ ├── utils │ │ ├── HDFSUtils.scala │ │ ├── KafkaUtils.scala │ │ ├── NebulaUtils.scala │ │ └── Neo4jUtils.scala │ │ └── writer │ │ ├── FileBaseWriter.scala │ │ ├── ServerBaseWriter.scala │ │ └── Writer.scala │ └── test │ ├── resources │ ├── application.conf │ └── docker-compose.yaml │ └── scala │ └── com │ └── vesoft │ └── nebula │ └── exchange │ ├── NebulaGraphMock.scala │ ├── config │ └── ConfigsSuite.scala │ ├── processor │ └── ProcessorSuite.scala │ └── utils │ └── NebulaUtilsSuite.scala ├── nebula-spark-connector ├── .gitignore ├── README.md ├── README_CN.md ├── pom.xml └── src │ ├── main │ └── scala │ │ └── com │ │ └── vesoft │ │ └── nebula │ │ └── connector │ │ ├── NebulaConfig.scala │ │ ├── NebulaDataSource.scala │ │ ├── NebulaEnum.scala │ │ ├── NebulaOptions.scala │ │ ├── NebulaUtils.scala │ │ ├── PartitionUtils.scala │ │ ├── Template.scala │ │ ├── exception │ │ 
└── Exception.scala │ │ ├── nebula │ │ ├── GraphProvider.scala │ │ └── MetaProvider.scala │ │ ├── package.scala │ │ ├── reader │ │ ├── NebulaEdgePartitionReader.scala │ │ ├── NebulaPartition.scala │ │ ├── NebulaPartitionReader.scala │ │ ├── NebulaSourceReader.scala │ │ └── NebulaVertexPartitionReader.scala │ │ └── writer │ │ ├── NebulaCommitMessage.scala │ │ ├── NebulaEdgeWriter.scala │ │ ├── NebulaExecutor.scala │ │ ├── NebulaSourceWriter.scala │ │ ├── NebulaVertexWriter.scala │ │ └── NebulaWriter.scala │ └── test │ └── scala │ └── com │ └── vesoft │ └── nebula │ └── connector │ ├── NebulaConfigSuite.scala │ └── writer │ └── NebulaExecutorSuite.scala └── pom.xml /.github/workflows/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | #### Expected behavior 2 | 3 | #### Actual behavior 4 | 5 | #### Steps to reproduce 6 | 7 | #### JVM version (e.g. `java -version`) 8 | 9 | #### Scala version (e.g. `scala -version`) 10 | 11 | #### OS version (e.g. `uname -a`) 12 | -------------------------------------------------------------------------------- /.github/workflows/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Motivation: 2 | 3 | Why you're making that change and what is the problem you're trying to solve. 4 | 5 | Modification: 6 | 7 | Describe the modifications you've done. 8 | 9 | Result: 10 | 11 | Fixes #. 12 | -------------------------------------------------------------------------------- /.github/workflows/maven.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Maven 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven 3 | 4 | name: Java CI with Maven 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: 11 | - master 12 | - 'v[0-9]+.*' 13 | 14 | jobs: 15 | build: 16 | 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Set up JDK 1.8 22 | uses: actions/setup-java@v1 23 | with: 24 | java-version: 1.8 25 | 26 | - name: Cache the Maven packages to speed up build 27 | uses: actions/cache@v2 28 | with: 29 | path: ~/.m2/repository 30 | key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} 31 | restore-keys: ${{ runner.os }}-maven- 32 | 33 | - name: download neo4j-contrib & graphframes & pulsar-spark-connector dependency 34 | run: | 35 | wget https://oss-cdn.nebula-graph.com.cn/jar-packages/neo4j-contrib.zip 36 | wget https://oss-cdn.nebula-graph.com.cn/jar-packages/graphframes.zip 37 | wget https://oss-cdn.nebula-graph.com.cn/jar-packages/streamnative.zip 38 | unzip -o -d ~/.m2/repository/ neo4j-contrib.zip 39 | unzip -o -d ~/.m2/repository/ graphframes.zip 40 | rm -rf ~/.m2/repository/io/streamnative 41 | unzip -o -d ~/.m2/repository/io/ streamnative.zip 42 | 43 | - name: Install nebula-graph 44 | run: | 45 | mkdir tmp 46 | pushd tmp 47 | git clone https://github.com/vesoft-inc/nebula-docker-compose.git 48 | pushd nebula-docker-compose/ 49 | cp ../../nebula-exchange/src/test/resources/docker-compose.yaml . 
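# Start the Nebula services defined by the copied compose file in the background,
# then pause briefly so they are ready before the Maven build runs the tests.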
50 | docker-compose up -d 51 | sleep 10 52 | popd 53 | popd 54 | 55 | - name: Build with Maven 56 | run: mvn -B package 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | 25 | # build target 26 | target/ 27 | 28 | # IDE 29 | .idea/ 30 | .eclipse/ 31 | *.iml 32 | 33 | spark-importer.ipr 34 | spark-importer.iws 35 | 36 | # mac 37 | .DS_Store 38 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | align = more 2 | maxColumn = 100 3 | docstrings = ScalaDoc 4 | assumeStandardLibraryStripMargin = true -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 vesoft inc. All rights reserved. 2 | # 3 | # This source code is licensed under Apache 2.0 License, 4 | # attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | 6 | language: java 7 | 8 | jdk: 9 | - oraclejdk11 10 | - openjdk8 11 | - openjdk11 12 | 13 | install: mvn clean compile package install -Dgpg.skip -Dmaven.javadoc.skip=true 14 | -------------------------------------------------------------------------------- /LICENSES/CC-1.0.txt: -------------------------------------------------------------------------------- 1 | "Commons Clause" License Condition v1.0 2 | 3 | The Software is provided to you by the Licensor under the License, as defined below, subject to the following condition. 4 | 5 | Without limiting other conditions in the License, the grant of rights under the License will not include, and the License does not grant to you, the right to Sell the Software. 6 | 7 | For purposes of the foregoing, "Sell" means practicing any or all of the rights granted to you under the License to provide to third parties, for a fee or other considerationon (including without limitation fees for hosting or consulting/support services related to the Software), a product or service whose value derives, entirely or substantially, from the functionality of the Software. Any license notice or attribution required by the License must also include this Commons Clause License Condition notice. 8 | 9 | Software: Nebula Graph [Software in this repository] 10 | 11 | License: Apache 2.0 [https://www.apache.org/licenses/LICENSE-2.0.html] 12 | 13 | Licensor: vesoft inc. 
14 | 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nebula-spark-utils 2 | 3 | > **说明**: 4 | 5 | nebula-spark-utils 仓库包含 Nebula 的三个产品:Nebula Exchange、Nebula Spark Connector 和 Nebula Algorithm。 6 | 7 | 为了更好地进行产品的版本发布和管理,我们决定将 nebula-spark-utils 拆分成三个仓库,如果您要使用以上三个产品,请转移到产品对应的新仓库: 8 | 9 | * Nebula Exchange: https://github.com/vesoft-inc/nebula-exchange 10 | * Nebula Spark Connector: https://github.com/vesoft-inc/nebula-spark-connector 11 | * Nebula Algorithm: https://github.com/vesoft-inc/nebula-algorithm 12 | 13 | ------ 14 | 15 | > **Note**: 16 | 17 | nebula-spark-utils repository contains code of three nebula products: Nebula Exchange, Nebula Spark Connector, Nebula Algorithm. 18 | 19 | In order for better version release and management, the team decided to split the current repository into three independent repositories. 20 | 21 | Please choose the correct repository for use: 22 | 23 | * Nebula Exchange: https://github.com/vesoft-inc/nebula-exchange 24 | * Nebula Spark Connector: https://github.com/vesoft-inc/nebula-spark-connector 25 | * Nebula Algorithm: https://github.com/vesoft-inc/nebula-algorithm 26 | 27 | If you want to use Spark utilities for [Nebula Graph v1.x](https://github.com/vesoft-inc/nebula), visit [the v1.0 branch of nebula-java](https://github.com/vesoft-inc/nebula-java/tree/v1.0). 28 | -------------------------------------------------------------------------------- /example/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | 25 | # build target 26 | target/ 27 | 28 | # IDE 29 | .idea/ 30 | .eclipse/ 31 | *.iml 32 | 33 | spark-importer.ipr 34 | spark-importer.iws 35 | 36 | .DS_Store 37 | -------------------------------------------------------------------------------- /example/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | nebula-spark 7 | com.vesoft 8 | 2.5-SNAPSHOT 9 | ../pom.xml 10 | 11 | 4.0.0 12 | 13 | example 14 | 15 | 16 | 17 | 18 | 19 | org.apache.maven.plugins 20 | maven-deploy-plugin 21 | 22 | true 23 | 24 | 25 | 26 | 27 | org.apache.maven.plugins 28 | maven-compiler-plugin 29 | 3.8.1 30 | 31 | 1.8 32 | 1.8 33 | 34 | 35 | 36 | 37 | org.apache.maven.plugins 38 | maven-jar-plugin 39 | 3.2.0 40 | 41 | 42 | 43 | test-jar 44 | 45 | 46 | 47 | 48 | 49 | 50 | org.apache.maven.plugins 51 | maven-shade-plugin 52 | 3.2.1 53 | 54 | 55 | package 56 | 57 | shade 58 | 59 | 60 | false 61 | 62 | 63 | org.apache.spark:* 64 | org.apache.hadoop:* 65 | org.apache.hive:* 66 | log4j:log4j 67 | org.apache.orc:* 68 | xml-apis:xml-apis 69 | javax.inject:javax.inject 70 | org.spark-project.hive:hive-exec 71 | stax:stax-api 72 | org.glassfish.hk2.external:aopalliance-repackaged 73 | 74 | 75 | 76 | 77 | *:* 78 | 79 | com/vesoft/tools/** 80 | META-INF/*.SF 81 | META-INF/*.DSA 82 | META-INF/*.RSA 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | org.scala-tools 93 | maven-scala-plugin 94 | 2.15.2 95 | 96 | 2.11.12 97 | 98 | -target:jvm-1.8 99 | 100 | 101 | -Xss4096K 102 | 103 | 104 
| 105 | 106 | scala-compile 107 | 108 | compile 109 | 110 | 111 | 112 | com/vesoft/tools/** 113 | META-INF/*.SF 114 | META-INF/*.DSA 115 | META-INF/*.RSA 116 | 117 | 118 | 119 | 120 | scala-test-compile 121 | 122 | testCompile 123 | 124 | 125 | 126 | com/vesoft/tools/** 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | org.slf4j 138 | slf4j-log4j12 139 | 1.7.25 140 | 141 | 142 | org.slf4j 143 | slf4j-api 144 | 1.7.25 145 | 146 | 147 | 148 | org.apache.spark 149 | spark-core_2.11 150 | 2.4.4 151 | 152 | 153 | org.apache.spark 154 | spark-sql_2.11 155 | 2.4.4 156 | 157 | 158 | 159 | com.vesoft 160 | nebula-spark-connector 161 | 2.5-SNAPSHOT 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /example/src/main/resources/data.csv: -------------------------------------------------------------------------------- 1 | id,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12,col13 2 | 1,Tom,tom,10,20,30,40,2021-01-27,2021-01-01T12:10:10,43535232,true,1.0,2.0,10:10:10 3 | 2,Jina,Jina,11,21,31,41,2021-01-28,2021-01-02T12:10:10,43535232,false,1.1,2.1,11:10:10 4 | 3,Tim,Tim,12,22,32,42,2021-01-29,2021-01-03T12:10:10,43535232,false,1.2,2.2,12:10:10 5 | 4,张三,张三,13,23,33,43,2021-01-30,2021-01-04T12:10:10,43535232,true,1.3,2.3,13:10:10 6 | 5,李四,李四,14,24,34,44,2021-02-01,2021-01-05T12:10:10,43535232,false,1.4,2.4,14:10:10 7 | 6,王五,王五,15,25,35,45,2021-02-02,2021-01-06T12:10:10,0,false,1.5,2.5,15:10:10 8 | 7,Jina,Jina,16,26,36,46,2021-02-03,2021-01-07T12:10:10,43535232,true,1.6,2.6,16:10:10 9 | 8,Jina,Jina,17,27,37,47,2021-02-04,2021-01-08T12:10:10,43535232,false,1.7,2.7,17:10:10 10 | 9,Jina,Jina,18,28,38,48,2021-02-05,2021-01-09T12:10:10,43535232,true,1.8,2.8,18:10:10 11 | 10,Jina,Jina,19,29,39,49,2021-02-06,2021-01-10T12:10:10,43535232,false,1.9,2.9,19:10:10 12 | -1,Jina,Jina,20,30,40,50,2021-02-07,2021-02-11T12:10:10,43535232,false,2.0,3.0,20:10:10 13 | -2,Jina,Jina,21,31,41,51,2021-02-08,2021-03-12T12:10:10,43535232,false,2.1,3.1,21:10:10 14 | -3,Jina,Jina,22,32,42,52,2021-02-09,2021-04-13T12:10:10,43535232,false,2.2,3.2,22:10:10 15 | -------------------------------------------------------------------------------- /example/src/main/resources/edge: -------------------------------------------------------------------------------- 1 | {"src":12345,"dst":23456,"degree":34, "descr": "aaa","timep": "2020-01-01"} 2 | {"src":11111,"dst":22222,"degree":33, "descr": "aaa","timep": "2020-01-01"} 3 | {"src":11111,"dst":33333,"degree":32, "descr": "a\baa","timep": "2020-01-01"} 4 | {"src":11111,"dst":44444,"degree":31, "descr": "aaa","timep": "2020-01-01"} 5 | {"src":22222,"dst":55555,"degree":30, "descr": "a\naa","timep": "2020-01-01"} 6 | {"src":33333,"dst":44444,"degree":29, "descr": "aaa","timep": "2020-01-01"} 7 | {"src":33333,"dst":55555,"degree":28, "descr": "aa\ta","timep": "2020-01-01"} 8 | {"src":44444,"dst":22222,"degree":27, "descr": "aaa","timep": "2020-01-01"} 9 | {"src":44444,"dst":55555,"degree":26, "descr": "aaa","timep": "2020-01-01"} 10 | {"src":22222,"dst":66666,"degree":25, "descr": "aaa","timep": "2020-01-01"} -------------------------------------------------------------------------------- /example/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Global logging configuration 2 | log4j.rootLogger=INFO, stdout 3 | # Console output... 
4 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 5 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.stdout.layout.ConversionPattern=%5p [%t] - %m%n 7 | -------------------------------------------------------------------------------- /example/src/main/resources/vertex: -------------------------------------------------------------------------------- 1 | {"id":12,"name":"Tom","age":20,"born": "2000-01-01"} 2 | {"id":13,"name":"Bob","age":21,"born": "1999-01-02"} 3 | {"id":14,"name":"Jane","age":22,"born": "1998-01-03"} 4 | {"id":15,"name":"Jena","age":23,"born": "1997-01-04"} 5 | {"id":16,"name":"Nic","age":24,"born": "1996-01-05"} 6 | {"id":17,"name":"Mei","age":25,"born": "1995-01-06"} 7 | {"id":18,"name":"HH","age":26,"born": "1994-01-07"} 8 | {"id":19,"name":"Tyler","age":27,"born": "1993-01-08"} 9 | {"id":20,"name":"Ber","age":28,"born": "1992-01-09"} 10 | {"id":21,"name":"Mercy","age":29,"born": "1991-01-10"} -------------------------------------------------------------------------------- /example/src/main/scala/com/vesoft/nebula/examples/connector/NebulaSparkReaderExample.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.examples.connector 8 | 9 | import com.facebook.thrift.protocol.TCompactProtocol 10 | import com.vesoft.nebula.connector.connector.NebulaDataFrameReader 11 | import com.vesoft.nebula.connector.{NebulaConnectionConfig, ReadNebulaConfig} 12 | import org.apache.spark.SparkConf 13 | import org.apache.spark.sql.SparkSession 14 | import org.slf4j.LoggerFactory 15 | 16 | object NebulaSparkReaderExample { 17 | 18 | private val LOG = LoggerFactory.getLogger(this.getClass) 19 | 20 | def main(args: Array[String]): Unit = { 21 | 22 | val sparkConf = new SparkConf 23 | sparkConf 24 | .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 25 | .registerKryoClasses(Array[Class[_]](classOf[TCompactProtocol])) 26 | val spark = SparkSession 27 | .builder() 28 | .master("local") 29 | .config(sparkConf) 30 | .getOrCreate() 31 | 32 | readVertex(spark) 33 | readEdges(spark) 34 | readVertexGraph(spark) 35 | readEdgeGraph(spark) 36 | 37 | spark.close() 38 | sys.exit() 39 | } 40 | 41 | def readVertex(spark: SparkSession): Unit = { 42 | LOG.info("start to read nebula vertices") 43 | val config = 44 | NebulaConnectionConfig 45 | .builder() 46 | .withMetaAddress("127.0.0.1:9559") 47 | .withConenctionRetry(2) 48 | .build() 49 | val nebulaReadVertexConfig: ReadNebulaConfig = ReadNebulaConfig 50 | .builder() 51 | .withSpace("test") 52 | .withLabel("person") 53 | .withNoColumn(false) 54 | .withReturnCols(List("birthday")) 55 | .withLimit(10) 56 | .withPartitionNum(10) 57 | .build() 58 | val vertex = spark.read.nebula(config, nebulaReadVertexConfig).loadVerticesToDF() 59 | vertex.printSchema() 60 | vertex.show(20) 61 | println("vertex count: " + vertex.count()) 62 | } 63 | 64 | def readEdges(spark: SparkSession): Unit = { 65 | LOG.info("start to read nebula edges") 66 | 67 | val config = 68 | NebulaConnectionConfig 69 | .builder() 70 | .withMetaAddress("127.0.0.1:9559") 71 | .withTimeout(6000) 72 | .withConenctionRetry(2) 73 | .build() 74 | val nebulaReadEdgeConfig: ReadNebulaConfig = ReadNebulaConfig 75 | .builder() 76 | .withSpace("test") 77 | 
.withLabel("knows") 78 | .withNoColumn(false) 79 | .withReturnCols(List("degree")) 80 | .withLimit(10) 81 | .withPartitionNum(10) 82 | .build() 83 | val edge = spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToDF() 84 | edge.printSchema() 85 | edge.show(20) 86 | println("edge count: " + edge.count()) 87 | } 88 | 89 | def readVertexGraph(spark: SparkSession): Unit = { 90 | LOG.info("start to read graphx vertex") 91 | val config = 92 | NebulaConnectionConfig 93 | .builder() 94 | .withMetaAddress("127.0.0.1:9559") 95 | .withTimeout(6000) 96 | .withConenctionRetry(2) 97 | .build() 98 | val nebulaReadVertexConfig: ReadNebulaConfig = ReadNebulaConfig 99 | .builder() 100 | .withSpace("test") 101 | .withLabel("person") 102 | .withNoColumn(false) 103 | .withReturnCols(List("birthday")) 104 | .withLimit(10) 105 | .withPartitionNum(10) 106 | .build() 107 | 108 | val vertexRDD = spark.read.nebula(config, nebulaReadVertexConfig).loadVerticesToGraphx() 109 | LOG.info("vertex rdd first record: " + vertexRDD.first()) 110 | LOG.info("vertex rdd count: {}", vertexRDD.count()) 111 | } 112 | 113 | def readEdgeGraph(spark: SparkSession): Unit = { 114 | LOG.info("start to read graphx edge") 115 | val config = 116 | NebulaConnectionConfig 117 | .builder() 118 | .withMetaAddress("127.0.0.1:9559") 119 | .withTimeout(6000) 120 | .withConenctionRetry(2) 121 | .build() 122 | val nebulaReadEdgeConfig: ReadNebulaConfig = ReadNebulaConfig 123 | .builder() 124 | .withSpace("test") 125 | .withLabel("knows") 126 | .withNoColumn(false) 127 | .withReturnCols(List("timep")) 128 | .withLimit(10) 129 | .withPartitionNum(10) 130 | .build() 131 | val edgeRDD = spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToGraphx() 132 | LOG.info("edge rdd first record:" + edgeRDD.first()) 133 | LOG.info("edge rdd count: {}", edgeRDD.count()) 134 | } 135 | 136 | } 137 | -------------------------------------------------------------------------------- /nebula-algorithm/README-CN.md: -------------------------------------------------------------------------------- 1 | # 欢迎使用 Nebula Algorithm 2 | 3 | nebula-algorithm 是一款基于 [GraphX](https://spark.apache.org/graphx/) 的 Spark 应用程序,提供了以下图计算算法: 4 | 5 | 6 | | 算法名 |中文说明|应用场景| 7 | |:------------------------:|:-----------:|:----:| 8 | | PageRank | 页面排序 | 网页排序、重点节点挖掘| 9 | | Louvain | 社区发现 | 社团挖掘、层次化聚类| 10 | | KCore | K核 |社区发现、金融风控| 11 | | LabelPropagation | 标签传播 |资讯传播、广告推荐、社区发现| 12 | | ConnectedComponent | 联通分量 |社区发现、孤岛发现| 13 | |StronglyConnectedComponent| 强联通分量 |社区发现| 14 | | ShortestPath | 最短路径 |路径规划、网络规划| 15 | | TriangleCount | 三角形计数 |网络结构分析| 16 | | GraphTriangleCount |全图三角形计数|网络紧密性分析| 17 | | BetweennessCentrality | 介数中心性 |关键节点挖掘,节点影响力计算| 18 | | DegreeStatic | 度统计 |图结构分析| 19 | 20 | 使用 `nebula-algorithm`,可以通过提交 `Spark` 任务的形式使用完整的算法工具对 `Nebula Graph` 数据库中的数据执行图计算,也可以通过编程形式调用`lib`库下的算法针对DataFrame执行图计算。 21 | 22 | ## 如何获取 23 | 1. 编译打包 Nebula Algorithm 24 | ``` 25 | $ git clone https://github.com/vesoft-inc/nebula-spark-utils.git 26 | $ cd nebula-algorithm 27 | $ mvn clean package -Dgpg.skip -Dmaven.javadoc.skip=true -Dmaven.test.skip=true 28 | ``` 29 | 编译完成后,在 `nebula-algorithm/target` 目录下会生成 `nebula-algorithm-2.0.0.jar` 。 30 | 31 | 2. 
在 Maven 远程仓库下载 32 | https://repo1.maven.org/maven2/com/vesoft/nebula-algorithm/2.0.0/ 33 | 34 | # 使用 Nebula Algorithm 35 | 36 | 使用限制:Nebula Algorithm 未自动对字符串id进行编码,因此执行图算法时,边的源点和目标点必须是整数(Nebula Space 的 vid_type可以是String类型,但数据必须是整数)。 37 | 38 | * 使用方法1:直接提交 nebula-algorithm 算法包 39 | 40 | * 设置配置文件 41 | 42 | 关于配置项的具体说明参考[示例配置](https://github.com/vesoft-inc/nebula-spark-utils/blob/master/nebula-algorithm/src/main/resources/application.conf) 43 | 44 | * 提交算法任务 45 | 46 | ``` 47 | ${SPARK_HOME}/bin/spark-submit --master --class com.vesoft.nebula.algorithm.Main nebula-algorithm-2.0.0.jar -p application.conf 48 | ``` 49 | * 使用方法2:调用 nebula-algorithm 算法接口 50 | 51 | 在`nebula-algorithm`的`lib`库中提供了10中常用图计算算法,可通过编程调用的形式调用算法。 52 | * 在pom.xml中添加依赖 53 | ``` 54 | 55 | com.vesoft 56 | nebula-algorithm 57 | 2.0.0 58 | 59 | ``` 60 | * 定义算法参数调用算法(以`PageRank`为例) 61 | ``` 62 | val prConfig = new PRConfig(5, 1.0) 63 | val louvainResult = PageRankAlgo.apply(spark, data, prConfig, false) 64 | ``` 65 | 66 | 其他算法的调用方法见[测试示例](https://github.com/vesoft-inc/nebula-spark-utils/tree/master/nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib) 。 67 | 68 | > 注:执行算法的DataFrame默认第一列是源点,第二列是目标点,第三列是边权重。 69 | 70 | ## 贡献 71 | 72 | Nebula Algorithm 是一个完全开源的项目,欢迎开源爱好者通过以下方式参与: 73 | 74 | - 前往 [Nebula Graph 论坛](https://discuss.nebula-graph.com.cn/ "点击前往“Nebula Graph 论坛") 上参与 Issue 讨论,如答疑、提供想法或者报告无法解决的问题 75 | - 撰写或改进文档 76 | - 提交优化代码 77 | -------------------------------------------------------------------------------- /nebula-algorithm/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to Nebula Algorithm 2 | 3 |

4 | English | 中文 5 |
6 | 7 | nebula-algorithm is a Spark Application based on [GraphX](https://spark.apache.org/graphx/) with the following Algorithm provided for now: 8 | 9 | 10 | | Name |Use Case| 11 | |:------------------------:|:---------------:| 12 | | PageRank | page ranking, important node digging| 13 | | Louvain | community digging, hierarchical clustering| 14 | | KCore | community detection, financial risk control| 15 | | LabelPropagation | community detection, consultation propagation, advertising recommendation| 16 | | ConnectedComponent | community detection, isolated island detection| 17 | |StronglyConnectedComponent| community detection| 18 | | ShortestPath | path plan, network plan| 19 | | TriangleCount | network structure analysis| 20 | | GraphTriangleCount | network structure and tightness analysis| 21 | | BetweennessCentrality | important node digging, node influence calculation| 22 | | DegreeStatic | graph structure analysis| 23 | 24 | 25 | You could submit the entire spark application or invoke algorithms in `lib` library to apply graph algorithms for DataFrame. 26 | 27 | ## Get Nebula Algorithm 28 | 1. Build Nebula Algorithm 29 | ``` 30 | $ git clone https://github.com/vesoft-inc/nebula-spark-utils.git 31 | $ cd nebula-algorithm 32 | $ mvn clean package -Dgpg.skip -Dmaven.javadoc.skip=true -Dmaven.test.skip=true 33 | ``` 34 | After the above buiding process, the target file `nebula-algorithm-2.0.0.jar` will be placed under `nebula-algorithm/target`. 35 | 36 | 2. Download from Maven repo 37 | 38 | Alternatively, it could be downloaded from the following Maven repo: 39 | 40 | https://repo1.maven.org/maven2/com/vesoft/nebula-algorithm/2.0.0/ 41 | 42 | ## Use Nebula Algorithm 43 | 44 | Limitation: Due to Nebula Algorithm will not encode string id, thus during the algorithm execution, the source and target of edges must be in Type Int (The `vid_type` in Nebula Space could be String, while data must be in Type Int). 45 | 46 | * Option 1: Submit nebula-algorithm package 47 | 48 | * Configuration 49 | 50 | Refer to the [configuration example](https://github.com/vesoft-inc/nebula-spark-utils/blob/master/nebula-algorithm/src/main/resources/application.conf). 51 | 52 | * Submit Spark Application 53 | 54 | ``` 55 | ${SPARK_HOME}/bin/spark-submit --master --class com.vesoft.nebula.algorithm.Main nebula-algorithm-2.0.0.jar -p application.conf 56 | ``` 57 | 58 | * Option2: Call nebula-algorithm interface 59 | 60 | Now there are 10 algorithms provided in `lib` from `nebula-algorithm`, which could be invoked in a programming fashion as below: 61 | 62 | * Add dependencies in `pom.xml`. 63 | ``` 64 | 65 | com.vesoft 66 | nebula-algorithm 67 | 2.0.0 68 | 69 | ``` 70 | * Instantiate algorithm's config, below is an example for `PageRank`. 71 | ``` 72 | val prConfig = new PRConfig(5, 1.0) 73 | val louvainResult = PageRankAlgo.apply(spark, data, prConfig, false) 74 | ``` 75 | 76 | For other algorithms, please refer to [test cases](https://github.com/vesoft-inc/nebula-spark-utils/tree/master/nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib). 77 | 78 | > Note: The first column of DataFrame in the application represents the source vertices, the second represents the target vertices and the third represents edges' weight. 79 | 80 | ## Contribute 81 | 82 | Nebula Algorithm is open source, you are more than welcomed to contribute in the following ways: 83 | 84 | - Discuss in the community via [the forum](https://discuss.nebula-graph.io/) or raise issues here. 85 | - Compose or improve our documents. 
86 | - Pull Request to help improve the code itself here. 87 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | { 2 | # Spark relation config 3 | spark: { 4 | app: { 5 | name: LPA 6 | # spark.app.partitionNum 7 | partitionNum:100 8 | } 9 | master:local 10 | } 11 | 12 | data: { 13 | # data source. optional of nebula,csv,json 14 | source: csv 15 | # data sink, means the algorithm result will be write into this sink. optional of nebula,csv,text 16 | sink: csv 17 | # if your algorithm needs weight 18 | hasWeight: false 19 | } 20 | 21 | # Nebula Graph relation config 22 | nebula: { 23 | # algo's data source from Nebula. If data.source is nebula, then this nebula.read config can be valid. 24 | read: { 25 | # Nebula metad server address, multiple addresses are split by English comma 26 | metaAddress: "127.0.0.1:9559" 27 | # Nebula space 28 | space: nb 29 | # Nebula edge types, multiple labels means that data from multiple edges will union together 30 | labels: ["serve"] 31 | # Nebula edge property name for each edge type, this property will be as weight col for algorithm. 32 | # Make sure the weightCols are corresponding to labels. 33 | weightCols: ["start_year"] 34 | } 35 | 36 | # algo result sink into Nebula. If data.sink is nebula, then this nebula.write config can be valid. 37 | write:{ 38 | # Nebula graphd server address, multiple addresses are split by English comma 39 | graphAddress: "127.0.0.1:9669" 40 | # Nebula metad server address, multiple addresses are split by English comma 41 | metaAddress: "127.0.0.1:9559,127.0.0.1:9560" 42 | user:root 43 | pswd:nebula 44 | # Nebula space name 45 | space:nb 46 | # Nebula tag name, the algorithm result will be write into this tag 47 | tag:pagerank 48 | } 49 | } 50 | 51 | local: { 52 | # algo's data source from Nebula. If data.source is csv or json, then this local.read can be valid. 53 | read:{ 54 | filePath: "file:///tmp/algo_edge.csv" 55 | # srcId column 56 | srcId:"_c0" 57 | # dstId column 58 | dstId:"_c1" 59 | # weight column 60 | #weight: "col3" 61 | # if csv file has header 62 | header: false 63 | # csv file's delimiter 64 | delimiter:"," 65 | } 66 | 67 | # algo result sink into local file. If data.sink is csv or text, then this local.write can be valid. 68 | write:{ 69 | resultPath:/tmp/count 70 | } 71 | } 72 | 73 | 74 | algorithm: { 75 | # the algorithm that you are going to execute,pick one from [pagerank, louvain, connectedcomponent, 76 | # labelpropagation, shortestpaths, degreestatic, kcore, stronglyconnectedcomponent, trianglecount, 77 | # betweenness, graphtriangleCount] 78 | executeAlgo: graphtrianglecount 79 | 80 | # PageRank parameter 81 | pagerank: { 82 | maxIter: 10 83 | resetProb: 0.15 # default 0.15 84 | } 85 | 86 | # Louvain parameter 87 | louvain: { 88 | maxIter: 20 89 | internalIter: 10 90 | tol: 0.5 91 | } 92 | 93 | # connected component parameter. 94 | connectedcomponent: { 95 | maxIter: 20 96 | } 97 | 98 | # LabelPropagation parameter 99 | labelpropagation: { 100 | maxIter: 20 101 | } 102 | 103 | # ShortestPaths parameter 104 | shortestpaths: { 105 | # several vertices to compute the shortest path to all vertices. 
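# Several landmarks can be given in one comma-separated string, e.g. landmarks: "1,2,3"
# (illustrative ids); ShortestPathConfig splits this value on "," and parses each id as a Long vertex id.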
106 | landmarks: "1" 107 | } 108 | 109 | # Vertex degree statistics parameter 110 | degreestatic: {} 111 | 112 | # KCore parameter 113 | kcore:{ 114 | maxIter:10 115 | degree:1 116 | } 117 | 118 | # Trianglecount parameter 119 | trianglecount:{} 120 | 121 | # graphTriangleCount parameter 122 | graphtrianglecount:{} 123 | 124 | # Betweenness centrality parameter 125 | betweenness:{ 126 | maxIter:5 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/resources/edge: -------------------------------------------------------------------------------- 1 | {"src":12345,"dst":23456,"degree":34, "descr": "aaa","timep": "2020-01-01"} 2 | {"src":11111,"dst":22222,"degree":33, "descr": "aaa","timep": "2020-01-01"} 3 | {"src":11111,"dst":33333,"degree":32, "descr": "a\baa","timep": "2020-01-01"} 4 | {"src":11111,"dst":44444,"degree":31, "descr": "aaa","timep": "2020-01-01"} 5 | {"src":22222,"dst":55555,"degree":30, "descr": "a\naa","timep": "2020-01-01"} 6 | {"src":33333,"dst":44444,"degree":29, "descr": "aaa","timep": "2020-01-01"} 7 | {"src":33333,"dst":55555,"degree":28, "descr": "aa\ta","timep": "2020-01-01"} 8 | {"src":44444,"dst":22222,"degree":27, "descr": "aaa","timep": "2020-01-01"} 9 | {"src":44444,"dst":55555,"degree":26, "descr": "aaa","timep": "2020-01-01"} 10 | {"src":22222,"dst":66666,"degree":25, "descr": "aaa","timep": "2020-01-01"} -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/Main.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm 8 | 9 | import com.vesoft.nebula.algorithm.config.Configs.Argument 10 | import com.vesoft.nebula.algorithm.config.{ 11 | AlgoConfig, 12 | AlgoConstants, 13 | BetweennessConfig, 14 | CcConfig, 15 | Configs, 16 | KCoreConfig, 17 | LPAConfig, 18 | LouvainConfig, 19 | PRConfig, 20 | ShortestPathConfig, 21 | SparkConfig 22 | } 23 | import com.vesoft.nebula.algorithm.lib.{ 24 | BetweennessCentralityAlgo, 25 | ConnectedComponentsAlgo, 26 | DegreeStaticAlgo, 27 | GraphTriangleCountAlgo, 28 | KCoreAlgo, 29 | LabelPropagationAlgo, 30 | LouvainAlgo, 31 | PageRankAlgo, 32 | ShortestPathAlgo, 33 | StronglyConnectedComponentsAlgo, 34 | TriangleCountAlgo 35 | } 36 | import com.vesoft.nebula.algorithm.reader.{CsvReader, JsonReader, NebulaReader} 37 | import com.vesoft.nebula.algorithm.writer.{CsvWriter, NebulaWriter, TextWriter} 38 | import org.apache.commons.math3.ode.UnknownParameterException 39 | import org.apache.log4j.Logger 40 | import org.apache.spark.sql.types.{LongType, StructField, StructType} 41 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 42 | 43 | /** 44 | * This object is the entry of all graph algorithms. 45 | * 46 | * How to use this tool to run algorithm: 47 | * 1. Configure application.conf file. 48 | * 2. Make sure your environment has installed spark and started spark service. 49 | * 3. 
Submit nebula algorithm application using this command: 50 | * spark-submit --class com.vesoft.nebula.tools.algorithm.Main /your-jar-path/nebula-algorithm-1.1.0.jar -p /your-application.conf-path/application.conf 51 | */ 52 | object Main { 53 | 54 | private val LOGGER = Logger.getLogger(this.getClass) 55 | 56 | def main(args: Array[String]): Unit = { 57 | val PROGRAM_NAME = "Nebula graphx" 58 | val options = Configs.parser(args, PROGRAM_NAME) 59 | val p: Argument = options match { 60 | case Some(config) => config 61 | case _ => 62 | LOGGER.error("Argument parse failed") 63 | sys.exit(-1) 64 | } 65 | val configs = Configs.parse(p.config) 66 | LOGGER.info(s"configs = ${configs}") 67 | 68 | val algoName: String = AlgoConfig.getAlgoName(configs) 69 | LOGGER.info(s"algoName= ${algoName}") 70 | 71 | val sparkConfig = SparkConfig.getSpark(configs) 72 | val partitionNum = sparkConfig.partitionNum 73 | 74 | // reader 75 | val dataSet = createDataSource(sparkConfig.spark, configs, partitionNum) 76 | 77 | // algorithm 78 | val algoResult = executeAlgorithm(sparkConfig.spark, algoName, configs, dataSet) 79 | // writer 80 | saveAlgoResult(algoResult, configs) 81 | 82 | sys.exit(0) 83 | } 84 | 85 | /** 86 | * create data from datasource 87 | * 88 | * @param spark 89 | * @param configs 90 | * @return DataFrame 91 | */ 92 | private[this] def createDataSource(spark: SparkSession, 93 | configs: Configs, 94 | partitionNum: String): DataFrame = { 95 | val dataSource = configs.dataSourceSinkEntry.source 96 | val dataSet: Dataset[Row] = dataSource.toLowerCase match { 97 | case "nebula" => { 98 | val reader = new NebulaReader(spark, configs, partitionNum) 99 | reader.read() 100 | } 101 | case "csv" => { 102 | val reader = new CsvReader(spark, configs, partitionNum) 103 | reader.read() 104 | } 105 | case "json" => { 106 | val reader = new JsonReader(spark, configs, partitionNum) 107 | reader.read() 108 | } 109 | } 110 | dataSet 111 | } 112 | 113 | /** 114 | * execute algorithms 115 | * @param spark 116 | * @param algoName 117 | * @param configs 118 | * @param dataSet 119 | * @return DataFrame 120 | */ 121 | private[this] def executeAlgorithm(spark: SparkSession, 122 | algoName: String, 123 | configs: Configs, 124 | dataSet: DataFrame): DataFrame = { 125 | val hasWeight = configs.dataSourceSinkEntry.hasWeight 126 | val algoResult = { 127 | algoName.toLowerCase match { 128 | case "pagerank" => { 129 | val pageRankConfig = PRConfig.getPRConfig(configs) 130 | PageRankAlgo(spark, dataSet, pageRankConfig, hasWeight) 131 | } 132 | case "louvain" => { 133 | val louvainConfig = LouvainConfig.getLouvainConfig(configs) 134 | LouvainAlgo(spark, dataSet, louvainConfig, hasWeight) 135 | } 136 | case "connectedcomponent" => { 137 | val ccConfig = CcConfig.getCcConfig(configs) 138 | ConnectedComponentsAlgo(spark, dataSet, ccConfig, hasWeight) 139 | } 140 | case "labelpropagation" => { 141 | val lpaConfig = LPAConfig.getLPAConfig(configs) 142 | LabelPropagationAlgo(spark, dataSet, lpaConfig, hasWeight) 143 | } 144 | case "shortestpaths" => { 145 | val spConfig = ShortestPathConfig.getShortestPathConfig(configs) 146 | ShortestPathAlgo(spark, dataSet, spConfig, hasWeight) 147 | } 148 | case "degreestatic" => { 149 | DegreeStaticAlgo(spark, dataSet) 150 | } 151 | case "kcore" => { 152 | val kCoreConfig = KCoreConfig.getKCoreConfig(configs) 153 | KCoreAlgo(spark, dataSet, kCoreConfig) 154 | } 155 | case "stronglyconnectedcomponent" => { 156 | val ccConfig = CcConfig.getCcConfig(configs) 157 | StronglyConnectedComponentsAlgo(spark, 
dataSet, ccConfig, hasWeight) 158 | } 159 | case "betweenness" => { 160 | val betweennessConfig = BetweennessConfig.getBetweennessConfig(configs) 161 | BetweennessCentralityAlgo(spark, dataSet, betweennessConfig, hasWeight) 162 | } 163 | case "trianglecount" => { 164 | TriangleCountAlgo(spark, dataSet) 165 | } 166 | case "graphtrianglecount" => { 167 | GraphTriangleCountAlgo(spark, dataSet) 168 | } 169 | case _ => throw new UnknownParameterException("unknown executeAlgo name.") 170 | } 171 | } 172 | algoResult 173 | } 174 | 175 | private[this] def saveAlgoResult(algoResult: DataFrame, configs: Configs): Unit = { 176 | val dataSink = configs.dataSourceSinkEntry.sink 177 | dataSink.toLowerCase match { 178 | case "nebula" => { 179 | val writer = new NebulaWriter(algoResult, configs) 180 | writer.write() 181 | } 182 | case "csv" => { 183 | val writer = new CsvWriter(algoResult, configs) 184 | writer.write() 185 | } 186 | case "text" => { 187 | val writer = new TextWriter(algoResult, configs) 188 | writer.write() 189 | } 190 | case _ => throw new UnsupportedOperationException("unsupported data sink") 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/config/AlgoConfig.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.config 8 | 9 | import org.apache.spark.graphx.VertexId 10 | 11 | case class PRConfig(maxIter: Int, resetProb: Double) 12 | 13 | /** 14 | * pagerank algorithm configuration 15 | */ 16 | object PRConfig { 17 | var maxIter: Int = _ 18 | var resetProb: Double = _ 19 | 20 | def getPRConfig(configs: Configs): PRConfig = { 21 | val prConfig = configs.algorithmConfig.map 22 | 23 | maxIter = prConfig("algorithm.pagerank.maxIter").toInt 24 | resetProb = 25 | if (prConfig.contains("algorithm.pagerank.resetProb")) 26 | prConfig("algorithm.pagerank.resetProb").toDouble 27 | else 0.15 28 | 29 | PRConfig(maxIter, resetProb) 30 | } 31 | } 32 | 33 | case class LPAConfig(maxIter: Int) 34 | 35 | /** 36 | * labelPropagation algorithm configuration 37 | */ 38 | object LPAConfig { 39 | var maxIter: Int = _ 40 | 41 | def getLPAConfig(configs: Configs): LPAConfig = { 42 | val lpaConfig = configs.algorithmConfig.map 43 | 44 | maxIter = lpaConfig("algorithm.labelpropagation.maxIter").toInt 45 | LPAConfig(maxIter) 46 | } 47 | } 48 | 49 | case class CcConfig(maxIter: Int) 50 | 51 | /** 52 | * ConnectedComponect algorithm configuration 53 | */ 54 | object CcConfig { 55 | var maxIter: Int = _ 56 | 57 | def getCcConfig(configs: Configs): CcConfig = { 58 | val ccConfig = configs.algorithmConfig.map 59 | 60 | maxIter = ccConfig("algorithm.connectedcomponent.maxIter").toInt 61 | CcConfig(maxIter) 62 | } 63 | } 64 | 65 | case class ShortestPathConfig(landmarks: Seq[VertexId]) 66 | 67 | /** 68 | * ConnectedComponect algorithm configuration 69 | */ 70 | object ShortestPathConfig { 71 | var landmarks: Seq[Long] = _ 72 | 73 | def getShortestPathConfig(configs: Configs): ShortestPathConfig = { 74 | val spConfig = configs.algorithmConfig.map 75 | 76 | landmarks = spConfig("algorithm.shortestpaths.landmarks").split(",").toSeq.map(_.toLong) 77 | ShortestPathConfig(landmarks) 78 | } 79 | } 80 | 81 | case class 
LouvainConfig(maxIter: Int, internalIter: Int, tol: Double) 82 | 83 | /** 84 | * louvain algorithm configuration 85 | */ 86 | object LouvainConfig { 87 | var maxIter: Int = _ 88 | var internalIter: Int = _ 89 | var tol: Double = _ 90 | 91 | def getLouvainConfig(configs: Configs): LouvainConfig = { 92 | val louvainConfig = configs.algorithmConfig.map 93 | 94 | maxIter = louvainConfig("algorithm.louvain.maxIter").toInt 95 | internalIter = louvainConfig("algorithm.louvain.internalIter").toInt 96 | tol = louvainConfig("algorithm.louvain.tol").toDouble 97 | 98 | LouvainConfig(maxIter, internalIter, tol) 99 | } 100 | } 101 | 102 | /** 103 | * degree static 104 | */ 105 | case class DegreeStaticConfig(degree: Boolean, inDegree: Boolean, outDegree: Boolean) 106 | 107 | object DegreeStaticConfig { 108 | var degree: Boolean = false 109 | var inDegree: Boolean = false 110 | var outDegree: Boolean = false 111 | 112 | def getDegreeStaticConfig(configs: Configs): DegreeStaticConfig = { 113 | val degreeConfig = configs.algorithmConfig.map 114 | degree = ConfigUtil.getOrElseBoolean(degreeConfig, "algorithm.degreestatic.degree", false) 115 | inDegree = ConfigUtil.getOrElseBoolean(degreeConfig, "algorithm.degreestatic.indegree", false) 116 | outDegree = ConfigUtil.getOrElseBoolean(degreeConfig, "algorithm.degreestatic.outdegree", false) 117 | DegreeStaticConfig(degree, inDegree, outDegree) 118 | } 119 | } 120 | 121 | /** 122 | * k-core 123 | */ 124 | case class KCoreConfig(maxIter: Int, degree: Int) 125 | 126 | object KCoreConfig { 127 | var maxIter: Int = _ 128 | var degree: Int = _ 129 | 130 | def getKCoreConfig(configs: Configs): KCoreConfig = { 131 | val kCoreConfig = configs.algorithmConfig.map 132 | maxIter = kCoreConfig("algorithm.kcore.maxIter").toInt 133 | degree = kCoreConfig("algorithm.kcore.degree").toInt 134 | KCoreConfig(maxIter, degree) 135 | } 136 | } 137 | 138 | /** 139 | * Betweenness 140 | */ 141 | case class BetweennessConfig(maxIter: Int) 142 | 143 | object BetweennessConfig { 144 | var maxIter: Int = _ 145 | 146 | def getBetweennessConfig(configs: Configs): BetweennessConfig = { 147 | val betweennessConfig = configs.algorithmConfig.map 148 | maxIter = betweennessConfig("algorithm.betweenness.maxIter").toInt 149 | BetweennessConfig(maxIter) 150 | } 151 | } 152 | 153 | case class AlgoConfig(configs: Configs) 154 | 155 | object AlgoConfig { 156 | def getAlgoName(configs: Configs): String = { 157 | val algoConfig = configs.algorithmConfig.map 158 | algoConfig("algorithm.executeAlgo") 159 | } 160 | } 161 | 162 | object ConfigUtil { 163 | def getOrElseBoolean(config: Map[String, String], key: String, defaultValue: Boolean): Boolean = { 164 | if (config.contains(key)) { 165 | config(key).toBoolean 166 | } else { 167 | defaultValue 168 | } 169 | } 170 | 171 | } 172 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/config/NebulaConfig.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.config 8 | 9 | object NebulaConfig { 10 | 11 | def getReadNebula(configs: Configs): NebulaReadConfigEntry = { 12 | val nebulaConfigs = configs.nebulaConfig 13 | nebulaConfigs.readConfigEntry 14 | } 15 | 16 | def getWriteNebula(configs: Configs): NebulaWriteConfigEntry = { 17 | val nebulaConfigs = configs.nebulaConfig 18 | nebulaConfigs.writeConfigEntry 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/config/SparkConfig.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.config 8 | 9 | import org.apache.spark.sql.SparkSession 10 | 11 | case class SparkConfig(spark: SparkSession, partitionNum: String) 12 | 13 | object SparkConfig { 14 | 15 | var spark: SparkSession = _ 16 | 17 | var partitionNum: String = _ 18 | 19 | def getSpark(configs: Configs, defaultAppName: String = "algorithm"): SparkConfig = { 20 | val sparkConfigs = configs.sparkConfig.map 21 | val session = SparkSession.builder 22 | .appName(defaultAppName) 23 | .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 24 | 25 | for (key <- sparkConfigs.keySet) { 26 | session.config(key, sparkConfigs(key)) 27 | } 28 | partitionNum = sparkConfigs.getOrElse("spark.app.partitionNum", "0") 29 | SparkConfig(session.getOrCreate(), partitionNum) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/lib/ConnectedComponentsAlgo.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 10 | import com.vesoft.nebula.algorithm.config.{ 11 | AlgoConstants, 12 | CcConfig, 13 | Configs, 14 | LPAConfig, 15 | NebulaConfig, 16 | PRConfig, 17 | SparkConfig 18 | } 19 | import org.apache.log4j.Logger 20 | import org.apache.spark.graphx.{Graph, VertexId, VertexRDD} 21 | import org.apache.spark.rdd.RDD 22 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 23 | import org.apache.spark.graphx.lib.ConnectedComponents 24 | import org.apache.spark.sql.types.{DoubleType, LongType, StructField, StructType} 25 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 26 | 27 | object ConnectedComponentsAlgo { 28 | private val LOGGER = Logger.getLogger(this.getClass) 29 | 30 | val ALGORITHM: String = "ConnectedComponents" 31 | 32 | /** 33 | * run the ConnectedComponents algorithm for nebula graph 34 | */ 35 | def apply(spark: SparkSession, 36 | dataset: Dataset[Row], 37 | ccConfig: CcConfig, 38 | hasWeight: Boolean): DataFrame = { 39 | 40 | val graph: Graph[None.type, Double] = NebulaUtil.loadInitGraph(dataset, hasWeight) 41 | 42 | val ccResultRDD = execute(graph, ccConfig.maxIter) 43 | 44 | val schema = StructType( 45 | List( 46 | StructField(AlgoConstants.ALGO_ID_COL, LongType, nullable = false), 47 | StructField(AlgoConstants.CC_RESULT_COL, LongType, nullable = true) 48 | )) 49 | val algoResult = spark.sqlContext 50 | .createDataFrame(ccResultRDD, schema) 51 | 52 | algoResult 53 | } 54 | 55 | def execute(graph: Graph[None.type, Double], maxIter: Int): RDD[Row] = { 56 | val ccResultRDD: VertexRDD[VertexId] = ConnectedComponents.run(graph, maxIter).vertices 57 | ccResultRDD.map(row => Row(row._1, row._2)) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/lib/DegreeStaticAlgo.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.AlgoConstants 10 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 11 | import org.apache.log4j.Logger 12 | import org.apache.spark.graphx.{Graph, VertexRDD} 13 | import org.apache.spark.rdd.RDD 14 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 15 | import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType} 16 | 17 | object DegreeStaticAlgo { 18 | 19 | private val LOGGER = Logger.getLogger(this.getClass) 20 | 21 | val ALGORITHM: String = "DegreeStatic" 22 | 23 | /** 24 | * run the pagerank algorithm for nebula graph 25 | */ 26 | def apply(spark: SparkSession, dataset: Dataset[Row]): DataFrame = { 27 | 28 | val graph: Graph[None.type, Double] = NebulaUtil.loadInitGraph(dataset, false) 29 | 30 | val degreeResultRDD = execute(graph) 31 | 32 | val schema = StructType( 33 | List( 34 | StructField(AlgoConstants.ALGO_ID_COL, LongType, nullable = false), 35 | StructField(AlgoConstants.DEGREE_RESULT_COL, IntegerType, nullable = true), 36 | StructField(AlgoConstants.INDEGREE_RESULT_COL, IntegerType, nullable = true), 37 | StructField(AlgoConstants.OUTDEGREE_RESULT_COL, IntegerType, nullable = true) 38 | )) 39 | val algoResult = spark.sqlContext 40 | .createDataFrame(degreeResultRDD, schema) 41 | 42 | algoResult 43 | } 44 | 45 | def execute(graph: Graph[None.type, Double]): RDD[Row] = { 46 | val degreeRdd: VertexRDD[Int] = graph.degrees 47 | val inDegreeRdd: VertexRDD[Int] = graph.inDegrees 48 | val outDegreeRdd: VertexRDD[Int] = graph.outDegrees 49 | 50 | val degreeAndInDegree: VertexRDD[(Int, Int)] = 51 | degreeRdd.leftJoin(inDegreeRdd)((id, d, inD) => (d, inD.getOrElse(0))) 52 | 53 | val result = degreeAndInDegree.leftJoin(outDegreeRdd)((id, dAndInD, opt) => 54 | (dAndInD._1, dAndInD._2, opt.getOrElse(0))) 55 | result.map(vertex => Row(vertex._1, vertex._2._1, vertex._2._2, vertex._2._3)) 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/lib/GraphTriangleCountAlgo.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.AlgoConstants 10 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 11 | import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType} 12 | 13 | /** 14 | * compute all graph's triangle count 15 | */ 16 | object GraphTriangleCountAlgo { 17 | 18 | def apply(spark: SparkSession, dataset: Dataset[Row]): DataFrame = { 19 | 20 | val triangleCount = TriangleCountAlgo(spark, dataset) 21 | val count = triangleCount 22 | .select(AlgoConstants.TRIANGLECOUNT_RESULT_COL) 23 | .rdd 24 | .map(value => value.get(0).asInstanceOf[Int]) 25 | .reduce(_ + _) / 3 26 | val list = List(count) 27 | val rdd = spark.sparkContext.parallelize(list).map(row => Row(row)) 28 | 29 | val schema = StructType( 30 | List( 31 | StructField("count", IntegerType, nullable = false) 32 | )) 33 | val algoResult = spark.sqlContext 34 | .createDataFrame(rdd, schema) 35 | 36 | algoResult 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/lib/KCoreAlgo.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.{AlgoConstants, KCoreConfig} 10 | import org.apache.log4j.Logger 11 | import org.apache.spark.graphx.Graph 12 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 13 | import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType} 14 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 15 | 16 | object KCoreAlgo { 17 | private val LOGGER = Logger.getLogger(this.getClass) 18 | 19 | val ALGORITHM: String = "LabelPropagation" 20 | 21 | /** 22 | * run the louvain algorithm for nebula graph 23 | */ 24 | def apply(spark: SparkSession, dataset: Dataset[Row], kCoreConfig: KCoreConfig): DataFrame = { 25 | 26 | val graph: Graph[None.type, Double] = NebulaUtil.loadInitGraph(dataset, false) 27 | val kCoreGraph = execute(graph, kCoreConfig.maxIter, kCoreConfig.degree) 28 | 29 | val schema = StructType( 30 | List( 31 | StructField(AlgoConstants.ALGO_ID_COL, LongType, nullable = false), 32 | StructField(AlgoConstants.KCORE_RESULT_COL, IntegerType, nullable = true) 33 | )) 34 | val resultRDD = kCoreGraph.vertices.map(vertex => Row(vertex._1, vertex._2)) 35 | val algoResult = spark.sqlContext.createDataFrame(resultRDD, schema) 36 | algoResult 37 | } 38 | 39 | /** 40 | * extract k-core sub-graph 41 | */ 42 | def execute(graph: Graph[None.type, Double], maxIter: Int, k: Int): Graph[Int, Double] = { 43 | var lastVertexNum: Long = graph.numVertices 44 | var currentVertexNum: Long = -1 45 | var isStable: Boolean = false 46 | var iterNum: Int = 1 47 | 48 | var degreeGraph = graph 49 | .outerJoinVertices(graph.degrees) { (vid, vd, degree) => 50 | degree.getOrElse(0) 51 | } 52 | .cache 53 | var subGraph: Graph[Int, Double] = null 54 | 55 | while (iterNum < maxIter) { 56 | subGraph = degreeGraph.subgraph(vpred = (vid, degree) => degree >= k) 57 | degreeGraph = subGraph 58 | .outerJoinVertices(subGraph.degrees) { (vid, vd, degree) => 59 | degree.getOrElse(0) 60 | } 61 | .cache 62 | 63 | currentVertexNum = degreeGraph.numVertices 64 | 
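// Convergence check: an unchanged vertex count means no vertices were pruned in this
// peeling round, so the remaining sub-graph already satisfies the degree >= k condition.
// isStable only records that fact; the loop itself still runs until maxIter is reached.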
if (currentVertexNum == lastVertexNum) { 65 | isStable = true; 66 | } else { 67 | lastVertexNum = currentVertexNum 68 | } 69 | 70 | iterNum += 1 71 | } 72 | subGraph 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/lib/LabelPropagationAlgo.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 10 | import com.vesoft.nebula.algorithm.config.{AlgoConstants, LPAConfig} 11 | import org.apache.log4j.Logger 12 | import org.apache.spark.graphx.{Graph, VertexId, VertexRDD} 13 | import org.apache.spark.rdd.RDD 14 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 15 | import org.apache.spark.graphx.lib.LabelPropagation 16 | import org.apache.spark.sql.types.{LongType, StructField, StructType} 17 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 18 | 19 | object LabelPropagationAlgo { 20 | private val LOGGER = Logger.getLogger(this.getClass) 21 | 22 | val ALGORITHM: String = "LabelPropagation" 23 | 24 | /** 25 | * run the LabelPropagation algorithm for nebula graph 26 | */ 27 | def apply(spark: SparkSession, 28 | dataset: Dataset[Row], 29 | lpaConfig: LPAConfig, 30 | hasWeight: Boolean): DataFrame = { 31 | val graph: Graph[None.type, Double] = NebulaUtil.loadInitGraph(dataset, hasWeight) 32 | 33 | val lpaResultRDD = execute(graph, lpaConfig.maxIter) 34 | 35 | val schema = StructType( 36 | List( 37 | StructField(AlgoConstants.ALGO_ID_COL, LongType, nullable = false), 38 | StructField(AlgoConstants.LPA_RESULT_COL, LongType, nullable = true) 39 | )) 40 | val algoResult = spark.sqlContext 41 | .createDataFrame(lpaResultRDD, schema) 42 | 43 | algoResult 44 | } 45 | 46 | def execute(graph: Graph[None.type, Double], maxIter: Int): RDD[Row] = { 47 | val lpaResultRDD: VertexRDD[VertexId] = LabelPropagation.run(graph, maxIter).vertices 48 | lpaResultRDD.map(row => Row(row._1, row._2)) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/lib/PageRankAlgo.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.{ 10 | AlgoConstants, 11 | Configs, 12 | NebulaConfig, 13 | PRConfig, 14 | SparkConfig 15 | } 16 | import org.apache.log4j.Logger 17 | import org.apache.spark.graphx.{Graph, VertexRDD} 18 | import org.apache.spark.rdd.RDD 19 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 20 | import org.apache.spark.graphx.lib.PageRank 21 | import org.apache.spark.sql.types.{DoubleType, LongType, StructField, StructType} 22 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 23 | 24 | object PageRankAlgo { 25 | private val LOGGER = Logger.getLogger(this.getClass) 26 | 27 | val ALGORITHM: String = "PageRank" 28 | 29 | /** 30 | * run the pagerank algorithm for nebula graph 31 | */ 32 | def apply(spark: SparkSession, 33 | dataset: Dataset[Row], 34 | pageRankConfig: PRConfig, 35 | hasWeight: Boolean): DataFrame = { 36 | 37 | val graph: Graph[None.type, Double] = NebulaUtil.loadInitGraph(dataset, hasWeight) 38 | 39 | val prResultRDD = execute(graph, pageRankConfig.maxIter, pageRankConfig.resetProb) 40 | 41 | val schema = StructType( 42 | List( 43 | StructField(AlgoConstants.ALGO_ID_COL, LongType, nullable = false), 44 | StructField(AlgoConstants.PAGERANK_RESULT_COL, DoubleType, nullable = true) 45 | )) 46 | val algoResult = spark.sqlContext 47 | .createDataFrame(prResultRDD, schema) 48 | 49 | algoResult 50 | } 51 | 52 | def execute(graph: Graph[None.type, Double], maxIter: Int, resetProb: Double): RDD[Row] = { 53 | val prResultRDD: VertexRDD[Double] = PageRank.run(graph, maxIter, resetProb).vertices 54 | prResultRDD.map(row => Row(row._1, row._2)) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/lib/ShortestPathAlgo.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 10 | import com.vesoft.nebula.algorithm.config.{ 11 | AlgoConstants, 12 | CcConfig, 13 | Configs, 14 | NebulaConfig, 15 | PRConfig, 16 | ShortestPathConfig, 17 | SparkConfig 18 | } 19 | import org.apache.log4j.Logger 20 | import org.apache.spark.graphx.{Graph, VertexId, VertexRDD} 21 | import org.apache.spark.rdd.RDD 22 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 23 | import org.apache.spark.graphx.lib.ShortestPaths 24 | import org.apache.spark.graphx.lib.ShortestPaths.SPMap 25 | import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType} 26 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 27 | 28 | object ShortestPathAlgo { 29 | private val LOGGER = Logger.getLogger(this.getClass) 30 | 31 | val ALGORITHM: String = "ShortestPath" 32 | 33 | /** 34 | * run the ShortestPath algorithm for nebula graph 35 | */ 36 | def apply(spark: SparkSession, 37 | dataset: Dataset[Row], 38 | shortestPathConfig: ShortestPathConfig, 39 | hasWeight: Boolean): DataFrame = { 40 | 41 | val graph: Graph[None.type, Double] = NebulaUtil.loadInitGraph(dataset, hasWeight) 42 | 43 | val prResultRDD = execute(graph, shortestPathConfig.landmarks) 44 | 45 | val schema = StructType( 46 | List( 47 | StructField(AlgoConstants.ALGO_ID_COL, LongType, nullable = false), 48 | StructField(AlgoConstants.SHORTPATH_RESULT_COL, StringType, nullable = true) 49 | )) 50 | val algoResult = spark.sqlContext 51 | .createDataFrame(prResultRDD, schema) 52 | 53 | algoResult 54 | } 55 | 56 | def execute(graph: Graph[None.type, Double], landmarks: Seq[VertexId]): RDD[Row] = { 57 | val spResultRDD: VertexRDD[SPMap] = ShortestPaths.run(graph, landmarks).vertices 58 | spResultRDD.map(row => Row(row._1, row._2.toString())) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/lib/StronglyConnectedComponentsAlgo.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 10 | import com.vesoft.nebula.algorithm.config.{AlgoConstants, CcConfig} 11 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 12 | import org.apache.spark.graphx.{Graph, VertexId, VertexRDD} 13 | import org.apache.spark.graphx.lib.{ConnectedComponents, StronglyConnectedComponents} 14 | import org.apache.spark.rdd.RDD 15 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 16 | import org.apache.spark.sql.types.{LongType, StructField, StructType} 17 | 18 | object StronglyConnectedComponentsAlgo { 19 | 20 | val ALGORITHM: String = "StronglyConnectedComponents" 21 | 22 | /** 23 | * run the StronglyConnectedComponents algorithm for nebula graph 24 | */ 25 | def apply(spark: SparkSession, 26 | dataset: Dataset[Row], 27 | ccConfig: CcConfig, 28 | hasWeight: Boolean): DataFrame = { 29 | 30 | val graph: Graph[None.type, Double] = NebulaUtil.loadInitGraph(dataset, hasWeight) 31 | 32 | val ccResultRDD = execute(graph, ccConfig.maxIter) 33 | 34 | val schema = StructType( 35 | List( 36 | StructField(AlgoConstants.ALGO_ID_COL, LongType, nullable = false), 37 | StructField(AlgoConstants.SCC_RESULT_COL, LongType, nullable = true) 38 | )) 39 | val algoResult = spark.sqlContext 40 | .createDataFrame(ccResultRDD, schema) 41 | 42 | algoResult 43 | } 44 | 45 | def execute(graph: Graph[None.type, Double], maxIter: Int): RDD[Row] = { 46 | val ccResultRDD: VertexRDD[VertexId] = StronglyConnectedComponents.run(graph, maxIter).vertices 47 | ccResultRDD.map(row => Row(row._1, row._2)) 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/lib/TriangleCountAlgo.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.AlgoConstants 10 | import com.vesoft.nebula.algorithm.utils.NebulaUtil 11 | import org.apache.log4j.Logger 12 | import org.apache.spark.graphx.{Graph, VertexRDD} 13 | import org.apache.spark.graphx.lib.TriangleCount 14 | import org.apache.spark.rdd.RDD 15 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 16 | import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType} 17 | 18 | object TriangleCountAlgo { 19 | private val LOGGER = Logger.getLogger(this.getClass) 20 | 21 | val ALGORITHM: String = "TriangleCount" 22 | 23 | /** 24 | * run the TriangleCount algorithm for nebula graph 25 | * 26 | * compute each vertex's triangle count 27 | */ 28 | def apply(spark: SparkSession, dataset: Dataset[Row]): DataFrame = { 29 | 30 | val graph: Graph[None.type, Double] = NebulaUtil.loadInitGraph(dataset, false) 31 | 32 | val triangleResultRDD = execute(graph) 33 | 34 | val schema = StructType( 35 | List( 36 | StructField(AlgoConstants.ALGO_ID_COL, LongType, nullable = false), 37 | StructField(AlgoConstants.TRIANGLECOUNT_RESULT_COL, IntegerType, nullable = true) 38 | )) 39 | val algoResult = spark.sqlContext 40 | .createDataFrame(triangleResultRDD, schema) 41 | 42 | algoResult 43 | } 44 | 45 | def execute(graph: Graph[None.type, Double]): RDD[Row] = { 46 | val resultRDD: VertexRDD[Int] = TriangleCount.run(graph).vertices 47 | resultRDD.map(row => Row(row._1, row._2)) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/reader/DataReader.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.reader 8 | 9 | import com.vesoft.nebula.connector.connector.NebulaDataFrameReader 10 | import com.vesoft.nebula.connector.{NebulaConnectionConfig, ReadNebulaConfig} 11 | import com.vesoft.nebula.algorithm.config.Configs 12 | import org.apache.spark.sql.{DataFrame, SparkSession} 13 | 14 | import scala.collection.mutable.ListBuffer 15 | 16 | abstract class DataReader(spark: SparkSession, configs: Configs) { 17 | def read(): DataFrame 18 | } 19 | 20 | class NebulaReader(spark: SparkSession, configs: Configs, partitionNum: String) 21 | extends DataReader(spark, configs) { 22 | override def read(): DataFrame = { 23 | val metaAddress = configs.nebulaConfig.readConfigEntry.address 24 | val space = configs.nebulaConfig.readConfigEntry.space 25 | val labels = configs.nebulaConfig.readConfigEntry.labels 26 | val weights = configs.nebulaConfig.readConfigEntry.weightCols 27 | val partition = partitionNum.toInt 28 | 29 | val config = 30 | NebulaConnectionConfig 31 | .builder() 32 | .withMetaAddress(metaAddress) 33 | .withConenctionRetry(2) 34 | .build() 35 | 36 | val noColumn = weights.isEmpty 37 | 38 | var dataset: DataFrame = null 39 | for (i <- labels.indices) { 40 | val returnCols: ListBuffer[String] = new ListBuffer[String] 41 | if (configs.dataSourceSinkEntry.hasWeight && weights.nonEmpty) { 42 | returnCols.append(weights(i)) 43 | } 44 | val nebulaReadEdgeConfig: ReadNebulaConfig = ReadNebulaConfig 45 | .builder() 46 | .withSpace(space) 47 | .withLabel(labels(i)) 48 | .withNoColumn(noColumn) 49 | .withReturnCols(returnCols.toList) 50 | .withPartitionNum(partition) 51 | .build() 52 | if (dataset == null) { 53 | dataset = spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToDF() 54 | } else { 55 | dataset = dataset.union(spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToDF()) 56 | } 57 | } 58 | dataset 59 | } 60 | } 61 | 62 | class CsvReader(spark: SparkSession, configs: Configs, partitionNum: String) 63 | extends DataReader(spark, configs) { 64 | override def read(): DataFrame = { 65 | val delimiter = configs.localConfigEntry.delimiter 66 | val header = configs.localConfigEntry.header 67 | val localPath = configs.localConfigEntry.filePath 68 | 69 | val partition = partitionNum.toInt 70 | 71 | val data = 72 | spark.read 73 | .option("header", header) 74 | .option("delimiter", delimiter) 75 | .csv(localPath) 76 | val weight = configs.localConfigEntry.weight 77 | val src = configs.localConfigEntry.srcId 78 | val dst = configs.localConfigEntry.dstId 79 | if (configs.dataSourceSinkEntry.hasWeight && weight != null && !weight.trim.isEmpty) { 80 | data.select(src, dst, weight) 81 | } else { 82 | data.select(src, dst) 83 | } 84 | if (partition != 0) { 85 | data.repartition(partition) 86 | } 87 | data 88 | } 89 | } 90 | 91 | class JsonReader(spark: SparkSession, configs: Configs, partitionNum: String) 92 | extends DataReader(spark, configs) { 93 | override def read(): DataFrame = { 94 | val localPath = configs.localConfigEntry.filePath 95 | val data = spark.read.json(localPath) 96 | val partition = partitionNum.toInt 97 | 98 | val weight = configs.localConfigEntry.weight 99 | val src = configs.localConfigEntry.srcId 100 | val dst = configs.localConfigEntry.dstId 101 | if (configs.dataSourceSinkEntry.hasWeight && weight != null && !weight.trim.isEmpty) { 102 | data.select(src, dst, weight) 103 | } else { 104 | data.select(src, dst) 105 | } 106 | if (partition != 0) { 107 | data.repartition(partition) 108 | } 109 | data 110 | } 111 | } 112 | 
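The three readers above are normally selected by the algorithm's entry point according to `data.source` in `application.conf`. Below is a minimal sketch of that wiring, for illustration only: the `ReaderExample` object name, the config path and the literal partition value are assumptions, while the parsing calls mirror the ones used in `ConfigSuite`.

```scala
import com.vesoft.nebula.algorithm.config.Configs
import com.vesoft.nebula.algorithm.reader.{CsvReader, DataReader, JsonReader, NebulaReader}
import org.apache.spark.sql.{DataFrame, SparkSession}

object ReaderExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").getOrCreate()
    // Parse the algorithm configuration the same way ConfigSuite does (path is illustrative).
    val argument = Configs.parser(Array("-p", "src/test/resources/application.conf"), "ReaderExample").get
    val configs  = Configs.parse(argument.config)
    val partitionNum = "100" // in the real entry point this comes from spark.app.partitionNum

    // Pick a reader based on the configured data source and load the edge DataFrame.
    val reader: DataReader = configs.dataSourceSinkEntry.source match {
      case "nebula" => new NebulaReader(spark, configs, partitionNum)
      case "json"   => new JsonReader(spark, configs, partitionNum)
      case _        => new CsvReader(spark, configs, partitionNum)
    }
    val edges: DataFrame = reader.read() // src and dst columns, plus an optional weight column
    edges.show(10)
  }
}
```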
-------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/utils/NebulaUtil.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.utils 8 | 9 | import org.apache.spark.graphx.{Edge, Graph} 10 | import org.apache.spark.rdd.RDD 11 | import org.apache.spark.sql.{Dataset, Encoder, Row} 12 | import org.slf4j.LoggerFactory 13 | 14 | object NebulaUtil { 15 | private val LOG = LoggerFactory.getLogger(this.getClass) 16 | 17 | /** 18 | * construct original graph 19 | * 20 | * @param hasWeight if the graph has no weight, then edge's weight is default 1.0 21 | * @return Graph 22 | */ 23 | def loadInitGraph(dataSet: Dataset[Row], hasWeight: Boolean): Graph[None.type, Double] = { 24 | implicit val encoder: Encoder[Edge[Double]] = org.apache.spark.sql.Encoders.kryo[Edge[Double]] 25 | val edges: RDD[Edge[Double]] = dataSet 26 | .map(row => { 27 | if (hasWeight) { 28 | Edge(row.get(0).toString.toLong, row.get(1).toString.toLong, row.get(2).toString.toDouble) 29 | } else { 30 | Edge(row.get(0).toString.toLong, row.get(1).toString.toLong, 1.0) 31 | } 32 | })(encoder) 33 | .rdd 34 | 35 | Graph.fromEdges(edges, None) 36 | } 37 | 38 | /** 39 | * Assembly algorithm's result file path 40 | * 41 | * @param path algorithm configuration 42 | * @param algorithmName 43 | * 44 | * @return validate result path 45 | */ 46 | def getResultPath(path: String, algorithmName: String): String = { 47 | var resultFilePath = path 48 | if (!resultFilePath.endsWith("/")) { 49 | resultFilePath = resultFilePath + "/" 50 | } 51 | resultFilePath + algorithmName 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/writer/AlgoWriter.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.writer 8 | 9 | import com.vesoft.nebula.connector.connector.NebulaDataFrameWriter 10 | import com.vesoft.nebula.connector.{NebulaConnectionConfig, WriteNebulaVertexConfig} 11 | import com.vesoft.nebula.algorithm.config.{AlgoConstants, Configs} 12 | import org.apache.spark.sql.DataFrame 13 | 14 | abstract class AlgoWriter(data: DataFrame, configs: Configs) { 15 | def write(): Unit 16 | } 17 | 18 | class NebulaWriter(data: DataFrame, configs: Configs) extends AlgoWriter(data, configs) { 19 | override def write(): Unit = { 20 | val graphAddress = configs.nebulaConfig.writeConfigEntry.graphAddress 21 | val metaAddress = configs.nebulaConfig.writeConfigEntry.metaAddress 22 | val space = configs.nebulaConfig.writeConfigEntry.space 23 | val tag = configs.nebulaConfig.writeConfigEntry.tag 24 | val user = configs.nebulaConfig.writeConfigEntry.user 25 | val passwd = configs.nebulaConfig.writeConfigEntry.pswd 26 | 27 | val config = 28 | NebulaConnectionConfig 29 | .builder() 30 | .withMetaAddress(metaAddress) 31 | .withGraphAddress(graphAddress) 32 | .withConenctionRetry(2) 33 | .build() 34 | val nebulaWriteVertexConfig: WriteNebulaVertexConfig = WriteNebulaVertexConfig 35 | .builder() 36 | .withSpace(space) 37 | .withTag(tag) 38 | .withVidField(AlgoConstants.ALGO_ID_COL) 39 | .withVidAsProp(false) 40 | .withBatch(1000) 41 | .build() 42 | data.write.nebula(config, nebulaWriteVertexConfig).writeVertices() 43 | } 44 | } 45 | 46 | class CsvWriter(data: DataFrame, configs: Configs) extends AlgoWriter(data, configs) { 47 | override def write(): Unit = { 48 | val resultPath = configs.localConfigEntry.resultPath 49 | data.repartition(1).write.option("header", true).csv(resultPath) 50 | } 51 | } 52 | 53 | class TextWriter(data: DataFrame, configs: Configs) extends AlgoWriter(data, configs) { 54 | override def write(): Unit = { 55 | val resultPath = configs.localConfigEntry.resultPath 56 | data.repartition(1).write.option("header", true).text(resultPath) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | { 2 | # Spark relation config 3 | spark: { 4 | app: { 5 | name: LPA 6 | # spark.app.partitionNum 7 | partitionNum:100 8 | } 9 | master:local 10 | } 11 | 12 | data: { 13 | # data source. optional of nebula,csv,json,parquet 14 | source: csv 15 | # data sink, means the algorithm result will be write into this sink. optional of nebula,csv,txt 16 | sink: nebula 17 | # if your algorithm needs weight 18 | hasWeight: false 19 | } 20 | 21 | # Nebula Graph relation config 22 | nebula: { 23 | # algo's data source from Nebula 24 | read: { 25 | # Nebula metad server address, multiple addresses are split by English comma 26 | metaAddress: "127.0.0.1:9559" 27 | # Nebula space 28 | space: nb 29 | # Nebula edge types, multiple labels means that data from multiple edges will union together 30 | labels: ["serve"] 31 | # Nebula edge property name for each edge type, this property will be as weight col for algorithm. 32 | # Make sure the weightCols are corresponding to labels. 
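# For example, labels: ["serve", "follow"] would pair with weightCols: ["start_year", "degree"].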
33 | weightCols: ["start_year"] 34 | } 35 | 36 | # algo result sink into Nebula 37 | write:{ 38 | # Nebula graphd server address, multiple addresses are split by English comma 39 | graphAddress: "127.0.0.1:9669" 40 | # Nebula metad server address, multiple addresses are split by English comma 41 | metaAddress: "127.0.0.1:9559,127.0.0.1:9560" 42 | user:root 43 | pswd:nebula 44 | # Nebula space name 45 | space:nb 46 | # Nebula tag name, the algorithm result will be write into this tag 47 | tag:pagerank 48 | } 49 | } 50 | 51 | local: { 52 | # algo's data source from Nebula 53 | read:{ 54 | filePath: "hdfs://127.0.0.1:9000/edge/work_for.csv" 55 | # srcId column 56 | srcId:"_c0" 57 | # dstId column 58 | dstId:"_c1" 59 | # weight column 60 | #weight: "col3" 61 | # if csv file has header 62 | header: false 63 | # csv file's delimiter 64 | delimiter:"," 65 | } 66 | 67 | # algo result sink into local file 68 | write:{ 69 | resultPath:/tmp/ 70 | } 71 | } 72 | 73 | 74 | algorithm: { 75 | # the algorithm that you are going to execute,pick one from [pagerank, louvain, connectedcomponent, 76 | # labelpropagation, shortestpaths, degreestatic, kcore, stronglyconnectedcomponent, trianglecount, 77 | # betweenness] 78 | executeAlgo: pagerank 79 | 80 | # pagerank parameter 81 | pagerank: { 82 | maxIter: 10 83 | resetProb: 0.15 # default 0.15 84 | } 85 | 86 | # louvain parameter 87 | louvain: { 88 | maxIter: 20 89 | internalIter: 10 90 | tol: 0.5 91 | } 92 | 93 | # connected component parameter TODO not implemented yet. 94 | connectedcomponent: { 95 | maxIter: 20 96 | } 97 | 98 | # LabelPropagation 99 | labelpropagation: { 100 | maxIter: 20 101 | } 102 | 103 | # ShortestPaths 104 | shortestpaths: { 105 | # several vertices to compute the shortest path to all vertices. 106 | landmarks: "1" 107 | } 108 | 109 | # vertex degree static 110 | degreestatic: {} 111 | 112 | # kcore 113 | kcore:{ 114 | maxIter:10 115 | degree:1 116 | } 117 | 118 | # trianglecount 119 | trianglecount:{} 120 | 121 | # betweenness centrality 122 | betweenness:{ 123 | maxIter:5 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/resources/edge.csv: -------------------------------------------------------------------------------- 1 | src,dst,weight 2 | 1,1,5.0 3 | 1,2,1.0 4 | 1,3,5.0 5 | 1,4,1.0 6 | 2,1,5.0 7 | 2,2,1.0 8 | 2,3,5.0 9 | 2,4,1.0 10 | 3,1,1.0 11 | 3,2,5.0 12 | 3,3,1.0 13 | 3,4,5.0 14 | 4,1,1.0 15 | 4,2,5.0 16 | 4,3,1.0 17 | 4,4,5.0 -------------------------------------------------------------------------------- /nebula-algorithm/src/test/resources/edge_noWeight.csv: -------------------------------------------------------------------------------- 1 | 1 2 2 | 1 7 3 | 1 4 4 | 2 0 5 | 2 4 6 | 2 5 7 | 2 6 8 | 3 0 9 | 3 7 10 | 4 0 11 | 4 10 12 | 5 7 13 | 5 11 14 | 6 7 15 | 6 11 16 | 8 9 17 | 8 10 18 | 8 11 19 | 9 12 20 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/config/ConfigSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.config 8 | 9 | import com.vesoft.nebula.algorithm.config.Configs.Argument 10 | import org.junit.Test 11 | 12 | import scala.collection.mutable.ListBuffer 13 | 14 | class ConfigSuite { 15 | 16 | var configs: Configs = _ 17 | 18 | @Test 19 | def getConfigsSuite(): Unit = { 20 | val args: ListBuffer[String] = new ListBuffer[String] 21 | args.append("-p") 22 | args.append("src/test/resources/application.conf") 23 | try { 24 | val options = Configs.parser(args.toArray, "TestProgram") 25 | val p: Argument = options match { 26 | case Some(config) => config 27 | case _ => 28 | assert(false) 29 | sys.exit(-1) 30 | } 31 | configs = Configs.parse(p.config) 32 | } catch { 33 | case e: Exception => { 34 | e.printStackTrace() 35 | assert(false) 36 | } 37 | } 38 | 39 | } 40 | 41 | @Test 42 | def getSparkConfigSuite(): Unit = { 43 | if (configs == null) { 44 | getConfigsSuite() 45 | } 46 | val sparkConfig = configs.sparkConfig 47 | assert(sparkConfig.map.size == 3) 48 | 49 | val spark = SparkConfig.getSpark(configs) 50 | assert(spark.partitionNum.toInt == 100) 51 | } 52 | 53 | @Test 54 | def getSourceSinkConfigSuite(): Unit = { 55 | if (configs == null) { 56 | getConfigsSuite() 57 | } 58 | val dataSourceSinkEntry = configs.dataSourceSinkEntry 59 | assert(dataSourceSinkEntry.source.equals("csv")) 60 | assert(dataSourceSinkEntry.sink.equals("nebula")) 61 | assert(!dataSourceSinkEntry.hasWeight) 62 | } 63 | @Test 64 | def getNebulaConfigSuite(): Unit = { 65 | if (configs == null) { 66 | getConfigsSuite() 67 | } 68 | val nebulaConfigEntry = configs.nebulaConfig 69 | val writeConfig = nebulaConfigEntry.writeConfigEntry 70 | assert(writeConfig.graphAddress.equals("127.0.0.1:9669")) 71 | assert(writeConfig.metaAddress.equals("127.0.0.1:9559,127.0.0.1:9560")) 72 | assert(writeConfig.space.equals("nb")) 73 | assert(writeConfig.tag.equals("pagerank")) 74 | assert(writeConfig.user.equals("root")) 75 | assert(writeConfig.pswd.equals("nebula")) 76 | 77 | val readConfig = nebulaConfigEntry.readConfigEntry 78 | assert(readConfig.address.equals("127.0.0.1:9559")) 79 | assert(readConfig.space.equals("nb")) 80 | assert(readConfig.labels.size == 1) 81 | assert(readConfig.weightCols.size == 1) 82 | } 83 | 84 | @Test 85 | def getLocalConfigSuite(): Unit = { 86 | if (configs == null) { 87 | getConfigsSuite() 88 | } 89 | val localConfigEntry = configs.localConfigEntry 90 | assert(localConfigEntry.filePath.startsWith("hdfs://")) 91 | assert(localConfigEntry.srcId.equals("_c0")) 92 | assert(localConfigEntry.dstId.equals("_c1")) 93 | assert(localConfigEntry.weight == null) 94 | assert(!localConfigEntry.header) 95 | assert(localConfigEntry.delimiter.equals(",")) 96 | assert(localConfigEntry.resultPath.equals("/tmp/")) 97 | } 98 | 99 | @Test 100 | def getAlgoConfigSuite(): Unit = { 101 | if (configs == null) { 102 | getConfigsSuite() 103 | } 104 | val algoConfig = configs.algorithmConfig 105 | val algoName = AlgoConfig.getAlgoName(configs) 106 | assert(algoName.equals("pagerank")) 107 | 108 | val prConfig = PRConfig.getPRConfig(configs) 109 | assert(prConfig.maxIter == 10) 110 | assert(prConfig.resetProb < 0.150000001) 111 | 112 | val louvainConfig = LouvainConfig.getLouvainConfig(configs) 113 | assert(louvainConfig.maxIter == 20) 114 | assert(louvainConfig.internalIter == 10) 115 | assert(louvainConfig.tol < 0.5000001) 116 | 117 | val ccConfig = CcConfig.getCcConfig(configs) 118 | assert(ccConfig.maxIter == 20) 119 | 120 | val lpaConfig = LPAConfig.getLPAConfig(configs) 121 | 
assert(lpaConfig.maxIter == 20) 122 | 123 | val shortestPathConfig = ShortestPathConfig.getShortestPathConfig(configs) 124 | assert(shortestPathConfig.landmarks.size == 1) 125 | 126 | val kcoreConfig = KCoreConfig.getKCoreConfig(configs) 127 | assert(kcoreConfig.maxIter == 10) 128 | assert(kcoreConfig.degree == 1) 129 | 130 | val betweennessConfig = BetweennessConfig.getBetweennessConfig(configs) 131 | assert(betweennessConfig.maxIter == 5) 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/data/MockNebulaData.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.data 8 | 9 | object MockNebulaData {} 10 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib/BetweennessAlgoSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.BetweennessConfig 10 | import org.apache.spark.sql.SparkSession 11 | import org.junit.Test 12 | 13 | class BetweennessAlgoSuite { 14 | @Test 15 | def betweennessAlgoSuite(): Unit = { 16 | val spark = SparkSession.builder().master("local").getOrCreate() 17 | val data = spark.read.option("header", true).csv("src/test/resources/edge.csv") 18 | val betweennessConfig = new BetweennessConfig(5) 19 | val result = BetweennessCentralityAlgo.apply(spark, data, betweennessConfig, false) 20 | assert(result.count() == 4) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib/CcAlgoSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.CcConfig 10 | import org.apache.spark.sql.SparkSession 11 | import org.junit.Test 12 | 13 | class CcAlgoSuite { 14 | @Test 15 | def ccAlgoSuite(): Unit = { 16 | val spark = SparkSession.builder().master("local").getOrCreate() 17 | val data = spark.read.option("header", true).csv("src/test/resources/edge.csv") 18 | val ccAlgoConfig = new CcConfig(5) 19 | val result = ConnectedComponentsAlgo.apply(spark, data, ccAlgoConfig, false) 20 | assert(result.count() == 4) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib/DegreeStaticAlgoSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 
2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import org.apache.spark.sql.SparkSession 10 | import org.junit.Test 11 | 12 | class DegreeStaticAlgoSuite { 13 | @Test 14 | def degreeStaticAlgoSuite(): Unit = { 15 | val spark = SparkSession.builder().master("local").getOrCreate() 16 | val data = spark.read.option("header", true).csv("src/test/resources/edge.csv") 17 | val result = DegreeStaticAlgo.apply(spark, data) 18 | assert(result.count() == 4) 19 | result.foreach(row => { 20 | assert(row.get(1).toString.toInt == 8) 21 | assert(row.get(2).toString.toInt == 4) 22 | assert(row.get(3).toString.toInt == 4) 23 | }) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib/KCoreAlgoSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.KCoreConfig 10 | import org.apache.spark.sql.SparkSession 11 | import org.junit.Test 12 | 13 | class KCoreAlgoSuite { 14 | @Test 15 | def kcoreSuite(): Unit = { 16 | val spark = SparkSession.builder().master("local").getOrCreate() 17 | val data = spark.read.option("header", true).csv("src/test/resources/edge.csv") 18 | val kcoreConfig = new KCoreConfig(10, 3) 19 | val kcoreResult = KCoreAlgo.apply(spark, data, kcoreConfig) 20 | assert(kcoreResult.count() == 4) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib/LabelPropagationAlgoSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.LPAConfig 10 | import org.apache.spark.sql.SparkSession 11 | import org.junit.Test 12 | 13 | class LabelPropagationAlgoSuite { 14 | @Test 15 | def lpaAlgoSuite(): Unit = { 16 | val spark = SparkSession.builder().master("local").getOrCreate() 17 | val data = spark.read.option("header", true).csv("src/test/resources/edge.csv") 18 | val lpaConfig = new LPAConfig(5) 19 | val result = LabelPropagationAlgo.apply(spark, data, lpaConfig, false) 20 | assert(result.count() == 4) 21 | result.foreach(row => { 22 | assert(row.get(1).toString.toInt == 1) 23 | }) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib/LouvainAlgoSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.{ConfigSuite, Configs, LouvainConfig, SparkConfig} 10 | import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} 11 | import org.junit.Test 12 | 13 | class LouvainAlgoSuite { 14 | @Test 15 | def louvainSuite(): Unit = { 16 | val spark = SparkSession.builder().master("local").getOrCreate() 17 | val data = spark.read.option("header", true).csv("src/test/resources/edge.csv") 18 | val louvainConfig = new LouvainConfig(5, 2, 1.0) 19 | val louvainResult = LouvainAlgo.apply(spark, data, louvainConfig, false) 20 | assert(louvainResult.count() == 4) 21 | 22 | val dataWithoutWith = spark.read 23 | .option("header", false) 24 | .option("delimiter", " ") 25 | .csv("src/test/resources/edge_noWeight.csv") 26 | .select("_c0", "_c1") 27 | val louvainResult1 = LouvainAlgo.apply(spark, dataWithoutWith, louvainConfig, false) 28 | assert(louvainResult1.count() == 13) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib/PageRankAlgoSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.{Configs, PRConfig, SparkConfig} 10 | import org.apache.spark.sql.{DataFrame, SparkSession} 11 | import org.junit.Test 12 | 13 | class PageRankAlgoSuite { 14 | @Test 15 | def pageRankSuite(): Unit = { 16 | val spark = SparkSession.builder().master("local").getOrCreate() 17 | val data = spark.read.option("header", true).csv("src/test/resources/edge.csv") 18 | val prConfig = new PRConfig(5, 1.0) 19 | val louvainResult = PageRankAlgo.apply(spark, data, prConfig, false) 20 | assert(louvainResult.count() == 4) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib/SCCAlgoSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.CcConfig 10 | import org.apache.spark.sql.SparkSession 11 | import org.junit.Test 12 | 13 | class SCCAlgoSuite { 14 | @Test 15 | def sccAlgoSuite(): Unit = { 16 | val spark = SparkSession.builder().master("local").getOrCreate() 17 | val data = spark.read.option("header", true).csv("src/test/resources/edge.csv") 18 | val sccConfig = new CcConfig(5) 19 | val sccResult = StronglyConnectedComponentsAlgo.apply(spark, data, sccConfig, true) 20 | assert(sccResult.count() == 4) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib/ShortestPathAlgoSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 
2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import com.vesoft.nebula.algorithm.config.ShortestPathConfig 10 | import org.apache.spark.sql.SparkSession 11 | import org.junit.Test 12 | 13 | class ShortestPathAlgoSuite { 14 | @Test 15 | def shortestPathAlgoSuite(): Unit = { 16 | val spark = SparkSession.builder().master("local").getOrCreate() 17 | val data = spark.read.option("header", true).csv("src/test/resources/edge.csv") 18 | val shortestPathConfig = new ShortestPathConfig(Seq(1, 2)) 19 | val result = ShortestPathAlgo.apply(spark, data, shortestPathConfig, false) 20 | assert(result.count() == 4) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/lib/TrangleCountSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.lib 8 | 9 | import org.apache.spark.sql.SparkSession 10 | import org.junit.Test 11 | 12 | class TrangleCountSuite { 13 | @Test 14 | def trangleCountSuite(): Unit = { 15 | val spark = SparkSession.builder().master("local").getOrCreate() 16 | val data = spark.read.option("header", true).csv("src/test/resources/edge.csv") 17 | val trangleCountResult = TriangleCountAlgo.apply(spark, data) 18 | assert(trangleCountResult.count() == 4) 19 | assert(trangleCountResult.first().get(1) == 3) 20 | trangleCountResult.foreach(row => { 21 | assert(row.get(1) == 3) 22 | }) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /nebula-algorithm/src/test/scala/com/vesoft/nebula/algorithm/utils/NebulaUtilSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.algorithm.utils 8 | 9 | import org.junit.Test 10 | 11 | class NebulaUtilSuite { 12 | 13 | @Test 14 | def validateWithWeight: Unit = { 15 | val hostPorts: String = "127.0.0.1:9559" 16 | val nameSpace: String = "nb" 17 | val labels: List[String] = List("serve", "follow") 18 | val hasWeight: Boolean = true 19 | val weightCols: List[String] = List("start_year", "degree") 20 | } 21 | 22 | @Test 23 | def validateWithoutWeight: Unit = { 24 | val hostPorts: String = "127.0.0.1:9559" 25 | val nameSpace: String = "nb" 26 | val labels: List[String] = List("serve") 27 | val hasWeight: Boolean = false 28 | val weightCols: List[String] = List() 29 | } 30 | 31 | @Test 32 | def getResultPathWithEnding: Unit = { 33 | val path: String = "/tmp/" 34 | val algorithmName: String = "aaa" 35 | assert(NebulaUtil.getResultPath(path, algorithmName).equals("/tmp/aaa")) 36 | } 37 | 38 | @Test 39 | def getResultPathWithoutEnding: Unit = { 40 | val path: String = "/tmp" 41 | val algorithmName: String = "aaa" 42 | assert(NebulaUtil.getResultPath(path, algorithmName).equals("/tmp/aaa")) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /nebula-exchange/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | 25 | # build target 26 | target/ 27 | 28 | # IDE 29 | .idea/ 30 | .eclipse/ 31 | *.iml 32 | 33 | spark-importer.ipr 34 | spark-importer.iws 35 | 36 | .DS_Store 37 | -------------------------------------------------------------------------------- /nebula-exchange/README-CN.md: -------------------------------------------------------------------------------- 1 | # 欢迎使用 Nebula Exchange 2.0 2 | [English](https://github.com/vesoft-inc/nebula-spark-utils/blob/master/nebula-exchange/README.md) 3 | 4 | Nebula Exchange 2.0(简称为 Exchange 2.0)是一款 Apache Spark™ 应用,用于在分布式环境中将集群中的数据批量迁移到 Nebula Graph 中,能支持多种不同格式的批式数据和流式数据的迁移。 5 | 6 | Exchange 2.0 仅支持 Nebula Graph 2.x。 7 | 8 | 如果您正在使用 Nebula Graph v1.x,请使用 [Nebula Exchange v1.0](https://github.com/vesoft-inc/nebula-java/tree/v1.0/tools/exchange) ,或参考 Exchange 1.0 的使用文档[《Nebula Exchange 用户手册》](https://docs.nebula-graph.com.cn/nebula-exchange/about-exchange/ex-ug-what-is-exchange/ "点击前往 Nebula Graph 网站")。 9 | 10 | ## 如何获取 11 | 12 | 1. 编译打包最新的 Exchange。 13 | 14 | ```bash 15 | $ git clone https://github.com/vesoft-inc/nebula-spark-utils.git 16 | $ cd nebula-spark-utils/nebula-exchange 17 | $ mvn clean package -Dmaven.test.skip=true -Dgpg.skip -Dmaven.javadoc.skip=true 18 | ``` 19 | 20 | 编译打包完成后,可以在 nebula-spark-utils/nebula-exchange/target/ 目录下看到 nebula-exchange-2.0-SNAPSHOT.jar 文件。 21 | 2. 在 Maven 远程仓库下载 22 | 23 | https://repo1.maven.org/maven2/com/vesoft/nebula-exchange/ 24 | ## 使用说明 25 | 26 | 特性 & 注意事项: 27 | 28 | *1. Nebula Graph 2.0 支持 String 类型和 Integer 类型的点 id 。* 29 | 30 | *2. Exchange 2.0 新增 null、Date、DateTime、Time 类型数据的导入( DateTime 是 UTC 时区,非 Local time)。* 31 | 32 | *3. 
Exchange 2.0 支持 Hive on Spark 以外的 Hive 数据源,需在配置文件中配置 Hive 源,具体配置示例参考 [application.conf](https://github.com/vesoft-inc/nebula-spark-utils/tree/master/nebula-exchange/src/main/resources/application.conf) 中 Hive 的配置。* 33 | 34 | *4. Exchange 2.0 将导入失败的 INSERT 语句进行落盘,存于配置文件的 error/output 路径中。* 35 | 36 | *5. 配置文件参考 [application.conf](https://github.com/vesoft-inc/nebula-spark-utils/tree/master/nebula-exchange/src/main/resources/application.conf )。* 37 | 38 | *6. Exchange 2.0 的导入命令:* 39 | ``` 40 | $SPARK_HOME/bin/spark-submit --class com.vesoft.nebula.exchange.Exchange --master local nebula-exchange-2.0.0.jar -c /path/to/application.conf 41 | ``` 42 | 如果数据源有HIVE,则导入命令最后还需要加 `-h` 表示启用HIVE数据源。 43 | 44 | 注:在Yarn-Cluster模式下提交 Exchange,请使用如下提交命令: 45 | ``` 46 | $SPARK_HOME/bin/spark-submit --class com.vesoft.nebula.exchange.Exchange \ 47 | --master yarn-cluster \ 48 | --files application.conf \ 49 | --conf spark.driver.extraClassPath=./ \ 50 | --conf spark.executor.extraClassPath=./ \ 51 | nebula-exchange-2.0.0.jar \ 52 | -c application.conf 53 | ``` 54 | 55 | 关于 Nebula Exchange 的更多说明,请参考 Exchange 2.0 的[使用手册](https://docs.nebula-graph.com.cn/2.0.1/nebula-exchange/about-exchange/ex-ug-what-is-exchange/) 。 56 | 57 | ## 贡献 58 | 59 | Nebula Exchange 2.0 是一个完全开源的项目,欢迎开源爱好者通过以下方式参与: 60 | 61 | - 前往 [Nebula Graph 论坛](https://discuss.nebula-graph.com.cn/ "点击前往“Nebula Graph 论坛") 上参与 Issue 讨论,如答疑、提供想法或者报告无法解决的问题 62 | - 撰写或改进文档 63 | - 提交优化代码 64 | -------------------------------------------------------------------------------- /nebula-exchange/README.md: -------------------------------------------------------------------------------- 1 | # Nebula Exchange 2.0 2 | [中文版](https://github.com/vesoft-inc/nebula-spark-utils/blob/master/nebula-exchange/README-CN.md) 3 | 4 | Nebula Exchange (Exchange for short) is an Apache Spark application. It is used to migrate cluster data in bulk from Spark to Nebula Graph in a distributed environment. It supports migration of batch data and streaming data in various formats. 5 | 6 | Exchange 2.0 only supports Nebula Graph 2.0 . If you want to import data for Nebula Graph v1.x,please use [Nebula Exchange v1.0](https://github.com/vesoft-inc/nebula-java/tree/v1.0/tools/exchange). 7 | 8 | ## How to get 9 | 10 | 1. Package latest Exchange。 11 | 12 | ```bash 13 | $ git clone https://github.com/vesoft-inc/nebula-spark-utils.git 14 | $ cd nebula-spark-utils/nebula-exchange 15 | $ mvn clean package -Dmaven.test.skip=true -Dgpg.skip -Dmaven.javadoc.skip=true 16 | ``` 17 | 18 | After the packaging, you can see the newly generated nebula-exchange-2.0-SNAPSHOT.jar under the nebula-spark-utils/nebula-exchange/target/ directory. 19 | 2. 
Download from Maven repository 20 | 21 | https://repo1.maven.org/maven2/com/vesoft/nebula-exchange/ 22 | ## How to use 23 | 24 | Import command: 25 | ``` 26 | $SPARK_HOME/bin/spark-submit --class com.vesoft.nebula.exchange.Exchange --master local nebula-exchange-2.0.0.jar -c /path/to/application.conf 27 | ``` 28 | If your data source is Hive, the import command is: 29 | ``` 30 | $SPARK_HOME/bin/spark-submit --class com.vesoft.nebula.exchange.Exchange --master local nebula-exchange-2.0.0.jar -c /path/to/application.conf -h 31 | ``` 32 | 33 | Note: To submit Exchange in Yarn-Cluster mode, please use the following command: 34 | ``` 35 | $SPARK_HOME/bin/spark-submit --class com.vesoft.nebula.exchange.Exchange \ 36 | --master yarn-cluster \ 37 | --files application.conf \ 38 | --conf spark.driver.extraClassPath=./ \ 39 | --conf spark.executor.extraClassPath=./ \ 40 | nebula-exchange-2.0.0.jar \ 41 | -c application.conf 42 | ``` 43 | 44 | For more details about Exchange, please refer to [Exchange 2.0](https://docs.nebula-graph.io/2.0.1/16.eco-tools/1.nebula-exchange/). 45 | 46 | 47 | ## New Features 48 | 49 | 1. Supports importing vertex data with String and Integer type IDs. 50 | 2. Supports importing data of the Null, Date, DateTime, and Time types (DateTime uses UTC, not local time). 51 | 3. Supports importing data from other Hive sources besides Hive on Spark. 52 | 4. Supports recording and retrying the INSERT statement after failures during data import. 53 | 54 | Refer to [application.conf](https://github.com/vesoft-inc/nebula-spark-utils/tree/master/nebula-exchange/src/main/resources/application.conf) as an example to edit the configuration file. -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/CheckPointHandler.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory.
5 | */ 6 | 7 | package com.vesoft.nebula.exchange 8 | 9 | import com.vesoft.nebula.exchange.config.{SourceCategory} 10 | import com.vesoft.nebula.exchange.utils.HDFSUtils 11 | import com.vesoft.nebula.exchange.config.SchemaConfigEntry 12 | import org.apache.spark.TaskContext 13 | 14 | /** 15 | * CheckPointHandler handle the checkpoint files for Neo4j and Janusgraph 16 | */ 17 | object CheckPointHandler { 18 | 19 | def checkSupportResume(value: SourceCategory.Value): Boolean = { 20 | value match { 21 | case SourceCategory.NEO4J => true 22 | case SourceCategory.JANUS_GRAPH => true 23 | case _ => false 24 | } 25 | } 26 | 27 | def getPathAndOffset(schemaConfig: SchemaConfigEntry, 28 | breakPointCount: Long): Option[(String, Long)] = { 29 | val partitionId = TaskContext.getPartitionId() 30 | if (checkSupportResume(schemaConfig.dataSourceConfigEntry.category) && schemaConfig.checkPointPath.isDefined) { 31 | val path = s"${schemaConfig.checkPointPath.get}/${schemaConfig.name}.${partitionId}" 32 | val offset = breakPointCount + fetchOffset(path) 33 | Some((path, offset)) 34 | } else { 35 | None 36 | } 37 | } 38 | 39 | def fetchOffset(path: String): Long = { 40 | HDFSUtils.getContent(path).toLong 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/ErrorHandler.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.exchange 8 | 9 | import org.apache.hadoop.conf.Configuration 10 | import org.apache.hadoop.fs.{FileSystem, Path} 11 | import org.apache.log4j.Logger 12 | 13 | import scala.collection.mutable.ArrayBuffer 14 | 15 | object ErrorHandler { 16 | @transient 17 | private[this] val LOG = Logger.getLogger(this.getClass) 18 | 19 | /** 20 | * clean all the failed data for error path before reload. 21 | * 22 | * @param path path to clean 23 | */ 24 | def clear(path: String): Unit = { 25 | try { 26 | val fileSystem = FileSystem.get(new Configuration()) 27 | val filesStatus = fileSystem.listStatus(new Path(path)) 28 | for (file <- filesStatus) { 29 | if (!file.getPath.getName.startsWith("reload.")) { 30 | fileSystem.delete(file.getPath, true) 31 | } 32 | } 33 | } catch { 34 | case e: Throwable => { 35 | LOG.error(s"$path cannot be clean, but this error does not affect the import result, " + 36 | s"you can only focus on the reload files.", 37 | e) 38 | } 39 | } 40 | } 41 | 42 | /** 43 | * save the failed execute statement. 
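* Each buffered statement is written to the target file on its own line.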
44 | * 45 | * @param buffer buffer saved failed ngql 46 | * @param path path to write these buffer ngql 47 | */ 48 | def save(buffer: ArrayBuffer[String], path: String): Unit = { 49 | LOG.info(s"create reload path $path") 50 | val fileSystem = FileSystem.get(new Configuration()) 51 | val errors = fileSystem.create(new Path(path)) 52 | 53 | try { 54 | for (error <- buffer) { 55 | errors.writeBytes(error) 56 | errors.writeBytes("\n") 57 | } 58 | } finally { 59 | errors.close() 60 | } 61 | } 62 | 63 | /** 64 | * check if path exists 65 | * 66 | * @param path error path 67 | *@return true if path exists 68 | */ 69 | def existError(path: String): Boolean = { 70 | val fileSystem = FileSystem.get(new Configuration()) 71 | fileSystem.exists(new Path(path)) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/GraphProvider.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.exchange 8 | 9 | import com.google.common.net.HostAndPort 10 | import com.vesoft.nebula.client.graph.NebulaPoolConfig 11 | import com.vesoft.nebula.client.graph.data.{HostAddress, ResultSet} 12 | import com.vesoft.nebula.client.graph.net.{NebulaPool, Session} 13 | import com.vesoft.nebula.exchange.config.UserConfigEntry 14 | import org.apache.log4j.Logger 15 | 16 | import scala.collection.JavaConverters._ 17 | import scala.collection.mutable.ListBuffer 18 | 19 | /** 20 | * GraphProvider for Nebula Graph Service 21 | */ 22 | class GraphProvider(addresses: List[HostAndPort], timeout: Int) 23 | extends AutoCloseable 24 | with Serializable { 25 | private[this] lazy val LOG = Logger.getLogger(this.getClass) 26 | 27 | @transient val nebulaPoolConfig = new NebulaPoolConfig 28 | @transient val pool: NebulaPool = new NebulaPool 29 | val address = new ListBuffer[HostAddress]() 30 | for (addr <- addresses) { 31 | address.append(new HostAddress(addr.getHostText, addr.getPort)) 32 | } 33 | val randAddr = scala.util.Random.shuffle(address) 34 | 35 | nebulaPoolConfig.setTimeout(timeout) 36 | pool.init(randAddr.asJava, nebulaPoolConfig) 37 | 38 | def getGraphClient(userConfigEntry: UserConfigEntry): Session = { 39 | pool.getSession(userConfigEntry.user, userConfigEntry.password, true); 40 | } 41 | 42 | def releaseGraphClient(session: Session): Unit = { 43 | session.release() 44 | } 45 | 46 | override def close(): Unit = { 47 | pool.close() 48 | } 49 | 50 | def switchSpace(session: Session, space: String): ResultSet = { 51 | val switchStatment = s"use $space" 52 | LOG.info(s"switch space $space") 53 | val result = submit(session, switchStatment) 54 | result 55 | } 56 | 57 | def submit(session: Session, statement: String): ResultSet = { 58 | session.execute(statement) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/MetaProvider.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.exchange 8 | 9 | import com.google.common.net.HostAndPort 10 | import com.vesoft.nebula.client.graph.data.HostAddress 11 | import com.vesoft.nebula.client.meta.MetaClient 12 | import com.vesoft.nebula.exchange.config.Type 13 | import com.vesoft.nebula.meta.{EdgeItem, PropertyType, TagItem} 14 | import org.apache.log4j.Logger 15 | 16 | import scala.collection.JavaConverters._ 17 | import scala.collection.mutable 18 | import scala.collection.mutable.ListBuffer 19 | 20 | /** 21 | * MetaProvider provide nebula graph meta query operations. 22 | */ 23 | class MetaProvider(addresses: List[HostAndPort], timeout: Int, retry: Int) 24 | extends AutoCloseable 25 | with Serializable { 26 | private[this] lazy val LOG = Logger.getLogger(this.getClass) 27 | 28 | val address: ListBuffer[HostAddress] = new ListBuffer[HostAddress] 29 | for (addr <- addresses) { 30 | address.append(new HostAddress(addr.getHostText, addr.getPort)) 31 | } 32 | 33 | private val metaClient = new MetaClient(address.asJava) 34 | metaClient.connect() 35 | 36 | def getPartNumber(space: String): Int = { 37 | metaClient.getPartsAlloc(space).size() 38 | } 39 | 40 | def getVidType(space: String): VidType.Value = { 41 | val vidType = metaClient.getSpace(space).getProperties.getVid_type.getType 42 | if (vidType == PropertyType.FIXED_STRING) { 43 | return VidType.STRING 44 | } 45 | VidType.INT 46 | } 47 | 48 | def getTagSchema(space: String, tag: String): Map[String, Integer] = { 49 | val tagSchema = metaClient.getTag(space, tag) 50 | val schema = new mutable.HashMap[String, Integer] 51 | 52 | val columns = tagSchema.getColumns 53 | for (colDef <- columns.asScala) { 54 | schema.put(new String(colDef.getName), colDef.getType.getType.getValue) 55 | } 56 | schema.toMap 57 | } 58 | 59 | def getEdgeSchema(space: String, edge: String): Map[String, Integer] = { 60 | val edgeSchema = metaClient.getEdge(space, edge) 61 | val schema = new mutable.HashMap[String, Integer] 62 | 63 | val columns = edgeSchema.getColumns 64 | for (colDef <- columns.asScala) { 65 | schema.put(new String(colDef.getName), colDef.getType.getType.getValue) 66 | } 67 | schema.toMap 68 | } 69 | 70 | def getLabelType(space: String, label: String): Type.Value = { 71 | val tags = metaClient.getTags(space) 72 | for (tag <- tags.asScala) { 73 | if (new String(tag.getTag_name).equals(label)) { 74 | return Type.VERTEX 75 | } 76 | } 77 | val edges = metaClient.getEdges(space) 78 | for (edge <- edges.asScala) { 79 | if (new String(edge.getEdge_name).equals(label)) { 80 | return Type.EDGE 81 | } 82 | } 83 | null 84 | } 85 | 86 | def getSpaceVidLen(space: String): Int = { 87 | val spaceItem = metaClient.getSpace(space); 88 | if (spaceItem == null) { 89 | throw new IllegalArgumentException(s"space $space does not exist.") 90 | } 91 | spaceItem.getProperties.getVid_type.getType_length 92 | } 93 | 94 | def getTagItem(space: String, tag: String): TagItem = { 95 | val tagItemList = metaClient.getTags(space).asScala 96 | for (tagItem: TagItem <- tagItemList) { 97 | if (new String(tagItem.tag_name).equals(tag)) { 98 | return tagItem 99 | } 100 | } 101 | throw new IllegalArgumentException(s"tag ${space}.${tag} does not exist.") 102 | } 103 | 104 | def getEdgeItem(space: String, edge: String): EdgeItem = { 105 | val edgeItemList = metaClient.getEdges(space).asScala 106 | for (edgeItem: EdgeItem <- edgeItemList) { 107 | if (new String(edgeItem.edge_name).equals(edge)) { 108 | return edgeItem 109 | } 110 | } 111 | throw new IllegalArgumentException(s"edge 
${space}.${edge} does not exist.") 112 | } 113 | 114 | override def close(): Unit = { 115 | metaClient.close() 116 | } 117 | 118 | } 119 | 120 | object VidType extends Enumeration { 121 | type Type = Value 122 | 123 | val STRING = Value("STRING") 124 | val INT = Value("INT") 125 | } 126 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/config/SchemaConfigs.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.exchange.config 8 | 9 | import com.vesoft.nebula.exchange.KeyPolicy 10 | 11 | /** 12 | * SchemaConfigEntry is tag/edge super class use to save some basic parameter for importer. 13 | */ 14 | sealed trait SchemaConfigEntry { 15 | 16 | /** nebula tag or edge name */ 17 | def name: String 18 | 19 | /** see{@link DataSourceConfigEntry}*/ 20 | def dataSourceConfigEntry: DataSourceConfigEntry 21 | 22 | /** see{@link DataSinkConfigEntry}*/ 23 | def dataSinkConfigEntry: DataSinkConfigEntry 24 | 25 | /** data source fields which are going to be import to nebula as properties */ 26 | def fields: List[String] 27 | 28 | /** nebula properties which are going to fill value with data source value*/ 29 | def nebulaFields: List[String] 30 | 31 | /** vertex or edge amount of one batch import */ 32 | def batch: Int 33 | 34 | /** spark partition */ 35 | def partition: Int 36 | 37 | /** check point path */ 38 | def checkPointPath: Option[String] 39 | } 40 | 41 | /** 42 | * 43 | * @param name 44 | * @param dataSourceConfigEntry 45 | * @param dataSinkConfigEntry 46 | * @param fields 47 | * @param nebulaFields 48 | * @param vertexField 49 | * @param vertexPolicy 50 | * @param batch 51 | * @param partition 52 | * @param checkPointPath 53 | */ 54 | case class TagConfigEntry(override val name: String, 55 | override val dataSourceConfigEntry: DataSourceConfigEntry, 56 | override val dataSinkConfigEntry: DataSinkConfigEntry, 57 | override val fields: List[String], 58 | override val nebulaFields: List[String], 59 | vertexField: String, 60 | vertexPolicy: Option[KeyPolicy.Value], 61 | override val batch: Int, 62 | override val partition: Int, 63 | override val checkPointPath: Option[String]) 64 | extends SchemaConfigEntry { 65 | require(name.trim.nonEmpty && vertexField.trim.nonEmpty && batch > 0) 66 | 67 | override def toString: String = { 68 | s"Tag name: $name, " + 69 | s"source: $dataSourceConfigEntry, " + 70 | s"sink: $dataSinkConfigEntry, " + 71 | s"vertex field: $vertexField, " + 72 | s"vertex policy: $vertexPolicy, " + 73 | s"batch: $batch, " + 74 | s"partition: $partition." 
75 | } 76 | } 77 | 78 | /** 79 | * 80 | * @param name 81 | * @param dataSourceConfigEntry 82 | * @param dataSinkConfigEntry 83 | * @param fields 84 | * @param nebulaFields 85 | * @param sourceField 86 | * @param sourcePolicy 87 | * @param rankingField 88 | * @param targetField 89 | * @param targetPolicy 90 | * @param isGeo 91 | * @param latitude 92 | * @param longitude 93 | * @param batch 94 | * @param partition 95 | * @param checkPointPath 96 | */ 97 | case class EdgeConfigEntry(override val name: String, 98 | override val dataSourceConfigEntry: DataSourceConfigEntry, 99 | override val dataSinkConfigEntry: DataSinkConfigEntry, 100 | override val fields: List[String], 101 | override val nebulaFields: List[String], 102 | sourceField: String, 103 | sourcePolicy: Option[KeyPolicy.Value], 104 | rankingField: Option[String], 105 | targetField: String, 106 | targetPolicy: Option[KeyPolicy.Value], 107 | isGeo: Boolean, 108 | latitude: Option[String], 109 | longitude: Option[String], 110 | override val batch: Int, 111 | override val partition: Int, 112 | override val checkPointPath: Option[String]) 113 | extends SchemaConfigEntry { 114 | require( 115 | name.trim.nonEmpty && sourceField.trim.nonEmpty && 116 | targetField.trim.nonEmpty && batch > 0) 117 | 118 | override def toString: String = { 119 | if (isGeo) { 120 | s"Edge name: $name, " + 121 | s"source: $dataSourceConfigEntry, " + 122 | s"sink: $dataSinkConfigEntry, " + 123 | s"latitude: $latitude, " + 124 | s"longitude: $longitude, " + 125 | s"source field: $sourceField, " + 126 | s"source policy: $sourcePolicy, " + 127 | s"ranking: $rankingField, " + 128 | s"target field: $targetField, " + 129 | s"target policy: $targetPolicy, " + 130 | s"batch: $batch, " + 131 | s"partition: $partition." 132 | } else { 133 | s"Edge name: $name, " + 134 | s"source: $dataSourceConfigEntry, " + 135 | s"sink: $dataSinkConfigEntry, " + 136 | s"source field: $sourceField, " + 137 | s"source policy: $sourcePolicy, " + 138 | s"ranking: $rankingField, " + 139 | s"target field: $targetField, " + 140 | s"target policy: $targetPolicy, " + 141 | s"batch: $batch, " + 142 | s"partition: $partition." 143 | } 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/config/SinkConfigs.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.exchange.config 8 | 9 | /** 10 | * SinkCategory is used to expression the writer's type. 11 | */ 12 | object SinkCategory extends Enumeration { 13 | type Type = Value 14 | 15 | val CLIENT = Value("CLIENT") 16 | val SST = Value("SST") 17 | } 18 | 19 | class SinkCategory 20 | 21 | /** 22 | * DataSinkConfigEntry 23 | */ 24 | sealed trait DataSinkConfigEntry { 25 | def category: SinkCategory.Value 26 | } 27 | 28 | /** 29 | * FileBaseSinkConfigEntry 30 | */ 31 | case class FileBaseSinkConfigEntry(override val category: SinkCategory.Value, 32 | localPath: String, 33 | remotePath: String, 34 | fsName: Option[String]) 35 | extends DataSinkConfigEntry { 36 | override def toString: String = { 37 | s"File sink: from ${localPath} to ${fsName.get}${remotePath}" 38 | } 39 | } 40 | 41 | /** 42 | * NebulaSinkConfigEntry use to specified the nebula service's address. 
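 * (These addresses are the graphd endpoints used by the CLIENT sink category;
 * the SST sink is configured through FileBaseSinkConfigEntry above.)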
43 | */ 44 | case class NebulaSinkConfigEntry(override val category: SinkCategory.Value, addresses: List[String]) 45 | extends DataSinkConfigEntry { 46 | override def toString: String = { 47 | s"Nebula sink addresses: ${addresses.mkString("[", ", ", "]")}" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/package.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula 8 | 9 | import com.google.common.base.Optional 10 | import com.google.common.util.concurrent.ListenableFuture 11 | import com.vesoft.nebula.exchange.utils.NebulaUtils 12 | 13 | import scala.collection.mutable.ListBuffer 14 | 15 | package object exchange { 16 | 17 | type GraphSpaceID = Int 18 | type PartitionID = Int 19 | type TagID = Int 20 | type EdgeType = Int 21 | type SchemaID = (TagID, EdgeType) 22 | type TagVersion = Long 23 | type EdgeVersion = Long 24 | type SchemaVersion = (TagVersion, EdgeVersion) 25 | type VertexID = Long 26 | type VertexIDSlice = String 27 | type EdgeRank = Long 28 | type PropertyNames = List[String] 29 | type PropertyValues = List[Any] 30 | type ProcessResult = ListBuffer[WriterResult] 31 | type WriterResult = ListenableFuture[Optional[Integer]] 32 | 33 | case class Vertex(vertexID: VertexIDSlice, values: PropertyValues) { 34 | 35 | def propertyValues = values.mkString(", ") 36 | 37 | override def toString: String = { 38 | s"Vertex ID: ${vertexID}, " + 39 | s"Values: ${values.mkString(", ")}" 40 | } 41 | } 42 | 43 | case class Vertices(names: PropertyNames, 44 | values: List[Vertex], 45 | policy: Option[KeyPolicy.Value] = None) { 46 | 47 | def propertyNames: String = NebulaUtils.escapePropName(names).mkString(",") 48 | 49 | override def toString: String = { 50 | s"Vertices: " + 51 | s"Property Names: ${names.mkString(", ")}" + 52 | s"Vertex Values: ${values.mkString(", ")} " + 53 | s"with policy ${policy}" 54 | } 55 | } 56 | 57 | case class Edge(source: VertexIDSlice, 58 | destination: VertexIDSlice, 59 | ranking: Option[EdgeRank], 60 | values: PropertyValues) { 61 | 62 | def this(source: VertexIDSlice, destination: VertexIDSlice, values: PropertyValues) = { 63 | this(source, destination, None, values) 64 | } 65 | 66 | def propertyValues: String = values.mkString(", ") 67 | 68 | override def toString: String = { 69 | s"Edge: ${source}->${destination}@${ranking} values: ${propertyValues}" 70 | } 71 | } 72 | 73 | case class Edges(names: PropertyNames, 74 | values: List[Edge], 75 | sourcePolicy: Option[KeyPolicy.Value] = None, 76 | targetPolicy: Option[KeyPolicy.Value] = None) { 77 | def propertyNames: String = NebulaUtils.escapePropName(names).mkString(",") 78 | 79 | override def toString: String = { 80 | "Edges:" + 81 | s"Property Names: ${names.mkString(", ")}" + 82 | s"with source policy ${sourcePolicy}" + 83 | s"with target policy ${targetPolicy}" 84 | } 85 | } 86 | 87 | object KeyPolicy extends Enumeration { 88 | type POLICY = Value 89 | val HASH = Value("hash") 90 | val UUID = Value("uuid") 91 | } 92 | 93 | case class Offset(start: Long, size: Long) 94 | } 95 | -------------------------------------------------------------------------------- 
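The package object above defines the in-memory model that Exchange's processors hand to the writers. The short sketch below is not part of the repository; the names and values are illustrative and only show how these case classes compose:

```
// illustrative only: in the real pipeline these values are produced by
// VerticesProcessor / EdgeProcessor from DataFrame rows
import com.vesoft.nebula.exchange.{Edge, Edges, KeyPolicy, Vertex, Vertices}

object DomainModelSketch extends App {
  val vertices = Vertices(
    names  = List("name", "age"),
    values = List(Vertex("1", List("\"Tom\"", 26))),
    policy = None)                       // or Some(KeyPolicy.HASH) to hash string ids

  val edges = Edges(
    names  = List("degree"),
    values = List(Edge("1", "2", Some(0L), List(90))))

  println(vertices.propertyNames)        // `name`,`age`
  println(edges)
}
```

In the actual import flow the processors batch these values and pass them to the writers under nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/writer.
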
/nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/processor/ReloadProcessor.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.exchange.processor 8 | 9 | import com.vesoft.nebula.exchange.{ErrorHandler, GraphProvider} 10 | import com.vesoft.nebula.exchange.config.Configs 11 | import com.vesoft.nebula.exchange.writer.NebulaGraphClientWriter 12 | import org.apache.log4j.Logger 13 | import org.apache.spark.TaskContext 14 | import org.apache.spark.sql.{DataFrame, Row} 15 | import org.apache.spark.util.LongAccumulator 16 | 17 | import scala.collection.mutable.ArrayBuffer 18 | 19 | class ReloadProcessor(data: DataFrame, 20 | config: Configs, 21 | batchSuccess: LongAccumulator, 22 | batchFailure: LongAccumulator) 23 | extends Processor { 24 | @transient 25 | private[this] lazy val LOG = Logger.getLogger(this.getClass) 26 | 27 | override def process(): Unit = { 28 | data.foreachPartition(processEachPartition(_)) 29 | } 30 | 31 | private def processEachPartition(iterator: Iterator[Row]): Unit = { 32 | val graphProvider = 33 | new GraphProvider(config.databaseConfig.getGraphAddress, config.connectionConfig.timeout) 34 | 35 | val writer = new NebulaGraphClientWriter(config.databaseConfig, 36 | config.userConfig, 37 | config.rateConfig, 38 | null, 39 | graphProvider) 40 | 41 | val errorBuffer = ArrayBuffer[String]() 42 | 43 | writer.prepare() 44 | // batch write 45 | val startTime = System.currentTimeMillis 46 | iterator.foreach { row => 47 | val failStatement = writer.writeNgql(row.getString(0)) 48 | if (failStatement == null) { 49 | batchSuccess.add(1) 50 | } else { 51 | errorBuffer.append(failStatement) 52 | batchFailure.add(1) 53 | } 54 | } 55 | if (errorBuffer.nonEmpty) { 56 | ErrorHandler.save(errorBuffer, 57 | s"${config.errorConfig.errorPath}/reload.${TaskContext.getPartitionId()}") 58 | errorBuffer.clear() 59 | } 60 | LOG.info(s"data reload in partition ${TaskContext 61 | .getPartitionId()} cost ${System.currentTimeMillis() - startTime}ms") 62 | writer.close() 63 | graphProvider.close() 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/reader/FileBaseReader.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.exchange.reader 8 | 9 | import com.vesoft.nebula.exchange.config.FileBaseSourceConfigEntry 10 | import com.vesoft.nebula.exchange.utils.NebulaUtils.DEFAULT_EMPTY_VALUE 11 | import org.apache.spark.sql.catalyst.encoders.RowEncoder 12 | import org.apache.spark.sql.types.StructType 13 | import org.apache.spark.sql.{DataFrame, Row, SparkSession} 14 | 15 | /** 16 | * The FileBaseReader is the abstract class for HDFS file reader. 
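 * The concrete readers below (ParquetReader, ORCReader, JSONReader, CSVReader) differ only in the
 * format-specific options they pass to session.read. A minimal usage sketch, where csvConfig is an
 * illustrative FileBaseSourceConfigEntry:
 *   val df = new CSVReader(spark, csvConfig).read()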
17 | * 18 | * @param session 19 | * @param path 20 | */ 21 | abstract class FileBaseReader(val session: SparkSession, val path: String) extends Reader { 22 | 23 | require(path.trim.nonEmpty) 24 | 25 | override def close(): Unit = { 26 | session.close() 27 | } 28 | } 29 | 30 | /** 31 | * The ParquetReader extend the FileBaseReader and support read parquet file from HDFS. 32 | * 33 | * @param session 34 | * @param parquetConfig 35 | */ 36 | class ParquetReader(override val session: SparkSession, parquetConfig: FileBaseSourceConfigEntry) 37 | extends FileBaseReader(session, parquetConfig.path) { 38 | 39 | override def read(): DataFrame = { 40 | session.read.parquet(path) 41 | } 42 | } 43 | 44 | /** 45 | * The ORCReader extend the FileBaseReader and support read orc file from HDFS. 46 | * 47 | * @param session 48 | * @param orcConfig 49 | */ 50 | class ORCReader(override val session: SparkSession, orcConfig: FileBaseSourceConfigEntry) 51 | extends FileBaseReader(session, orcConfig.path) { 52 | 53 | override def read(): DataFrame = { 54 | session.read.orc(path) 55 | } 56 | } 57 | 58 | /** 59 | * The JSONReader extend the FileBaseReader and support read json file from HDFS. 60 | * 61 | * @param session 62 | * @param jsonConfig 63 | */ 64 | class JSONReader(override val session: SparkSession, jsonConfig: FileBaseSourceConfigEntry) 65 | extends FileBaseReader(session, jsonConfig.path) { 66 | 67 | override def read(): DataFrame = { 68 | session.read.json(path) 69 | } 70 | } 71 | 72 | /** 73 | * The CSVReader extend the FileBaseReader and support read csv file from HDFS. 74 | * All types of the structure are StringType. 75 | * 76 | * @param session 77 | * @param csvConfig 78 | */ 79 | class CSVReader(override val session: SparkSession, csvConfig: FileBaseSourceConfigEntry) 80 | extends FileBaseReader(session, csvConfig.path) { 81 | 82 | override def read(): DataFrame = { 83 | session.read 84 | .option("delimiter", csvConfig.separator.get) 85 | .option("header", csvConfig.header.get) 86 | .option("emptyValue", DEFAULT_EMPTY_VALUE) 87 | .csv(path) 88 | } 89 | } 90 | 91 | /** 92 | * The CustomReader extend the FileBaseReader and support read text file from HDFS. 93 | * Transformation is a function convert a line into Row. 94 | * The structure of the row should be specified. 95 | * 96 | * @param session 97 | * @param customConfig 98 | * @param transformation 99 | * @param structType 100 | */ 101 | abstract class CustomReader(override val session: SparkSession, 102 | customConfig: FileBaseSourceConfigEntry, 103 | transformation: String => Row, 104 | filter: Row => Boolean, 105 | structType: StructType) 106 | extends FileBaseReader(session, customConfig.path) { 107 | 108 | override def read(): DataFrame = { 109 | val encoder = RowEncoder.apply(structType) 110 | session.read 111 | .text(path) 112 | .filter(!_.getString(0).isEmpty) 113 | .map(row => transformation(row.getString(0)))(encoder) 114 | .filter(filter) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/reader/Reader.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.exchange.reader 8 | 9 | import com.vesoft.nebula.exchange.Offset 10 | import com.vesoft.nebula.exchange.utils.HDFSUtils 11 | import org.apache.spark.sql.{DataFrame, SparkSession} 12 | 13 | /** 14 | * The Reader is used to create a DataFrame from the source, such as Hive or HDFS. 15 | */ 16 | trait Reader extends Serializable { 17 | def session: SparkSession 18 | 19 | def read(): DataFrame 20 | 21 | def close(): Unit 22 | } 23 | 24 | trait CheckPointSupport extends Serializable { 25 | 26 | def getOffsets(totalCount: Long, 27 | parallel: Int, 28 | checkPointPath: Option[String], 29 | checkPointNamePrefix: String): List[Offset] = { 30 | if (totalCount <= 0) 31 | throw new RuntimeException(s"${checkPointNamePrefix}: return data count<=0") 32 | 33 | val batchSizes = List.fill((totalCount % parallel).toInt)(totalCount / parallel + 1) ::: List 34 | .fill((parallel - totalCount % parallel).toInt)(totalCount / parallel) 35 | 36 | val startOffsets = batchSizes.scanLeft(0L)(_ + _).init 37 | 38 | val checkPointOffsets = checkPointPath match { 39 | case Some(path) => 40 | val files = Range(0, parallel).map(i => s"${path}/${checkPointNamePrefix}.${i}").toList 41 | if (files.forall(HDFSUtils.exists)) 42 | files.map(HDFSUtils.getContent(_).trim.toLong).sorted 43 | else startOffsets 44 | case _ => startOffsets 45 | } 46 | 47 | if (checkPointOffsets.zip(startOffsets).exists(x => x._1 < x._2)) 48 | throw new RuntimeException( 49 | s"Check Point file maybe previous. Please delete ${checkPointPath}/${checkPointNamePrefix}.* file") 50 | 51 | val eachPartitionLimit = { 52 | batchSizes 53 | .zip(startOffsets.zip(checkPointOffsets)) 54 | .map(x => { 55 | x._1 - (x._2._2 - x._2._1) 56 | }) 57 | } 58 | val offsets = checkPointOffsets.zip(eachPartitionLimit).map(x => Offset(x._1, x._2)) 59 | if (offsets.exists(_.size < 0L)) 60 | throw new RuntimeException( 61 | s"Check point file maybe broken. Please delete ${checkPointPath}/${checkPointNamePrefix}.* file") 62 | offsets 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/reader/StreamingBaseReader.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.exchange.reader 8 | 9 | import com.vesoft.nebula.exchange.config.{KafkaSourceConfigEntry, PulsarSourceConfigEntry} 10 | import org.apache.spark.sql.{DataFrame, SparkSession} 11 | 12 | /** 13 | * Spark Streaming 14 | * 15 | * @param session 16 | */ 17 | abstract class StreamingBaseReader(override val session: SparkSession) extends Reader { 18 | 19 | override def close(): Unit = { 20 | session.close() 21 | } 22 | } 23 | 24 | /** 25 | * 26 | * @param session 27 | * @param kafkaConfig 28 | */ 29 | class KafkaReader(override val session: SparkSession, kafkaConfig: KafkaSourceConfigEntry) 30 | extends StreamingBaseReader(session) { 31 | 32 | require(kafkaConfig.server.trim.nonEmpty && kafkaConfig.topic.trim.nonEmpty) 33 | 34 | override def read(): DataFrame = { 35 | session.readStream 36 | .format("kafka") 37 | .option("kafka.bootstrap.servers", kafkaConfig.server) 38 | .option("subscribe", kafkaConfig.topic) 39 | .load() 40 | } 41 | } 42 | 43 | /** 44 | * 45 | * @param session 46 | * @param pulsarConfig 47 | */ 48 | class PulsarReader(override val session: SparkSession, pulsarConfig: PulsarSourceConfigEntry) 49 | extends StreamingBaseReader(session) { 50 | 51 | override def read(): DataFrame = { 52 | session.readStream 53 | .format("pulsar") 54 | .option("service.url", pulsarConfig.serviceUrl) 55 | .option("admin.url", pulsarConfig.adminUrl) 56 | .options(pulsarConfig.options) 57 | .load() 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/utils/HDFSUtils.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.exchange.utils 8 | 9 | import java.io.File 10 | import java.nio.charset.Charset 11 | import org.apache.hadoop.conf.Configuration 12 | import org.apache.hadoop.fs.{FileSystem, Path} 13 | import org.apache.log4j.Logger 14 | import scala.io.Source 15 | 16 | object HDFSUtils { 17 | private[this] val LOG = Logger.getLogger(this.getClass) 18 | 19 | def getFileSystem(namenode: String = null): FileSystem = { 20 | val conf = new Configuration() 21 | if (namenode != null) { 22 | conf.set("fs.default.name", namenode) 23 | conf.set("fs.defaultFS", namenode) 24 | } 25 | FileSystem.get(conf) 26 | } 27 | 28 | def list(path: String): List[String] = { 29 | val system = getFileSystem() 30 | system.listStatus(new Path(path)).map(_.getPath.getName).toList 31 | } 32 | 33 | def exists(path: String): Boolean = { 34 | val system = getFileSystem() 35 | system.exists(new Path(path)) 36 | } 37 | 38 | def getContent(path: String): String = { 39 | val system = getFileSystem() 40 | val inputStream = system.open(new Path(path)) 41 | Source.fromInputStream(inputStream).mkString 42 | } 43 | 44 | def saveContent(path: String, 45 | content: String, 46 | charset: Charset = Charset.defaultCharset()): Unit = { 47 | val system = getFileSystem() 48 | val outputStream = system.create(new Path(path)) 49 | try { 50 | outputStream.write(content.getBytes(charset)) 51 | } finally { 52 | outputStream.close() 53 | } 54 | } 55 | 56 | def upload(localPath: String, remotePath: String, namenode: String = null): Unit = { 57 | try { 58 | val localFile = new File(localPath) 59 | if (!localFile.exists() || localFile.length() <= 0) { 60 | return 61 | } 62 | } catch { 63 | case e: Throwable => 64 | LOG.warn("check for empty local file error, but you can ignore this check error. " + 65 | "If there is empty sst file in your hdfs, please delete it manually", 66 | e) 67 | } 68 | val system = getFileSystem(namenode) 69 | system.copyFromLocalFile(new Path(localPath), new Path(remotePath)) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/utils/KafkaUtils.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.exchange.utils 8 | 9 | import com.vesoft.nebula.exchange.{Edge, Vertex} 10 | 11 | object KafkaUtils { 12 | 13 | def writeVertices(vertices: Vertex*): Unit = {} 14 | def writeEdge(edges: Edge*): Unit = {} 15 | } 16 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/utils/NebulaUtils.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.exchange.utils 8 | 9 | import com.google.common.primitives.UnsignedLong 10 | import com.vesoft.nebula.exchange.{MetaProvider, VidType} 11 | import com.vesoft.nebula.exchange.config.{SchemaConfigEntry, Type} 12 | import org.apache.commons.codec.digest.MurmurHash2 13 | import org.apache.log4j.Logger 14 | 15 | import scala.collection.JavaConversions.seqAsJavaList 16 | import scala.collection.mutable 17 | import scala.collection.mutable.ListBuffer 18 | 19 | object NebulaUtils { 20 | val DEFAULT_EMPTY_VALUE: String = "_NEBULA_EMPTY" 21 | 22 | private[this] val LOG = Logger.getLogger(this.getClass) 23 | 24 | def getDataSourceFieldType(sourceConfig: SchemaConfigEntry, 25 | space: String, 26 | metaProvider: MetaProvider): Map[String, Int] = { 27 | val nebulaFields = sourceConfig.nebulaFields 28 | val sourceFields = sourceConfig.fields 29 | val label = sourceConfig.name 30 | 31 | var nebulaSchemaMap: Map[String, Integer] = null 32 | val dataType: Type.Value = metaProvider.getLabelType(space, label) 33 | if (dataType == null) { 34 | throw new IllegalArgumentException(s"label $label does not exist.") 35 | } 36 | if (dataType == Type.VERTEX) { 37 | nebulaSchemaMap = metaProvider.getTagSchema(space, label) 38 | } else { 39 | nebulaSchemaMap = metaProvider.getEdgeSchema(space, label) 40 | } 41 | 42 | val sourceSchemaMap: mutable.Map[String, Int] = mutable.HashMap[String, Int]() 43 | for (i <- nebulaFields.indices) { 44 | sourceSchemaMap.put(sourceFields.get(i), nebulaSchemaMap(nebulaFields.get(i))) 45 | } 46 | sourceSchemaMap.toMap 47 | } 48 | 49 | def isNumic(str: String): Boolean = { 50 | val newStr: String = if (str.startsWith("-")) { 51 | str.substring(1) 52 | } else { str } 53 | 54 | for (char <- newStr.toCharArray) { 55 | if (!Character.isDigit(char)) return false 56 | } 57 | true 58 | } 59 | 60 | def escapeUtil(str: String): String = { 61 | var s = str 62 | if (s.contains("\\")) { 63 | s = s.replaceAll("\\\\", "\\\\\\\\") 64 | } 65 | if (s.contains("\t")) { 66 | s = s.replaceAll("\t", "\\\\t") 67 | } 68 | if (s.contains("\n")) { 69 | s = s.replaceAll("\n", "\\\\n") 70 | } 71 | if (s.contains("\"")) { 72 | s = s.replaceAll("\"", "\\\\\"") 73 | } 74 | if (s.contains("\'")) { 75 | s = s.replaceAll("\'", "\\\\'") 76 | } 77 | if (s.contains("\r")) { 78 | s = s.replaceAll("\r", "\\\\r") 79 | } 80 | if (s.contains("\b")) { 81 | s = s.replaceAll("\b", "\\\\b") 82 | } 83 | s 84 | } 85 | 86 | def getPartitionId(id: String, partitionSize: Int, vidType: VidType.Value): Int = { 87 | val hashValue: Long = if (vidType == VidType.STRING) { 88 | MurmurHash2.hash64(id.getBytes, id.length, 0xc70f6907) 89 | } else { 90 | id.toLong 91 | } 92 | val unsignedValue = UnsignedLong.fromLongBits(hashValue) 93 | val partSize = UnsignedLong.fromLongBits(partitionSize) 94 | unsignedValue.mod(partSize).intValue + 1 95 | } 96 | 97 | def escapePropName(nebulaFields: List[String]): List[String] = { 98 | val propNames: ListBuffer[String] = new ListBuffer[String] 99 | for (key <- nebulaFields) { 100 | val sb = new StringBuilder() 101 | sb.append("`") 102 | sb.append(key) 103 | sb.append("`") 104 | propNames.append(sb.toString()) 105 | } 106 | propNames.toList 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/utils/Neo4jUtils.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 
2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.exchange.utils 8 | 9 | import org.neo4j.driver.Value 10 | 11 | object Neo4jUtils { 12 | 13 | def convertNeo4jData(value: Value): String = { 14 | value.`type`().name() match { 15 | case "NULL" => { 16 | null 17 | } 18 | case "STRING" => { 19 | value.asString() 20 | } 21 | case "INTEGER" => { 22 | value.asInt().toString 23 | } 24 | case "FLOAT" | "DOUBLE" => { 25 | value.asDouble().toString 26 | } 27 | case "BOOLEAN" => { 28 | value.asBoolean().toString 29 | } 30 | case "DATE" | "LOCAL_DATE" => { 31 | value.asLocalDate().toString 32 | } 33 | case "DATE_TIME" | "LOCAL_DATE_TIME" => { 34 | value.asLocalDateTime().toString 35 | } 36 | case "TIME" | "LOCAL_TIME" => { 37 | value.asLocalTime().toString 38 | } 39 | case "BYTES" => { 40 | new String(value.asByteArray()) 41 | } 42 | case "LIST" => { 43 | value.asList().toString 44 | } 45 | case "MAP" => { 46 | value.asMap().toString 47 | } 48 | case _ => { 49 | value.toString 50 | } 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/writer/FileBaseWriter.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.exchange.writer 8 | 9 | import org.rocksdb.{EnvOptions, Options, RocksDB, SstFileWriter} 10 | import org.slf4j.LoggerFactory 11 | 12 | /** 13 | * NebulaSSTWriter 14 | * @param path 15 | */ 16 | class NebulaSSTWriter(path: String) extends Writer { 17 | require(path.trim.nonEmpty) 18 | var isOpen = false 19 | 20 | private val LOG = LoggerFactory.getLogger(getClass) 21 | 22 | try { 23 | RocksDB.loadLibrary() 24 | LOG.info("Loading RocksDB successfully") 25 | } catch { 26 | case _: Exception => 27 | LOG.error("Can't load RocksDB library!") 28 | } 29 | 30 | // TODO More Config ... 31 | val options = new Options() 32 | .setCreateIfMissing(true) 33 | 34 | val env = new EnvOptions() 35 | var writer: SstFileWriter = _ 36 | 37 | override def prepare(): Unit = { 38 | writer = new SstFileWriter(env, options) 39 | writer.open(path) 40 | isOpen = true 41 | } 42 | 43 | def write(key: Array[Byte], value: Array[Byte]): Unit = { 44 | writer.put(key, value) 45 | } 46 | 47 | override def close(): Unit = { 48 | if (isOpen) { 49 | writer.finish() 50 | writer.close() 51 | } 52 | options.close() 53 | env.close() 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /nebula-exchange/src/main/scala/com/vesoft/nebula/exchange/writer/Writer.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.exchange.writer 8 | 9 | /** 10 | * 11 | */ 12 | trait Writer extends Serializable { 13 | 14 | def prepare(): Unit 15 | 16 | def close() 17 | } 18 | -------------------------------------------------------------------------------- /nebula-exchange/src/test/scala/com/vesoft/nebula/exchange/processor/ProcessorSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package scala.com.vesoft.nebula.exchange.processor 8 | 9 | import com.vesoft.nebula.exchange.processor.Processor 10 | import com.vesoft.nebula.{Date, DateTime, NullType, Time, Value} 11 | import com.vesoft.nebula.meta.PropertyType 12 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema 13 | import org.apache.spark.sql.types.{ 14 | BooleanType, 15 | DoubleType, 16 | IntegerType, 17 | LongType, 18 | ShortType, 19 | StringType, 20 | StructField, 21 | StructType 22 | } 23 | import org.junit.Test 24 | 25 | class ProcessorSuite extends Processor { 26 | val values = List("Bob", 27 | "fixedBob", 28 | 12, 29 | 200, 30 | 1000, 31 | 100000, 32 | "2021-01-01", 33 | "2021-01-01T12:00:00", 34 | "12:00:00", 35 | "2021-01-01T12:00:00", 36 | true, 37 | 12.01, 38 | 22.12, 39 | null) 40 | val schema: StructType = StructType( 41 | List( 42 | StructField("col1", StringType, nullable = true), 43 | StructField("col2", StringType, nullable = true), 44 | StructField("col3", ShortType, nullable = true), 45 | StructField("col4", ShortType, nullable = true), 46 | StructField("col5", IntegerType, nullable = true), 47 | StructField("col6", LongType, nullable = true), 48 | StructField("col7", StringType, nullable = true), 49 | StructField("col8", StringType, nullable = true), 50 | StructField("col9", StringType, nullable = true), 51 | StructField("col10", StringType, nullable = true), 52 | StructField("col11", BooleanType, nullable = true), 53 | StructField("col12", DoubleType, nullable = true), 54 | StructField("col13", DoubleType, nullable = true), 55 | StructField("col14", StringType, nullable = true) 56 | )) 57 | val row = new GenericRowWithSchema(values.toArray, schema) 58 | val map = Map( 59 | "col1" -> PropertyType.STRING.getValue, 60 | "col2" -> PropertyType.FIXED_STRING.getValue, 61 | "col3" -> PropertyType.INT8.getValue, 62 | "col4" -> PropertyType.INT16.getValue, 63 | "col5" -> PropertyType.INT32.getValue, 64 | "col6" -> PropertyType.INT64.getValue, 65 | "col7" -> PropertyType.DATE.getValue, 66 | "col8" -> PropertyType.DATETIME.getValue, 67 | "col9" -> PropertyType.TIME.getValue, 68 | "col10" -> PropertyType.TIMESTAMP.getValue, 69 | "col11" -> PropertyType.BOOL.getValue, 70 | "col12" -> PropertyType.DOUBLE.getValue, 71 | "col13" -> PropertyType.FLOAT.getValue, 72 | "col14" -> PropertyType.STRING.getValue 73 | ) 74 | 75 | @Test 76 | def extraValueForClientSuite(): Unit = { 77 | assert(extraValueForClient(row, "col1", map).toString.equals("\"Bob\"")) 78 | assert(extraValueForClient(row, "col2", map).toString.equals("\"fixedBob\"")) 79 | assert(extraValueForClient(row, "col3", map).toString.toInt == 12) 80 | assert(extraValueForClient(row, "col4", map).toString.toInt == 200) 81 | assert(extraValueForClient(row, "col5", map).toString.toInt == 1000) 82 | assert(extraValueForClient(row, "col6", map).toString.toLong == 100000) 83 | 
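    // temporal columns are rendered as nGQL function literals: date(...), datetime(...), time(...), timestamp(...)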
assert(extraValueForClient(row, "col7", map).toString.equals("date(\"2021-01-01\")")) 84 | assert( 85 | extraValueForClient(row, "col8", map).toString.equals("datetime(\"2021-01-01T12:00:00\")")) 86 | assert(extraValueForClient(row, "col9", map).toString.equals("time(\"12:00:00\")")) 87 | assert( 88 | extraValueForClient(row, "col10", map).toString.equals("timestamp(\"2021-01-01T12:00:00\")")) 89 | assert(extraValueForClient(row, "col11", map).toString.toBoolean) 90 | assert(extraValueForClient(row, "col12", map).toString.toDouble > 12.00) 91 | assert(extraValueForClient(row, "col13", map).toString.toDouble > 22.10) 92 | assert(extraValueForClient(row, "col14", map) == null) 93 | } 94 | 95 | @Test 96 | def extraValueForSSTSuite(): Unit = { 97 | assert(extraValueForSST(row, "col1", map).toString.equals("Bob")) 98 | assert(extraValueForSST(row, "col2", map).toString.equals("fixedBob")) 99 | assert(extraValueForSST(row, "col3", map).toString.toInt == 12) 100 | assert(extraValueForSST(row, "col4", map).toString.toInt == 200) 101 | assert(extraValueForSST(row, "col5", map).toString.toInt == 1000) 102 | assert(extraValueForSST(row, "col6", map).toString.toLong == 100000) 103 | val date = new Date(2021, 1, 1) 104 | assert(extraValueForSST(row, "col7", map).equals(date)) 105 | val datetime = new DateTime(2021, 1, 1, 12, 0, 0, 0) 106 | assert(extraValueForSST(row, "col8", map).equals(datetime)) 107 | 108 | val time = new Time(12, 0, 0, 0) 109 | assert(extraValueForSST(row, "col9", map).equals(time)) 110 | 111 | try { 112 | extraValueForSST(row, "col10", map).toString 113 | } catch { 114 | case e: Exception => assert(true) 115 | } 116 | 117 | assert(extraValueForSST(row, "col11", map).toString.toBoolean) 118 | assert(extraValueForSST(row, "col12", map).toString.toDouble > 12.0) 119 | assert(extraValueForSST(row, "col13", map).toString.toFloat > 22.10) 120 | 121 | val nullValue = new Value() 122 | nullValue.setNVal(NullType.__NULL__) 123 | assert(extraValueForSST(row, "col14", map).equals(nullValue)) 124 | } 125 | 126 | /** 127 | * process dataframe to vertices or edges 128 | */ 129 | override def process(): Unit = ??? 130 | 131 | } 132 | -------------------------------------------------------------------------------- /nebula-spark-connector/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | 25 | # build target 26 | target/ 27 | 28 | # IDE 29 | .idea/ 30 | .eclipse/ 31 | *.iml 32 | 33 | spark-importer.ipr 34 | spark-importer.iws 35 | 36 | .DS_Store 37 | -------------------------------------------------------------------------------- /nebula-spark-connector/README.md: -------------------------------------------------------------------------------- 1 | # Nebula Spark Connector 2.0 2 | [中文版](https://github.com/vesoft-inc/nebula-spark-utils/blob/master/nebula-spark-connector/README_CN.md) 3 | 4 | ## Introduction 5 | 6 | Nebula Spark Connector 2.0 only supports Nebula Graph 2.x. If you are using Nebula Graph v1.x, please use [Nebula Spark Connector v1.0](https://github.com/vesoft-inc/nebula-java/tree/v1.0/tools/nebula-spark) . 7 | 8 | ## How to Compile 9 | 10 | 1. 
Package Nebula Spark Connector 2.0. 11 | 12 | ```bash 13 | $ git clone https://github.com/vesoft-inc/nebula-spark-utils.git 14 | $ cd nebula-spark-utils/nebula-spark-connector 15 | $ mvn clean package -Dmaven.test.skip=true -Dgpg.skip -Dmaven.javadoc.skip=true 16 | ``` 17 | 18 | After the packaging, you can see the newly generated nebula-spark-connector-2.0.0.jar under the nebula-spark-utils/nebula-spark-connector/target/ directory. 19 | 20 | ## New Features (Compared to Nebula Spark Connector 1.0) 21 | * Supports more connection configurations, such as timeout, connectionRetry, and executionRetry. 22 | * Supports more data configurations, such as whether vertexId can be written as vertex's property, whether srcId, dstId and rank can be written as edge's properties. 23 | * Spark Reader Supports non-property, all-property, and specific-properties read. 24 | * Spark Reader Supports reading data from Nebula Graph to Graphx as VertexRD and EdgeRDD, it also supports String type vertexId. 25 | * Nebula Spark Connector 2.0 uniformly uses SparkSQL's DataSourceV2 for data source expansion. 26 | * Nebula Spark Connector 2.1.0 support UPDATE write mode to NebulaGraph, see [Update Vertex](https://docs.nebula-graph.io/2.0.1/3.ngql-guide/12.vertex-statements/2.update-vertex/) . 27 | 28 | ## How to Use 29 | 30 | Write DataFrame `INSERT` into Nebula Graph as Vertices: 31 | ``` 32 | val config = NebulaConnectionConfig 33 | .builder() 34 | .withMetaAddress("127.0.0.1:9559") 35 | .withGraphAddress("127.0.0.1:9669") 36 | .build() 37 | val nebulaWriteVertexConfig: WriteNebulaVertexConfig = WriteNebulaVertexConfig 38 | .builder() 39 | .withSpace("test") 40 | .withTag("person") 41 | .withVidField("id") 42 | .withVidAsProp(true) 43 | .withBatch(1000) 44 | .build() 45 | df.write.nebula(config, nebulaWriteVertexConfig).writeVertices() 46 | ``` 47 | Write DataFrame `UPDATE` into Nebula Graph as Vertices: 48 | ``` 49 | val config = NebulaConnectionConfig 50 | .builder() 51 | .withMetaAddress("127.0.0.1:9559") 52 | .withGraphAddress("127.0.0.1:9669") 53 | .build() 54 | val nebulaWriteVertexConfig: WriteNebulaVertexConfig = WriteNebulaVertexConfig 55 | .builder() 56 | .withSpace("test") 57 | .withTag("person") 58 | .withVidField("id") 59 | .withVidAsProp(true) 60 | .withBatch(1000) 61 | .withWriteMode(WriteMode.UPDATE) 62 | .build() 63 | df.write.nebula(config, nebulaWriteVertexConfig).writeVertices() 64 | ``` 65 | Read vertices from Nebula Graph: 66 | ``` 67 | val config = NebulaConnectionConfig 68 | .builder() 69 | .withMetaAddress("127.0.0.1:9559") 70 | .withConenctionRetry(2) 71 | .build() 72 | val nebulaReadVertexConfig: ReadNebulaConfig = ReadNebulaConfig 73 | .builder() 74 | .withSpace("exchange") 75 | .withLabel("person") 76 | .withNoColumn(false) 77 | .withReturnCols(List("birthday")) 78 | .withLimit(10) 79 | .withPartitionNum(10) 80 | .build() 81 | val vertex = spark.read.nebula(config, nebulaReadVertexConfig).loadVerticesToDF() 82 | ``` 83 | 84 | Read vertices and edges from Nebula Graph to construct Graphx's graph: 85 | ``` 86 | val config = NebulaConnectionConfig 87 | .builder() 88 | .withMetaAddress("127.0.0.1:9559") 89 | .build() 90 | val nebulaReadVertexConfig = ReadNebulaConfig 91 | .builder() 92 | .withSpace("exchange") 93 | .withLabel("person") 94 | .withNoColumn(false) 95 | .withReturnCols(List("birthday")) 96 | .withLimit(10) 97 | .withPartitionNum(10) 98 | .build() 99 | val nebulaReadEdgeConfig = ReadNebulaConfig 100 | .builder() 101 | .withSpace("exchange") 102 | .withLabel("knows1") 103 | 
.withNoColumn(false) 104 | .withReturnCols(List("timep")) 105 | .withLimit(10) 106 | .withPartitionNum(10) 107 | .build() 108 | 109 | val vertex = spark.read.nebula(config, nebulaReadVertexConfig).loadVerticesToGraphx() 110 | val edgeRDD = spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToGraphx() 111 | val graph = Graph(vertexRDD, edgeRDD) 112 | ``` 113 | After getting Graphx's Graph, you can develop graph algorithms in Graphx like [Nebula-Spark-Algorithm](https://github.com/vesoft-inc/nebula-java/tree/v1.0/tools/nebula-algorithm). 114 | 115 | For more information on usage, please refer to [Example](https://github.com/vesoft-inc/nebula-spark-utils/tree/master/example/src/main/scala/com/vesoft/nebula/examples/connector). 116 | 117 | ## How to Contribute 118 | 119 | Nebula Spark Connector 2.0 is a completely opensource project, opensource enthusiasts are welcome to participate in the following ways: 120 | 121 | - Go to [Nebula Graph Forum](https://discuss.nebula-graph.com.cn/ "go to“Nebula Graph Forum") to discuss with other users. You can raise your own questions, help others' problems, share your thoughts. 122 | - Write or improve documents. 123 | - Submit code to add new features or fix bugs. 124 | -------------------------------------------------------------------------------- /nebula-spark-connector/README_CN.md: -------------------------------------------------------------------------------- 1 | # 欢迎使用 Nebula Spark Connector 2.0 2 | [English](https://github.com/vesoft-inc/nebula-spark-utils/blob/master/nebula-spark-connector/README.md) 3 | ## 介绍 4 | 5 | Nebula Spark Connector 2.0 仅支持 Nebula Graph 2.x。如果您正在使用 Nebula Graph v1.x,请使用 [Nebula Spark Connector v1.0](https://github.com/vesoft-inc/nebula-java/tree/v1.0/tools)。 6 | 7 | ## 如何编译 8 | 9 | 1. 
编译打包 Nebula Spark Connector 2.0。 10 | 11 | ```bash 12 | $ git clone https://github.com/vesoft-inc/nebula-spark-utils.git 13 | $ cd nebula-spark-utils/nebula-spark-connector 14 | $ mvn clean package -Dmaven.test.skip=true -Dgpg.skip -Dmaven.javadoc.skip=true 15 | ``` 16 | 17 | 编译打包完成后,可以在 nebula-spark-utils/nebula-spark-connector/target/ 目录下看到 nebula-spark-connector-2.0.0.jar 文件。 18 | 19 | ## 特性 20 | 21 | * 提供了更多连接配置项,如超时时间、连接重试次数、执行重试次数 22 | * 提供了更多数据配置项,如写入数据时是否将 vertexId 同时作为属性写入、是否将 srcId、dstId、rank 等同时作为属性写入 23 | * Spark Reader 支持无属性读取,支持全属性读取 24 | * Spark Reader 支持将 Nebula Graph 数据读取成 Graphx 的 VertexRD 和 EdgeRDD,支持非 Long 型 vertexId 25 | * Nebula Spark Connector 2.0 统一了 SparkSQL 的扩展数据源,统一采用 DataSourceV2 进行 Nebula Graph 数据扩展 26 | * Nebula Spark Connector 2.1.0 增加了 UPDATE 写入模式,相关说明参考[Update Vertex](https://docs.nebula-graph.com.cn/2.0.1/3.ngql-guide/12.vertex-statements/2.update-vertex/) 。 27 | 28 | ## 使用说明 29 | 30 | 将 DataFrame 作为点 `INSERT` 写入 Nebula Graph : 31 | ``` 32 | val config = NebulaConnectionConfig 33 | .builder() 34 | .withMetaAddress("127.0.0.1:9559") 35 | .withGraphAddress("127.0.0.1:9669") 36 | .build() 37 | val nebulaWriteVertexConfig = WriteNebulaVertexConfig 38 | .builder() 39 | .withSpace("test") 40 | .withTag("person") 41 | .withVidField("id") 42 | .withVidAsProp(true) 43 | .withBatch(1000) 44 | .build() 45 | df.write.nebula(config, nebulaWriteVertexConfig).writeVertices() 46 | ``` 47 | 将 DataFrame 作为点 `UPDATE` 写入 Nebula Graph : 48 | ``` 49 | val config = NebulaConnectionConfig 50 | .builder() 51 | .withMetaAddress("127.0.0.1:9559") 52 | .withGraphAddress("127.0.0.1:9669") 53 | .build() 54 | val nebulaWriteVertexConfig = WriteNebulaVertexConfig 55 | .builder() 56 | .withSpace("test") 57 | .withTag("person") 58 | .withVidField("id") 59 | .withVidAsProp(true) 60 | .withBatch(1000) 61 | .withWriteMode(WriteMode.UPDATE) 62 | .build() 63 | df.write.nebula(config, nebulaWriteVertexConfig).writeVertices() 64 | ``` 65 | 66 | 读取 Nebula Graph 的点数据: 67 | ``` 68 | val config = NebulaConnectionConfig 69 | .builder() 70 | .withMetaAddress("127.0.0.1:9559") 71 | .withConenctionRetry(2) 72 | .build() 73 | val nebulaReadVertexConfig = ReadNebulaConfig 74 | .builder() 75 | .withSpace("exchange") 76 | .withLabel("person") 77 | .withNoColumn(false) 78 | .withReturnCols(List("birthday")) 79 | .withLimit(10) 80 | .withPartitionNum(10) 81 | .build() 82 | val vertex = spark.read.nebula(config, nebulaReadVertexConfig).loadVerticesToDF() 83 | ``` 84 | 85 | 读取 Nebula Graph 的点边数据构造 Graphx 的图: 86 | ``` 87 | val config = NebulaConnectionConfig 88 | .builder() 89 | .withMetaAddress("127.0.0.1:9559") 90 | .withConenctionRetry(2) 91 | .build() 92 | val nebulaReadVertexConfig = ReadNebulaConfig 93 | .builder() 94 | .withSpace("exchange") 95 | .withLabel("person") 96 | .withNoColumn(false) 97 | .withReturnCols(List("birthday")) 98 | .withLimit(10) 99 | .withPartitionNum(10) 100 | .build() 101 | 102 | val nebulaReadEdgeConfig = ReadNebulaConfig 103 | .builder() 104 | .withSpace("exchange") 105 | .withLabel("knows1") 106 | .withNoColumn(false) 107 | .withReturnCols(List("timep")) 108 | .withLimit(10) 109 | .withPartitionNum(10) 110 | .build() 111 | 112 | val vertex = spark.read.nebula(config, nebulaReadVertexConfig).loadVerticesToGraphx() 113 | val edgeRDD = spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToGraphx() 114 | val graph = Graph(vertexRDD, edgeRDD) 115 | ``` 116 | 得到 Graphx 的 Graph 之后,可以根据 
[Nebula-Spark-Algorithm](https://github.com/vesoft-inc/nebula-java/tree/v1.0/tools/nebula-algorithm) 的示例在 Graphx 框架中进行算法开发。 117 | 118 | 更多使用示例请参考 [Example](https://github.com/vesoft-inc/nebula-spark-utils/tree/master/example/src/main/scala/com/vesoft/nebula/examples/connector) 。 119 | 120 | ## 贡献 121 | 122 | Nebula Spark Connector 2.0 是一个完全开源的项目,欢迎开源爱好者通过以下方式参与: 123 | 124 | - 前往 [Nebula Graph 论坛](https://discuss.nebula-graph.com.cn/ "点击前往“Nebula Graph 论坛") 上参与 Issue 讨论,如答疑、提供想法或者报告无法解决的问题 125 | - 撰写或改进文档 126 | - 提交优化代码 127 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaDataSource.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector 8 | 9 | import java.util.Map.Entry 10 | import java.util.Optional 11 | 12 | import com.vesoft.nebula.connector.exception.IllegalOptionException 13 | import com.vesoft.nebula.connector.reader.{NebulaDataSourceEdgeReader, NebulaDataSourceVertexReader} 14 | import com.vesoft.nebula.connector.writer.{NebulaDataSourceEdgeWriter, NebulaDataSourceVertexWriter} 15 | import org.apache.spark.sql.SaveMode 16 | import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap 17 | import org.apache.spark.sql.sources.DataSourceRegister 18 | import org.apache.spark.sql.sources.v2.reader.DataSourceReader 19 | import org.apache.spark.sql.sources.v2.writer.DataSourceWriter 20 | import org.apache.spark.sql.sources.v2.{DataSourceOptions, DataSourceV2, ReadSupport, WriteSupport} 21 | import org.apache.spark.sql.types.StructType 22 | import org.slf4j.LoggerFactory 23 | 24 | import scala.collection.JavaConversions.iterableAsScalaIterable 25 | 26 | class NebulaDataSource 27 | extends DataSourceV2 28 | with ReadSupport 29 | with WriteSupport 30 | with DataSourceRegister { 31 | private val LOG = LoggerFactory.getLogger(this.getClass) 32 | 33 | /** 34 | * The string that represents the format that nebula data source provider uses. 35 | */ 36 | override def shortName(): String = "nebula" 37 | 38 | /** 39 | * Creates a {@link DataSourceReader} to scan the data from Nebula Graph. 40 | */ 41 | override def createReader(options: DataSourceOptions): DataSourceReader = { 42 | val nebulaOptions = getNebulaOptions(options, OperaType.READ) 43 | val dataType = nebulaOptions.dataType 44 | 45 | LOG.info("create reader") 46 | LOG.info(s"options ${options.asMap()}") 47 | 48 | if (DataTypeEnum.VERTEX == DataTypeEnum.withName(dataType)) { 49 | new NebulaDataSourceVertexReader(nebulaOptions) 50 | } else { 51 | new NebulaDataSourceEdgeReader(nebulaOptions) 52 | } 53 | } 54 | 55 | /** 56 | * Creates an optional {@link DataSourceWriter} to save the data to Nebula Graph. 
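   * Spark calls this when a DataFrame is saved through the "nebula" source (for example via the
   * writeVertices()/writeEdges() helpers shown in the README). SaveMode.Ignore and
   * SaveMode.ErrorIfExists are not supported and only trigger the warning below.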
57 | */ 58 | override def createWriter(writeUUID: String, 59 | schema: StructType, 60 | mode: SaveMode, 61 | options: DataSourceOptions): Optional[DataSourceWriter] = { 62 | 63 | val nebulaOptions = getNebulaOptions(options, OperaType.WRITE) 64 | val dataType = nebulaOptions.dataType 65 | if (mode == SaveMode.Ignore || mode == SaveMode.ErrorIfExists) { 66 | LOG.warn(s"Currently do not support mode") 67 | } 68 | 69 | LOG.info("create writer") 70 | LOG.info(s"options ${options.asMap()}") 71 | 72 | if (DataTypeEnum.VERTEX == DataTypeEnum.withName(dataType)) { 73 | val vertexFiled = nebulaOptions.vertexField 74 | val vertexIndex: Int = { 75 | var index: Int = -1 76 | for (i <- schema.fields.indices) { 77 | if (schema.fields(i).name.equals(vertexFiled)) { 78 | index = i 79 | } 80 | } 81 | if (index < 0) { 82 | throw new IllegalOptionException( 83 | s" vertex field ${vertexFiled} does not exist in dataframe") 84 | } 85 | index 86 | } 87 | Optional.of(new NebulaDataSourceVertexWriter(nebulaOptions, vertexIndex, schema)) 88 | } else { 89 | val srcVertexFiled = nebulaOptions.srcVertexField 90 | val dstVertexField = nebulaOptions.dstVertexField 91 | val rankExist = !nebulaOptions.rankField.isEmpty 92 | val edgeFieldsIndex = { 93 | var srcIndex: Int = -1 94 | var dstIndex: Int = -1 95 | var rankIndex: Int = -1 96 | for (i <- schema.fields.indices) { 97 | if (schema.fields(i).name.equals(srcVertexFiled)) { 98 | srcIndex = i 99 | } 100 | if (schema.fields(i).name.equals(dstVertexField)) { 101 | dstIndex = i 102 | } 103 | if (rankExist) { 104 | if (schema.fields(i).name.equals(nebulaOptions.rankField)) { 105 | rankIndex = i 106 | } 107 | } 108 | } 109 | // check src filed and dst field 110 | if (srcIndex < 0 || dstIndex < 0) { 111 | throw new IllegalOptionException( 112 | s" srcVertex field ${srcVertexFiled} or dstVertex field ${dstVertexField} do not exist in dataframe") 113 | } 114 | // check rank field 115 | if (rankExist && rankIndex < 0) { 116 | throw new IllegalOptionException(s"rank field does not exist in dataframe") 117 | } 118 | 119 | if (!rankExist) { 120 | (srcIndex, dstIndex, Option.empty) 121 | } else { 122 | (srcIndex, dstIndex, Option(rankIndex)) 123 | } 124 | 125 | } 126 | Optional.of( 127 | new NebulaDataSourceEdgeWriter(nebulaOptions, 128 | edgeFieldsIndex._1, 129 | edgeFieldsIndex._2, 130 | edgeFieldsIndex._3, 131 | schema)) 132 | } 133 | } 134 | 135 | /** 136 | * construct nebula options with DataSourceOptions 137 | */ 138 | def getNebulaOptions(options: DataSourceOptions, operateType: OperaType.Value): NebulaOptions = { 139 | var parameters: Map[String, String] = Map() 140 | for (entry: Entry[String, String] <- options.asMap().entrySet) { 141 | parameters += (entry.getKey -> entry.getValue) 142 | } 143 | val nebulaOptions = new NebulaOptions(CaseInsensitiveMap(parameters))(operateType) 144 | nebulaOptions 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaEnum.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.connector 8 | 9 | object DataTypeEnum extends Enumeration { 10 | 11 | type DataType = Value 12 | val VERTEX = Value("vertex") 13 | val EDGE = Value("edge") 14 | 15 | def validDataType(dataType: String): Boolean = { 16 | dataType.equalsIgnoreCase(VERTEX.toString) || dataType.equalsIgnoreCase(EDGE.toString) 17 | } 18 | } 19 | 20 | object KeyPolicy extends Enumeration { 21 | 22 | type POLICY = Value 23 | val HASH = Value("hash") 24 | val UUID = Value("uuid") 25 | } 26 | 27 | object OperaType extends Enumeration { 28 | 29 | type Operation = Value 30 | val READ = Value("read") 31 | val WRITE = Value("write") 32 | } 33 | 34 | object WriteMode extends Enumeration { 35 | 36 | type Mode = Value 37 | val INSERT = Value("insert") 38 | val UPDATE = Value("update") 39 | val DELETE = Value("delete") 40 | } 41 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaUtils.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector 8 | 9 | import com.vesoft.nebula.client.graph.data.{DateTimeWrapper, DateWrapper, TimeWrapper} 10 | import com.vesoft.nebula.meta.{ColumnDef, ColumnTypeDef, PropertyType} 11 | import org.apache.spark.sql.Row 12 | import org.apache.spark.sql.catalyst.InternalRow 13 | import org.apache.spark.sql.types.{ 14 | BooleanType, 15 | DataType, 16 | DoubleType, 17 | FloatType, 18 | IntegerType, 19 | LongType, 20 | NullType, 21 | StringType, 22 | StructType, 23 | TimestampType 24 | } 25 | import org.apache.spark.unsafe.types.UTF8String 26 | import org.slf4j.LoggerFactory 27 | 28 | object NebulaUtils { 29 | private val LOG = LoggerFactory.getLogger(this.getClass) 30 | 31 | var nebulaOptions: NebulaOptions = _ 32 | var parameters: Map[String, String] = Map() 33 | 34 | /** 35 | * convert nebula data type to spark sql data type 36 | */ 37 | def convertDataType(columnTypeDef: ColumnTypeDef): DataType = { 38 | 39 | columnTypeDef.getType match { 40 | case PropertyType.VID | PropertyType.INT8 | PropertyType.INT16 | PropertyType.INT32 | 41 | PropertyType.INT64 => 42 | LongType 43 | case PropertyType.BOOL => BooleanType 44 | case PropertyType.FLOAT | PropertyType.DOUBLE => DoubleType 45 | case PropertyType.TIMESTAMP => LongType 46 | case PropertyType.FIXED_STRING | PropertyType.STRING | PropertyType.DATE | PropertyType.TIME | 47 | PropertyType.DATETIME => 48 | StringType 49 | case PropertyType.UNKNOWN => throw new IllegalArgumentException("unsupported data type") 50 | } 51 | } 52 | 53 | def getColDataType(columnDefs: List[ColumnDef], columnName: String): DataType = { 54 | for (columnDef <- columnDefs) { 55 | if (columnName.equals(new String(columnDef.getName))) { 56 | return convertDataType(columnDef.getType) 57 | } 58 | } 59 | throw new IllegalArgumentException(s"column $columnName does not exist in schema") 60 | } 61 | 62 | type NebulaValueGetter = (Any, InternalRow, Int) => Unit 63 | 64 | def makeGetters(schema: StructType): Array[NebulaValueGetter] = { 65 | schema.fields.map(field => makeGetter(field.dataType)) 66 | } 67 | 68 | private def makeGetter(dataType: DataType): NebulaValueGetter = { 69 | dataType match { 70 | case BooleanType => 71 | (prop: Any, row: InternalRow, 
pos: Int) => 72 | row.setBoolean(pos, prop.asInstanceOf[Boolean]) 73 | case TimestampType | LongType => 74 | (prop: Any, row: InternalRow, pos: Int) => 75 | row.setLong(pos, prop.asInstanceOf[Long]) 76 | case FloatType | DoubleType => 77 | (prop: Any, row: InternalRow, pos: Int) => 78 | row.setDouble(pos, prop.asInstanceOf[Double]) 79 | case IntegerType => 80 | (prop: Any, row: InternalRow, pos: Int) => 81 | row.setInt(pos, prop.asInstanceOf[Int]) 82 | case _ => 83 | (prop: Any, row: InternalRow, pos: Int) => 84 | if (prop.isInstanceOf[DateTimeWrapper]) { 85 | row.update(pos, 86 | UTF8String.fromString(prop.asInstanceOf[DateTimeWrapper].getUTCDateTimeStr)) 87 | } else if (prop.isInstanceOf[TimeWrapper]) { 88 | row.update(pos, UTF8String.fromString(prop.asInstanceOf[TimeWrapper].getUTCTimeStr)) 89 | } else { 90 | row.update(pos, UTF8String.fromString(String.valueOf(prop))) 91 | } 92 | } 93 | } 94 | 95 | def isNumic(str: String): Boolean = { 96 | val newStr: String = if (str.startsWith("-")) { 97 | str.substring(1) 98 | } else { str } 99 | 100 | for (char <- newStr.toCharArray) { 101 | if (!Character.isDigit(char)) return false 102 | } 103 | true 104 | } 105 | 106 | def escapeUtil(str: String): String = { 107 | var s = str 108 | if (s.contains("\\")) { 109 | s = s.replaceAll("\\\\", "\\\\\\\\") 110 | } 111 | if (s.contains("\t")) { 112 | s = s.replaceAll("\t", "\\\\t") 113 | } 114 | if (s.contains("\n")) { 115 | s = s.replaceAll("\n", "\\\\n") 116 | } 117 | if (s.contains("\"")) { 118 | s = s.replaceAll("\"", "\\\\\"") 119 | } 120 | if (s.contains("\'")) { 121 | s = s.replaceAll("\'", "\\\\'") 122 | } 123 | if (s.contains("\r")) { 124 | s = s.replaceAll("\r", "\\\\r") 125 | } 126 | if (s.contains("\b")) { 127 | s = s.replaceAll("\b", "\\\\b") 128 | } 129 | s 130 | } 131 | 132 | } 133 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/PartitionUtils.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector 8 | 9 | import scala.collection.mutable.ListBuffer 10 | 11 | object PartitionUtils { 12 | def getScanParts(index: Int, nebulaTotalPart: Int, sparkPartitionNum: Int): List[Integer] = { 13 | val scanParts = new ListBuffer[Integer] 14 | var currentPart = index 15 | while (currentPart <= nebulaTotalPart) { 16 | scanParts.append(currentPart) 17 | currentPart += sparkPartitionNum 18 | } 19 | scanParts.toList 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/Template.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.connector 8 | 9 | object NebulaTemplate { 10 | 11 | private[connector] val BATCH_INSERT_TEMPLATE = "INSERT %s `%s`(%s) VALUES %s" 12 | private[connector] val VERTEX_VALUE_TEMPLATE = "%s: (%s)" 13 | private[connector] val VERTEX_VALUE_TEMPLATE_WITH_POLICY = "%s(\"%s\"): (%s)" 14 | private[connector] val ENDPOINT_TEMPLATE = "%s(\"%s\")" 15 | private[connector] val EDGE_VALUE_WITHOUT_RANKING_TEMPLATE = "%s->%s: (%s)" 16 | private[connector] val EDGE_VALUE_TEMPLATE = "%s->%s@%d: (%s)" 17 | private[connector] val USE_TEMPLATE = "USE %s" 18 | 19 | private[connector] val UPDATE_VERTEX_TEMPLATE = "UPDATE %s ON `%s` %s SET %s" 20 | private[connector] val UPDATE_EDGE_TEMPLATE = "UPDATE %s ON `%s` %s->%s@%d SET %s" 21 | private[connector] val UPDATE_VALUE_TEMPLATE = "`%s`=%s" 22 | 23 | private[connector] val DELETE_VERTEX_TEMPLATE = "DELETE VERTEX %s" 24 | private[connector] val DELETE_EDGE_TEMPLATE = "DELETE EDGE `%s` %s" 25 | private[connector] val EDGE_ENDPOINT_TEMPLATE = "%s->%s@%d" 26 | } 27 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/exception/Exception.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector.exception 8 | 9 | import com.facebook.thrift.TException 10 | 11 | /** 12 | * An exception thrown if the Nebula client connection fails. 13 | */ 14 | class GraphConnectException(message: String, cause: Throwable = null) 15 | extends TException(message, cause) 16 | 17 | /** 18 | * An exception thrown if a required option is missing from [[NebulaOptions]]. 19 | */ 20 | class IllegalOptionException(message: String, cause: Throwable = null) 21 | extends IllegalArgumentException(message, cause) 22 | 23 | /** 24 | * An exception thrown if a Nebula statement execution fails. 25 | */ 26 | class GraphExecuteException(message: String, cause: Throwable = null) 27 | extends TException(message, cause) 28 | 29 | /** 30 | * An exception thrown if a Nebula execution encounters an RPC exception. 31 | */ 32 | class NebulaRPCException(message: String, cause: Throwable = null) 33 | extends RuntimeException(message, cause) 34 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/nebula/GraphProvider.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory.
5 | */ 6 | 7 | package com.vesoft.nebula.connector.nebula 8 | 9 | import com.vesoft.nebula.client.graph.NebulaPoolConfig 10 | import com.vesoft.nebula.client.graph.data.{HostAddress, ResultSet} 11 | import com.vesoft.nebula.client.graph.net.{NebulaPool, Session} 12 | import com.vesoft.nebula.connector.connector.Address 13 | import com.vesoft.nebula.connector.exception.GraphConnectException 14 | import org.apache.log4j.Logger 15 | 16 | import scala.collection.JavaConverters._ 17 | import scala.collection.mutable.ListBuffer 18 | 19 | /** 20 | * GraphProvider for Nebula Graph Service 21 | */ 22 | class GraphProvider(addresses: List[Address]) extends AutoCloseable with Serializable { 23 | private[this] lazy val LOG = Logger.getLogger(this.getClass) 24 | 25 | @transient val nebulaPoolConfig = new NebulaPoolConfig 26 | 27 | @transient val pool: NebulaPool = new NebulaPool 28 | val address = new ListBuffer[HostAddress]() 29 | for (addr <- addresses) { 30 | address.append(new HostAddress(addr._1, addr._2)) 31 | } 32 | nebulaPoolConfig.setMaxConnSize(1) 33 | pool.init(address.asJava, nebulaPoolConfig) 34 | 35 | var session: Session = null 36 | 37 | def releaseGraphClient(session: Session): Unit = { 38 | session.release() 39 | } 40 | 41 | override def close(): Unit = { 42 | pool.close() 43 | } 44 | 45 | def switchSpace(user: String, password: String, space: String): Boolean = { 46 | if (session == null) { 47 | session = pool.getSession(user, password, true) 48 | } 49 | val switchStatement = s"USE $space" 50 | LOG.info(s"switch to space $space") 51 | val result = submit(switchStatement) 52 | result.isSucceeded 53 | } 54 | 55 | def submit(statement: String): ResultSet = { 56 | if (session == null) { 57 | LOG.error("graph session is null") 58 | throw new GraphConnectException("session is null") 59 | } 60 | session.execute(statement) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/nebula/MetaProvider.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory.
5 | */ 6 | 7 | package com.vesoft.nebula.connector.nebula 8 | 9 | import com.vesoft.nebula.client.graph.data.HostAddress 10 | import com.vesoft.nebula.client.meta.MetaClient 11 | import com.vesoft.nebula.connector.connector.Address 12 | import com.vesoft.nebula.connector.DataTypeEnum 13 | import com.vesoft.nebula.meta.{PropertyType, Schema} 14 | 15 | import scala.collection.JavaConverters._ 16 | import scala.collection.mutable 17 | 18 | class MetaProvider(addresses: List[Address]) extends AutoCloseable { 19 | 20 | val metaAddress = addresses.map(address => new HostAddress(address._1, address._2)).asJava 21 | val client = new MetaClient(metaAddress) 22 | client.connect() 23 | 24 | def getPartitionNumber(space: String): Int = { 25 | client.getPartsAlloc(space).size() 26 | } 27 | 28 | def getVidType(space: String): VidType.Value = { 29 | val vidType = client.getSpace(space).getProperties.getVid_type.getType 30 | if (vidType == PropertyType.FIXED_STRING) { 31 | return VidType.STRING 32 | } 33 | VidType.INT 34 | } 35 | 36 | def getTag(space: String, tag: String): Schema = { 37 | client.getTag(space, tag) 38 | } 39 | 40 | def getEdge(space: String, edge: String): Schema = { 41 | client.getEdge(space, edge) 42 | } 43 | 44 | def getTagSchema(space: String, tag: String): Map[String, Integer] = { 45 | val tagSchema = client.getTag(space, tag) 46 | val schema = new mutable.HashMap[String, Integer] 47 | 48 | val columns = tagSchema.getColumns 49 | for (colDef <- columns.asScala) { 50 | schema.put(new String(colDef.getName), colDef.getType.getType.getValue) 51 | } 52 | schema.toMap 53 | } 54 | 55 | def getEdgeSchema(space: String, edge: String): Map[String, Integer] = { 56 | val edgeSchema = client.getEdge(space, edge) 57 | val schema = new mutable.HashMap[String, Integer] 58 | 59 | val columns = edgeSchema.getColumns 60 | for (colDef <- columns.asScala) { 61 | schema.put(new String(colDef.getName), colDef.getType.getType.getValue) 62 | } 63 | schema.toMap 64 | } 65 | 66 | def getLabelType(space: String, label: String): DataTypeEnum.Value = { 67 | val tags = client.getTags(space) 68 | for (tag <- tags.asScala) { 69 | if (new String(tag.getTag_name).equals(label)) { 70 | return DataTypeEnum.VERTEX 71 | } 72 | } 73 | val edges = client.getEdges(space) 74 | for (edge <- edges.asScala) { 75 | if (new String(edge.getEdge_name).equals(label)) { 76 | return DataTypeEnum.EDGE 77 | } 78 | } 79 | null 80 | } 81 | 82 | override def close(): Unit = { 83 | client.close() 84 | } 85 | 86 | } 87 | 88 | object VidType extends Enumeration { 89 | type Type = Value 90 | 91 | val STRING = Value("STRING") 92 | val INT = Value("INT") 93 | } 94 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaEdgePartitionReader.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.connector.reader 8 | 9 | import com.vesoft.nebula.client.storage.scan.{ScanEdgeResult, ScanEdgeResultIterator} 10 | import com.vesoft.nebula.connector.NebulaOptions 11 | import org.apache.spark.sql.types.StructType 12 | import org.slf4j.{Logger, LoggerFactory} 13 | import scala.collection.JavaConverters._ 14 | 15 | class NebulaEdgePartitionReader(index: Int, nebulaOptions: NebulaOptions, schema: StructType) 16 | extends NebulaPartitionReader(index, nebulaOptions, schema) { 17 | private val LOG: Logger = LoggerFactory.getLogger(this.getClass) 18 | 19 | private var responseIterator: ScanEdgeResultIterator = _ 20 | 21 | override def next(): Boolean = { 22 | if (dataIterator == null && responseIterator == null && !scanPartIterator.hasNext) 23 | return false 24 | 25 | var continue: Boolean = false 26 | var break: Boolean = false 27 | while ((dataIterator == null || !dataIterator.hasNext) && !break) { 28 | resultValues.clear() 29 | continue = false 30 | if (responseIterator == null || !responseIterator.hasNext) { 31 | if (scanPartIterator.hasNext) { 32 | try { 33 | if (nebulaOptions.noColumn) { 34 | responseIterator = storageClient.scanEdge(nebulaOptions.spaceName, 35 | scanPartIterator.next(), 36 | nebulaOptions.label, 37 | nebulaOptions.limit, 38 | 0L, 39 | Long.MaxValue, 40 | true, 41 | true) 42 | } else { 43 | responseIterator = storageClient.scanEdge(nebulaOptions.spaceName, 44 | scanPartIterator.next(), 45 | nebulaOptions.label, 46 | nebulaOptions.getReturnCols.asJava, 47 | nebulaOptions.limit, 48 | 0, 49 | Long.MaxValue, 50 | true, 51 | true) 52 | } 53 | } catch { 54 | case e: Exception => 55 | LOG.error(s"Exception scanning edge ${nebulaOptions.label}", e) 56 | storageClient.close() 57 | throw new Exception(e.getMessage, e) 58 | } 59 | // jump to the next loop 60 | continue = true 61 | } 62 | // break while loop 63 | break = !continue 64 | } else { 65 | val next: ScanEdgeResult = responseIterator.next 66 | if (!next.isEmpty) { 67 | dataIterator = next.getEdgeTableRows.iterator().asScala 68 | } 69 | } 70 | } 71 | 72 | if (dataIterator == null) { 73 | return false 74 | } 75 | dataIterator.hasNext 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaPartition.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory.
5 | */ 6 | 7 | package com.vesoft.nebula.connector.reader 8 | 9 | import com.vesoft.nebula.connector.NebulaOptions 10 | import org.apache.spark.sql.catalyst.InternalRow 11 | import org.apache.spark.sql.sources.v2.reader.{InputPartition, InputPartitionReader} 12 | import org.apache.spark.sql.types.StructType 13 | 14 | class NebulaVertexPartition(index: Int, nebulaOptions: NebulaOptions, schema: StructType) 15 | extends InputPartition[InternalRow] { 16 | override def createPartitionReader(): InputPartitionReader[InternalRow] = 17 | new NebulaVertexPartitionReader(index, nebulaOptions, schema) 18 | } 19 | 20 | class NebulaEdgePartition(index: Int, nebulaOptions: NebulaOptions, schema: StructType) 21 | extends InputPartition[InternalRow] { 22 | override def createPartitionReader(): InputPartitionReader[InternalRow] = 23 | new NebulaEdgePartitionReader(index, nebulaOptions, schema) 24 | } 25 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaPartitionReader.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector.reader 8 | 9 | import com.vesoft.nebula.client.graph.data.{HostAddress, ValueWrapper} 10 | import com.vesoft.nebula.client.storage.StorageClient 11 | import com.vesoft.nebula.client.storage.data.{BaseTableRow, VertexTableRow} 12 | import com.vesoft.nebula.connector.NebulaUtils.NebulaValueGetter 13 | import com.vesoft.nebula.connector.exception.GraphConnectException 14 | import com.vesoft.nebula.connector.{NebulaOptions, NebulaUtils, PartitionUtils} 15 | import com.vesoft.nebula.connector.nebula.MetaProvider 16 | import org.apache.spark.sql.catalyst.InternalRow 17 | import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow 18 | import org.apache.spark.sql.sources.v2.reader.InputPartitionReader 19 | import org.apache.spark.sql.types.StructType 20 | import org.slf4j.{Logger, LoggerFactory} 21 | 22 | import scala.collection.JavaConverters._ 23 | import scala.collection.mutable 24 | import scala.collection.mutable.ListBuffer 25 | 26 | /** 27 | * Read nebula data for each spark partition 28 | */ 29 | abstract class NebulaPartitionReader extends InputPartitionReader[InternalRow] { 30 | private val LOG: Logger = LoggerFactory.getLogger(this.getClass) 31 | 32 | private var metaProvider: MetaProvider = _ 33 | private var schema: StructType = _ 34 | 35 | protected var dataIterator: Iterator[BaseTableRow] = _ 36 | protected var scanPartIterator: Iterator[Integer] = _ 37 | protected var resultValues: mutable.ListBuffer[List[Object]] = mutable.ListBuffer[List[Object]]() 38 | protected var storageClient: StorageClient = _ 39 | 40 | /** 41 | * @param index identifier for spark partition 42 | * @param nebulaOptions nebula Options 43 | * @param schema of data need to read 44 | */ 45 | def this(index: Int, nebulaOptions: NebulaOptions, schema: StructType) { 46 | this() 47 | this.schema = schema 48 | 49 | metaProvider = new MetaProvider(nebulaOptions.getMetaAddress) 50 | val address: ListBuffer[HostAddress] = new ListBuffer[HostAddress] 51 | 52 | for (addr <- nebulaOptions.getMetaAddress) { 53 | address.append(new HostAddress(addr._1, addr._2)) 54 | } 55 | 56 | this.storageClient = new 
StorageClient(address.asJava) 57 | if (!storageClient.connect()) { 58 | throw new GraphConnectException("storage connect failed.") 59 | } 60 | // allocate scanPart to this partition 61 | val totalPart = metaProvider.getPartitionNumber(nebulaOptions.spaceName) 62 | 63 | val scanParts = PartitionUtils.getScanParts(index, totalPart, nebulaOptions.partitionNums.toInt) 64 | LOG.info(s"partition index: ${index}, scanParts: ${scanParts.toString}") 65 | scanPartIterator = scanParts.iterator 66 | } 67 | 68 | override def get(): InternalRow = { 69 | val resultSet: Array[ValueWrapper] = 70 | dataIterator.next().getValues.toArray.map(v => v.asInstanceOf[ValueWrapper]) 71 | val getters: Array[NebulaValueGetter] = NebulaUtils.makeGetters(schema) 72 | val mutableRow = new SpecificInternalRow(schema.fields.map(x => x.dataType)) 73 | 74 | for (i <- getters.indices) { 75 | val value: ValueWrapper = resultSet(i) 76 | var resolved = false 77 | if (value.isNull) { 78 | mutableRow.setNullAt(i) 79 | resolved = true 80 | } 81 | if (value.isString) { 82 | getters(i).apply(value.asString(), mutableRow, i) 83 | resolved = true 84 | } 85 | if (value.isDate) { 86 | getters(i).apply(value.asDate(), mutableRow, i) 87 | resolved = true 88 | } 89 | if (value.isTime) { 90 | getters(i).apply(value.asTime(), mutableRow, i) 91 | resolved = true 92 | } 93 | if (value.isDateTime) { 94 | getters(i).apply(value.asDateTime(), mutableRow, i) 95 | resolved = true 96 | } 97 | if (value.isLong) { 98 | getters(i).apply(value.asLong(), mutableRow, i) 99 | } 100 | if (value.isBoolean) { 101 | getters(i).apply(value.asBoolean(), mutableRow, i) 102 | } 103 | if (value.isDouble) { 104 | getters(i).apply(value.asDouble(), mutableRow, i) 105 | } 106 | } 107 | mutableRow 108 | } 109 | 110 | override def close(): Unit = { 111 | metaProvider.close() 112 | storageClient.close() 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaSourceReader.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector.reader 8 | 9 | import java.util 10 | 11 | import com.vesoft.nebula.connector.{DataTypeEnum, NebulaOptions, NebulaUtils} 12 | import com.vesoft.nebula.connector.nebula.MetaProvider 13 | import com.vesoft.nebula.meta.ColumnDef 14 | import org.apache.spark.sql.catalyst.InternalRow 15 | import org.apache.spark.sql.sources.v2.reader.{DataSourceReader, InputPartition} 16 | import org.apache.spark.sql.types.{DataTypes, StructField, StructType} 17 | import org.slf4j.LoggerFactory 18 | 19 | import scala.collection.mutable.ListBuffer 20 | import scala.collection.JavaConverters._ 21 | 22 | /** 23 | * Base class of Nebula Source Reader 24 | */ 25 | abstract class NebulaSourceReader(nebulaOptions: NebulaOptions) extends DataSourceReader { 26 | private val LOG = LoggerFactory.getLogger(this.getClass) 27 | 28 | private var datasetSchema: StructType = _ 29 | 30 | override def readSchema(): StructType = { 31 | datasetSchema = getSchema(nebulaOptions) 32 | LOG.info(s"dataset's schema: $datasetSchema") 33 | datasetSchema 34 | } 35 | 36 | protected def getSchema: StructType = getSchema(nebulaOptions) 37 | 38 | /** 39 | * return the dataset's schema. 
Schema includes configured cols in returnCols or includes all properties in nebula. 40 | */ 41 | def getSchema(nebulaOptions: NebulaOptions): StructType = { 42 | val returnCols = nebulaOptions.getReturnCols 43 | val noColumn = nebulaOptions.noColumn 44 | val fields: ListBuffer[StructField] = new ListBuffer[StructField] 45 | val metaProvider = new MetaProvider(nebulaOptions.getMetaAddress) 46 | 47 | import scala.collection.JavaConverters._ 48 | var schemaCols: Seq[ColumnDef] = Seq() 49 | val isVertex = DataTypeEnum.VERTEX.toString.equalsIgnoreCase(nebulaOptions.dataType) 50 | 51 | // construct vertex or edge default prop 52 | if (isVertex) { 53 | fields.append(DataTypes.createStructField("_vertexId", DataTypes.StringType, false)) 54 | } else { 55 | fields.append(DataTypes.createStructField("_srcId", DataTypes.StringType, false)) 56 | fields.append(DataTypes.createStructField("_dstId", DataTypes.StringType, false)) 57 | fields.append(DataTypes.createStructField("_rank", DataTypes.LongType, false)) 58 | } 59 | 60 | var dataSchema: StructType = null 61 | // read no column 62 | if (noColumn) { 63 | dataSchema = new StructType(fields.toArray) 64 | return dataSchema 65 | } 66 | // get tag schema or edge schema 67 | val schema = if (isVertex) { 68 | metaProvider.getTag(nebulaOptions.spaceName, nebulaOptions.label) 69 | } else { 70 | metaProvider.getEdge(nebulaOptions.spaceName, nebulaOptions.label) 71 | } 72 | 73 | schemaCols = schema.columns.asScala 74 | 75 | // read all columns 76 | if (returnCols.isEmpty) { 77 | schemaCols.foreach(columnDef => { 78 | LOG.info(s"prop name ${new String(columnDef.getName)}, type ${columnDef.getType.getType} ") 79 | fields.append( 80 | DataTypes.createStructField(new String(columnDef.getName), 81 | NebulaUtils.convertDataType(columnDef.getType), 82 | true)) 83 | }) 84 | } else { 85 | for (col: String <- returnCols) { 86 | fields.append( 87 | DataTypes 88 | .createStructField(col, NebulaUtils.getColDataType(schemaCols.toList, col), true)) 89 | } 90 | } 91 | dataSchema = new StructType(fields.toArray) 92 | dataSchema 93 | } 94 | } 95 | 96 | /** 97 | * DataSourceReader for Nebula Vertex 98 | */ 99 | class NebulaDataSourceVertexReader(nebulaOptions: NebulaOptions) 100 | extends NebulaSourceReader(nebulaOptions) { 101 | 102 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = { 103 | val partitionNum = nebulaOptions.partitionNums.toInt 104 | val partitions = for (index <- 1 to partitionNum) 105 | yield { 106 | new NebulaVertexPartition(index, nebulaOptions, getSchema) 107 | } 108 | partitions.map(_.asInstanceOf[InputPartition[InternalRow]]).asJava 109 | } 110 | } 111 | 112 | /** 113 | * DataSourceReader for Nebula Edge 114 | */ 115 | class NebulaDataSourceEdgeReader(nebulaOptions: NebulaOptions) 116 | extends NebulaSourceReader(nebulaOptions) { 117 | 118 | override def planInputPartitions(): util.List[InputPartition[InternalRow]] = { 119 | val partitionNum = nebulaOptions.partitionNums.toInt 120 | val partitions = for (index <- 1 to partitionNum) 121 | yield new NebulaEdgePartition(index, nebulaOptions, getSchema) 122 | 123 | partitions.map(_.asInstanceOf[InputPartition[InternalRow]]).asJava 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/reader/NebulaVertexPartitionReader.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 
2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector.reader 8 | 9 | import com.vesoft.nebula.client.storage.scan.{ScanVertexResult, ScanVertexResultIterator} 10 | import com.vesoft.nebula.connector.NebulaOptions 11 | import org.apache.spark.sql.types.StructType 12 | import org.slf4j.{Logger, LoggerFactory} 13 | 14 | import scala.collection.JavaConverters._ 15 | 16 | class NebulaVertexPartitionReader(index: Int, nebulaOptions: NebulaOptions, schema: StructType) 17 | extends NebulaPartitionReader(index, nebulaOptions, schema) { 18 | 19 | private val LOG: Logger = LoggerFactory.getLogger(this.getClass) 20 | 21 | private var responseIterator: ScanVertexResultIterator = _ 22 | 23 | override def next(): Boolean = { 24 | if (dataIterator == null && responseIterator == null && !scanPartIterator.hasNext) 25 | return false 26 | 27 | var continue: Boolean = false 28 | var break: Boolean = false 29 | while ((dataIterator == null || !dataIterator.hasNext) && !break) { 30 | resultValues.clear() 31 | continue = false 32 | if (responseIterator == null || !responseIterator.hasNext) { 33 | if (scanPartIterator.hasNext) { 34 | try { 35 | if (nebulaOptions.noColumn) { 36 | responseIterator = storageClient.scanVertex(nebulaOptions.spaceName, 37 | scanPartIterator.next(), 38 | nebulaOptions.label, 39 | nebulaOptions.limit, 40 | 0, 41 | Long.MaxValue, 42 | true, 43 | true) 44 | } else { 45 | responseIterator = storageClient.scanVertex(nebulaOptions.spaceName, 46 | scanPartIterator.next(), 47 | nebulaOptions.label, 48 | nebulaOptions.getReturnCols.asJava, 49 | nebulaOptions.limit, 50 | 0, 51 | Long.MaxValue, 52 | true, 53 | true) 54 | } 55 | } catch { 56 | case e: Exception => 57 | LOG.error(s"Exception scanning vertex ${nebulaOptions.label}", e) 58 | storageClient.close() 59 | throw new Exception(e.getMessage, e) 60 | } 61 | // jump to the next loop 62 | continue = true 63 | } 64 | // break while loop 65 | break = !continue 66 | } else { 67 | val next: ScanVertexResult = responseIterator.next 68 | if (!next.isEmpty) { 69 | dataIterator = next.getVertexTableRows.iterator().asScala 70 | } 71 | } 72 | } 73 | 74 | if (dataIterator == null) { 75 | return false 76 | } 77 | dataIterator.hasNext 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaCommitMessage.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector.writer 8 | 9 | import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage 10 | 11 | case class NebulaCommitMessage(executeStatements: List[String]) extends WriterCommitMessage 12 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaEdgeWriter.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
5 | */ 6 | 7 | package com.vesoft.nebula.connector.writer 8 | 9 | import com.vesoft.nebula.connector.connector.{NebulaEdge, NebulaEdges} 10 | import com.vesoft.nebula.connector.{KeyPolicy, NebulaOptions, WriteMode} 11 | import org.apache.spark.sql.catalyst.InternalRow 12 | import org.apache.spark.sql.sources.v2.writer.{DataWriter, WriterCommitMessage} 13 | import org.apache.spark.sql.types.StructType 14 | import org.slf4j.LoggerFactory 15 | 16 | import scala.collection.mutable.ListBuffer 17 | 18 | class NebulaEdgeWriter(nebulaOptions: NebulaOptions, 19 | srcIndex: Int, 20 | dstIndex: Int, 21 | rankIndex: Option[Int], 22 | schema: StructType) 23 | extends NebulaWriter(nebulaOptions) 24 | with DataWriter[InternalRow] { 25 | 26 | private val LOG = LoggerFactory.getLogger(this.getClass) 27 | 28 | val rankIdx = if (rankIndex.isDefined) rankIndex.get else -1 29 | val propNames = NebulaExecutor.assignEdgePropNames(schema, 30 | srcIndex, 31 | dstIndex, 32 | rankIdx, 33 | nebulaOptions.srcAsProp, 34 | nebulaOptions.dstAsProp, 35 | nebulaOptions.rankAsProp) 36 | val fieldTypMap: Map[String, Integer] = 37 | if (nebulaOptions.writeMode == WriteMode.DELETE) Map[String, Integer]() 38 | else metaProvider.getEdgeSchema(nebulaOptions.spaceName, nebulaOptions.label) 39 | 40 | val srcPolicy = 41 | if (nebulaOptions.srcPolicy.isEmpty) Option.empty 42 | else Option(KeyPolicy.withName(nebulaOptions.srcPolicy)) 43 | val dstPolicy = { 44 | if (nebulaOptions.dstPolicy.isEmpty) Option.empty 45 | else Option(KeyPolicy.withName(nebulaOptions.dstPolicy)) 46 | } 47 | 48 | /** buffer to save batch edges */ 49 | var edges: ListBuffer[NebulaEdge] = new ListBuffer() 50 | 51 | prepareSpace() 52 | 53 | /** 54 | * write one edge record to buffer 55 | */ 56 | override def write(row: InternalRow): Unit = { 57 | val srcId = NebulaExecutor.extraID(schema, row, srcIndex, srcPolicy, isVidStringType) 58 | val dstId = NebulaExecutor.extraID(schema, row, dstIndex, dstPolicy, isVidStringType) 59 | val rank = 60 | if (rankIndex.isEmpty) Option.empty 61 | else Option(NebulaExecutor.extraRank(schema, row, rankIndex.get)) 62 | val values = 63 | if (nebulaOptions.writeMode == WriteMode.DELETE) List() 64 | else 65 | NebulaExecutor.assignEdgeValues(schema, 66 | row, 67 | srcIndex, 68 | dstIndex, 69 | rankIdx, 70 | nebulaOptions.srcAsProp, 71 | nebulaOptions.dstAsProp, 72 | nebulaOptions.rankAsProp, 73 | fieldTypMap) 74 | val nebulaEdge = NebulaEdge(srcId, dstId, rank, values) 75 | edges.append(nebulaEdge) 76 | if (edges.size >= nebulaOptions.batch) { 77 | execute() 78 | } 79 | } 80 | 81 | def execute(): Unit = { 82 | val nebulaEdges = NebulaEdges(propNames, edges.toList, srcPolicy, dstPolicy) 83 | val exec = nebulaOptions.writeMode match { 84 | case WriteMode.INSERT => NebulaExecutor.toExecuteSentence(nebulaOptions.label, nebulaEdges) 85 | case WriteMode.UPDATE => 86 | NebulaExecutor.toUpdateExecuteStatement(nebulaOptions.label, nebulaEdges) 87 | case WriteMode.DELETE => 88 | NebulaExecutor.toDeleteExecuteStatement(nebulaOptions.label, nebulaEdges) 89 | case _ => 90 | throw new IllegalArgumentException(s"write mode ${nebulaOptions.writeMode} not supported.") 91 | } 92 | edges.clear() 93 | submit(exec) 94 | } 95 | 96 | override def commit(): WriterCommitMessage = { 97 | if (edges.nonEmpty) { 98 | execute() 99 | } 100 | graphProvider.close() 101 | NebulaCommitMessage.apply(failedExecs.toList) 102 | } 103 | 104 | override def abort(): Unit = { 105 | LOG.error("insert edge task abort.") 106 | graphProvider.close() 107 | } 108 | } 109 | 
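NebulaEdgeWriter buffers NebulaEdge values and, once nebulaOptions.batch rows are collected, execute() renders them into a single nGQL statement built from the templates in NebulaTemplate. With WriteMode.INSERT that is BATCH_INSERT_TEMPLATE combined with EDGE_VALUE_TEMPLATE, yielding roughly: INSERT EDGE `friend`(degree) VALUES "a"->"b"@0: (95), "c"->"d"@0: (80) for a string-vid space. Below is a minimal write-side sketch of driving this writer through the DataFrame API, assuming the WriteNebulaEdgeConfig builder and the implicit write.nebula(...).writeEdges() entry point exposed by the connector's package object (names taken from the connector's README and example module, not from the files shown here); addresses, space, edge type, and column names are placeholders.

import org.apache.spark.sql.DataFrame
import com.vesoft.nebula.connector.{NebulaConnectionConfig, WriteNebulaEdgeConfig}
import com.vesoft.nebula.connector.connector.NebulaDataFrameWriter

object EdgeWriteSketch {
  // Writes a DataFrame with columns (src, dst, degree) as "friend" edges.
  def writeFriendEdges(df: DataFrame): Unit = {
    val connectionConfig = NebulaConnectionConfig
      .builder()
      .withMetaAddress("127.0.0.1:9559")   // placeholder meta address
      .withGraphAddress("127.0.0.1:9669")  // placeholder graph address
      .build()

    val writeEdgeConfig = WriteNebulaEdgeConfig
      .builder()
      .withSpace("test")        // target graph space
      .withEdge("friend")       // edge type
      .withSrcIdField("src")    // column holding the source vertex id
      .withDstIdField("dst")    // column holding the destination vertex id
      .withRankField("degree")  // optional rank column
      .withBatch(512)           // rows buffered before NebulaEdgeWriter.execute() flushes
      .build()

    // The implicit NebulaDataFrameWriter adds nebula(...) to df.write.
    df.write.nebula(connectionConfig, writeEdgeConfig).writeEdges()
  }
}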
-------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaSourceWriter.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector.writer 8 | 9 | import com.vesoft.nebula.connector.NebulaOptions 10 | import org.apache.spark.sql.catalyst.InternalRow 11 | import org.apache.spark.sql.sources.v2.writer.{ 12 | DataSourceWriter, 13 | DataWriter, 14 | DataWriterFactory, 15 | WriterCommitMessage 16 | } 17 | import org.apache.spark.sql.types.StructType 18 | import org.slf4j.LoggerFactory 19 | 20 | /** 21 | * creating and initializing the actual Nebula vertex writer at executor side 22 | */ 23 | class NebulaVertexWriterFactory(nebulaOptions: NebulaOptions, vertexIndex: Int, schema: StructType) 24 | extends DataWriterFactory[InternalRow] { 25 | override def createDataWriter(partitionId: Int, 26 | taskId: Long, 27 | epochId: Long): DataWriter[InternalRow] = { 28 | new NebulaVertexWriter(nebulaOptions, vertexIndex, schema) 29 | } 30 | } 31 | 32 | /** 33 | * creating and initializing the actual Nebula edge writer at executor side 34 | */ 35 | class NebulaEdgeWriterFactory(nebulaOptions: NebulaOptions, 36 | srcIndex: Int, 37 | dstIndex: Int, 38 | rankIndex: Option[Int], 39 | schema: StructType) 40 | extends DataWriterFactory[InternalRow] { 41 | override def createDataWriter(partitionId: Int, 42 | taskId: Long, 43 | epochId: Long): DataWriter[InternalRow] = { 44 | new NebulaEdgeWriter(nebulaOptions, srcIndex, dstIndex, rankIndex, schema) 45 | } 46 | } 47 | 48 | /** 49 | * nebula vertex writer to create factory 50 | */ 51 | class NebulaDataSourceVertexWriter(nebulaOptions: NebulaOptions, 52 | vertexIndex: Int, 53 | schema: StructType) 54 | extends DataSourceWriter { 55 | private val LOG = LoggerFactory.getLogger(this.getClass) 56 | 57 | override def createWriterFactory(): DataWriterFactory[InternalRow] = { 58 | new NebulaVertexWriterFactory(nebulaOptions, vertexIndex, schema) 59 | } 60 | 61 | override def commit(messages: Array[WriterCommitMessage]): Unit = { 62 | LOG.debug(s"${messages.length}") 63 | for (msg <- messages) { 64 | val nebulaMsg = msg.asInstanceOf[NebulaCommitMessage] 65 | LOG.info(s"failed execs:\n ${nebulaMsg.executeStatements.toString()}") 66 | } 67 | } 68 | 69 | override def abort(messages: Array[WriterCommitMessage]): Unit = { 70 | LOG.error("NebulaDataSourceVertexWriter abort") 71 | } 72 | } 73 | 74 | /** 75 | * nebula edge writer to create factory 76 | */ 77 | class NebulaDataSourceEdgeWriter(nebulaOptions: NebulaOptions, 78 | srcIndex: Int, 79 | dstIndex: Int, 80 | rankIndex: Option[Int], 81 | schema: StructType) 82 | extends DataSourceWriter { 83 | private val LOG = LoggerFactory.getLogger(this.getClass) 84 | 85 | override def createWriterFactory(): DataWriterFactory[InternalRow] = { 86 | new NebulaEdgeWriterFactory(nebulaOptions, srcIndex, dstIndex, rankIndex, schema) 87 | } 88 | 89 | override def commit(messages: Array[WriterCommitMessage]): Unit = { 90 | LOG.debug(s"${messages.length}") 91 | for (msg <- messages) { 92 | val nebulaMsg = msg.asInstanceOf[NebulaCommitMessage] 93 | LOG.info(s"failed execs:\n ${nebulaMsg.executeStatements.toString()}") 94 | } 95 | 96 | } 97 | 98 | override def 
abort(messages: Array[WriterCommitMessage]): Unit = { 99 | LOG.error("NebulaDataSourceEdgeWriter abort") 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaVertexWriter.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector.writer 8 | 9 | import com.vesoft.nebula.connector.connector.{NebulaVertex, NebulaVertices} 10 | import com.vesoft.nebula.connector.{KeyPolicy, NebulaOptions, WriteMode} 11 | import org.apache.spark.sql.catalyst.InternalRow 12 | import org.apache.spark.sql.sources.v2.writer.{DataWriter, WriterCommitMessage} 13 | import org.apache.spark.sql.types.StructType 14 | import org.slf4j.LoggerFactory 15 | 16 | import scala.collection.mutable.ListBuffer 17 | 18 | class NebulaVertexWriter(nebulaOptions: NebulaOptions, vertexIndex: Int, schema: StructType) 19 | extends NebulaWriter(nebulaOptions) 20 | with DataWriter[InternalRow] { 21 | 22 | private val LOG = LoggerFactory.getLogger(this.getClass) 23 | 24 | val propNames = NebulaExecutor.assignVertexPropNames(schema, vertexIndex, nebulaOptions.vidAsProp) 25 | val fieldTypMap: Map[String, Integer] = 26 | if (nebulaOptions.writeMode == WriteMode.DELETE) Map[String, Integer]() 27 | else metaProvider.getTagSchema(nebulaOptions.spaceName, nebulaOptions.label) 28 | 29 | val policy = { 30 | if (nebulaOptions.vidPolicy.isEmpty) Option.empty 31 | else Option(KeyPolicy.withName(nebulaOptions.vidPolicy)) 32 | } 33 | 34 | /** buffer to save batch vertices */ 35 | var vertices: ListBuffer[NebulaVertex] = new ListBuffer() 36 | 37 | prepareSpace() 38 | 39 | /** 40 | * write one vertex row to buffer 41 | */ 42 | override def write(row: InternalRow): Unit = { 43 | val vertex = 44 | NebulaExecutor.extraID(schema, row, vertexIndex, policy, isVidStringType) 45 | val values = 46 | if (nebulaOptions.writeMode == WriteMode.DELETE) List() 47 | else 48 | NebulaExecutor.assignVertexPropValues(schema, 49 | row, 50 | vertexIndex, 51 | nebulaOptions.vidAsProp, 52 | fieldTypMap) 53 | val nebulaVertex = NebulaVertex(vertex, values) 54 | vertices.append(nebulaVertex) 55 | if (vertices.size >= nebulaOptions.batch) { 56 | execute() 57 | } 58 | } 59 | 60 | def execute(): Unit = { 61 | val nebulaVertices = NebulaVertices(propNames, vertices.toList, policy) 62 | val exec = nebulaOptions.writeMode match { 63 | case WriteMode.INSERT => NebulaExecutor.toExecuteSentence(nebulaOptions.label, nebulaVertices) 64 | case WriteMode.UPDATE => 65 | NebulaExecutor.toUpdateExecuteStatement(nebulaOptions.label, nebulaVertices) 66 | case WriteMode.DELETE => NebulaExecutor.toDeleteExecuteStatement(nebulaVertices) 67 | case _ => 68 | throw new IllegalArgumentException(s"write mode ${nebulaOptions.writeMode} not supported.") 69 | } 70 | vertices.clear() 71 | submit(exec) 72 | } 73 | 74 | override def commit(): WriterCommitMessage = { 75 | if (vertices.nonEmpty) { 76 | execute() 77 | } 78 | graphProvider.close() 79 | NebulaCommitMessage(failedExecs.toList) 80 | } 81 | 82 | override def abort(): Unit = { 83 | LOG.error("insert vertex task abort.") 84 | graphProvider.close() 85 | } 86 | } 87 | -------------------------------------------------------------------------------- 
/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/writer/NebulaWriter.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory. 5 | */ 6 | 7 | package com.vesoft.nebula.connector.writer 8 | 9 | import java.util.concurrent.TimeUnit 10 | 11 | import com.google.common.util.concurrent.RateLimiter 12 | import com.vesoft.nebula.connector.NebulaOptions 13 | import com.vesoft.nebula.connector.nebula.{GraphProvider, MetaProvider, VidType} 14 | import org.slf4j.LoggerFactory 15 | 16 | import scala.collection.mutable.ListBuffer 17 | 18 | class NebulaWriter(nebulaOptions: NebulaOptions) extends Serializable { 19 | private val LOG = LoggerFactory.getLogger(this.getClass) 20 | 21 | val failedExecs: ListBuffer[String] = new ListBuffer[String] 22 | 23 | val graphProvider = new GraphProvider(nebulaOptions.getGraphAddress) 24 | val metaProvider = new MetaProvider(nebulaOptions.getMetaAddress) 25 | val isVidStringType = metaProvider.getVidType(nebulaOptions.spaceName) == VidType.STRING 26 | 27 | def prepareSpace(): Unit = { 28 | graphProvider.switchSpace(nebulaOptions.user, nebulaOptions.passwd, nebulaOptions.spaceName) 29 | } 30 | 31 | def submit(exec: String): Unit = { 32 | @transient val rateLimiter = RateLimiter.create(nebulaOptions.rateLimit) 33 | if (rateLimiter.tryAcquire(nebulaOptions.rateTimeOut, TimeUnit.MILLISECONDS)) { 34 | val result = graphProvider.submit(exec) 35 | if (!result.isSucceeded) { 36 | failedExecs.append(exec) 37 | LOG.error(s"failed to write ${exec} for " + result.getErrorMessage) 38 | } else { 39 | LOG.info(s"batch write succeeded") 40 | LOG.debug(s"batch write succeeded: ${exec}") 41 | } 42 | } else { 43 | failedExecs.append(exec) 44 | LOG.error(s"failed to acquire rateLimiter for statement ${exec}") 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /nebula-spark-connector/src/test/scala/com/vesoft/nebula/connector/NebulaConfigSuite.scala: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 vesoft inc. All rights reserved. 2 | * 3 | * This source code is licensed under Apache 2.0 License, 4 | * attached with Common Clause Condition 1.0, found in the LICENSES directory.
5 | */ 6 | 7 | package com.vesoft.nebula.connector 8 | 9 | import org.scalatest.BeforeAndAfterAll 10 | import org.scalatest.funsuite.AnyFunSuite 11 | 12 | class NebulaConfigSuite extends AnyFunSuite with BeforeAndAfterAll { 13 | 14 | test("test NebulaConnectionConfig") { 15 | try { 16 | NebulaConnectionConfig.builder().withTimeout(1).build() 17 | } catch { 18 | case e: java.lang.AssertionError => assert(true) 19 | } 20 | 21 | try { 22 | NebulaConnectionConfig.builder().withTimeout(-1).build() 23 | } catch { 24 | case e: java.lang.AssertionError => assert(true) 25 | } 26 | 27 | try { 28 | NebulaConnectionConfig 29 | .builder() 30 | .withMetaAddress("127.0.0.1:9559") 31 | .withTimeout(1) 32 | .build() 33 | assert(true) 34 | } catch { 35 | case _: Throwable => assert(false) 36 | } 37 | } 38 | 39 | test("test WriteNebulaConfig") { 40 | var writeNebulaConfig: WriteNebulaVertexConfig = null 41 | try { 42 | writeNebulaConfig = WriteNebulaVertexConfig 43 | .builder() 44 | .withSpace("test") 45 | .withTag("tag") 46 | .withVidField("vid") 47 | .build() 48 | } catch { 49 | case e: Throwable => assert(false) 50 | } 51 | assert(true) 52 | assert(!writeNebulaConfig.getVidAsProp) 53 | assert(writeNebulaConfig.getSpace.equals("test")) 54 | } 55 | 56 | test("test wrong policy") { 57 | try { 58 | WriteNebulaVertexConfig 59 | .builder() 60 | .withSpace("test") 61 | .withTag("tag") 62 | .withVidField("vId") 63 | .withVidPolicy("wrong_policy") 64 | .build() 65 | } catch { 66 | case e: java.lang.AssertionError => assert(true) 67 | } 68 | } 69 | 70 | test("test wrong batch") { 71 | try { 72 | WriteNebulaVertexConfig 73 | .builder() 74 | .withSpace("test") 75 | .withTag("tag") 76 | .withVidField("vId") 77 | .withVidPolicy("hash") 78 | .withBatch(-1) 79 | .build() 80 | } catch { 81 | case e: java.lang.AssertionError => assert(true) 82 | } 83 | } 84 | 85 | test("test ReadNebulaConfig") { 86 | try { 87 | ReadNebulaConfig 88 | .builder() 89 | .withSpace("test") 90 | .withLabel("tagName") 91 | .withNoColumn(true) 92 | .withReturnCols(List("col")) 93 | .build() 94 | } catch { 95 | case e: java.lang.AssertionError => assert(false) 96 | } 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.vesoft 8 | nebula-spark 9 | pom 10 | 2.5-SNAPSHOT 11 | 12 | 13 | UTF-8 14 | 15 | 16 | 17 | nebula-spark 18 | Nebula Spark Utils 19 | https://github.com/vesoft-inc/nebula-spark-utils 20 | 21 | scm:git:https://github.com/vesoft-inc/nebula 22 | https://github.com/vesoft-inc/nebula 23 | scm:git:https://github.com/vesoft-inc/nebula 24 | 25 | 26 | 27 | Apache License, Version 2.0 28 | https://www.apache.org/licenses/LICENSE-2.0.txt 29 | repo 30 | license 31 | 32 | 33 | 34 | 35 | 36 | nebula 37 | Nebula Graph 38 | nebula-spark-utils@vesoft-inc.com 39 | vesoft 40 | 41 | architect 42 | developer 43 | 44 | 45 | 46 | 47 | 48 | nebula-exchange 49 | nebula-spark-connector 50 | nebula-algorithm 51 | example 52 | 53 | 54 | 55 | 56 | release 57 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 58 | 59 | 60 | snapshots 61 | https://oss.sonatype.org/content/repositories/snapshots/ 62 | 63 | 64 | 65 | 66 | 67 | 68 | org.apache.maven.plugins 69 | maven-gpg-plugin 70 | 1.6 71 | 72 | 73 | verify 74 | 75 | sign 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | --------------------------------------------------------------------------------
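To close, a minimal read-side sketch that pairs with the ReadNebulaConfig builder exercised in NebulaConfigSuite, assuming the implicit read.nebula(...).loadVerticesToDF() entry point from the connector's package object and the withPartitionNum builder option as documented in the connector's README (neither is reproduced in the files shown here); the address, space, tag, and column names are placeholders.

import org.apache.spark.sql.{DataFrame, SparkSession}
import com.vesoft.nebula.connector.{NebulaConnectionConfig, ReadNebulaConfig}
import com.vesoft.nebula.connector.connector.NebulaDataFrameReader

object VertexReadSketch {
  // Scans the "person" tag from space "test" into a DataFrame.
  def readPersons(spark: SparkSession): DataFrame = {
    val connectionConfig = NebulaConnectionConfig
      .builder()
      .withMetaAddress("127.0.0.1:9559")  // placeholder meta address
      .build()

    val readConfig = ReadNebulaConfig
      .builder()
      .withSpace("test")             // graph space to read from
      .withLabel("person")           // tag (or edge type) to scan
      .withNoColumn(false)           // false: also read property columns
      .withReturnCols(List("name"))  // an empty list would read all properties
      .withPartitionNum(10)          // Spark partitions used for the scan (assumed builder option)
      .build()

    spark.read.nebula(connectionConfig, readConfig).loadVerticesToDF()
  }
}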