├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── docs
│   ├── AzureDocument.md
│   └── images
│       └── spark_sqldb_dataflow.png
├── lib
│   ├── mssql-jdbc-6.2.2.jre8.jar
│   └── scalastyle_config.xml
├── pom.xml
├── releases
│   └── azure-sqldb-spark-1.0.0
│       ├── azure-sqldb-spark-1.0.0-uber.jar
│       └── azure-sqldb-spark-1.0.0.jar
├── samples
│   ├── notebooks
│   │   └── Spark Connector for Azure SQL Databases and SQL Server.html
│   └── scripts
│       ├── BulkCopySample.scala
│       ├── ReadSample.scala
│       └── WriteSample.scala
└── src
    ├── main
    │   ├── java
    │   │   └── com
    │   │       └── microsoft
    │   │           └── azure
    │   │               └── sqldb
    │   │                   └── spark
    │   │                       └── bulkcopy
    │   │                           ├── BulkCopyMetadata.java
    │   │                           ├── ColumnMetadata.java
    │   │                           ├── SQLServerBulkDataFrameFileRecord.java
    │   │                           └── SQLServerExceptionReflection.java
    │   └── scala
    │       └── com
    │           └── microsoft
    │               └── azure
    │                   └── sqldb
    │                       └── spark
    │                           ├── Logging.scala
    │                           ├── LoggingTrait.scala
    │                           ├── bulk
    │                           │   └── BulkCopyUtils.scala
    │                           ├── config
    │                           │   ├── Config.scala
    │                           │   ├── SqlDBConfig.scala
    │                           │   └── SqlDBConfigBuilder.scala
    │                           ├── connect
    │                           │   ├── ConnectionUtils.scala
    │                           │   ├── DataFrameFunctions.scala
    │                           │   ├── DataFrameReaderFunctions.scala
    │                           │   ├── DataFrameWriterFunctions.scala
    │                           │   └── package.scala
    │                           └── query
    │                               ├── QueryFunctions.scala
    │                               └── package.scala
    └── test
        ├── java
        │   └── com
        │       └── microsoft
        │           └── azure
        │               └── sqldb
        │                   └── spark
        │                       └── bulkcopy
        │                           ├── BulkCopyMetadataTest.java
        │                           ├── ColumnMetadataTest.java
        │                           ├── SQLServerBulkDataFrameFileRecordTest.java
        │                           └── SQLServerExceptionReflectionTest.java
        └── scala
            └── com
                └── microsoft
                    └── azure
                        └── sqldb
                            └── spark
                                ├── SqlDBSpark.scala
                                ├── bulk
                                │   └── BulkCopyUtilsSpec.scala
                                ├── config
                                │   └── ConfigSpec.scala
                                └── connect
                                    └── ConnectionUtilsSpec.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 | *.log
3 | *.iml
4 | target/
5 |
6 | #IDE
7 | .idea/*
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | sudo: required
3 |
4 | matrix:
5 | include:
6 | - os: linux
7 | dist: trusty
8 | jdk: oraclejdk8
9 | - os: osx
10 | osx_image: xcode8
11 |
12 | script:
13 | - mvn clean package
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Microsoft Corporation. All rights reserved.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Updated Jun 2020: This project is not being actively maintained. Instead, [Apache Spark Connector for SQL Server and Azure SQL](https://github.com/microsoft/sql-spark-connector) is now available, with support for Python and R bindings, an easier-to-use interface to bulk insert data, and many other improvements. We encourage you to actively evaluate and use the new connector.
2 |
3 | # Spark connector for Azure SQL Databases and SQL Server
4 |
5 | [![Build Status](https://travis-ci.org/Azure/azure-sqldb-spark.svg?branch=master)](https://travis-ci.org/Azure/azure-sqldb-spark)
6 |
7 | The Spark connector for [Azure SQL Database](https://azure.microsoft.com/en-us/services/sql-database/) and [SQL Server](https://www.microsoft.com/en-us/sql-server/default.aspx) enables SQL databases, including Azure SQL Databases and SQL Server, to act as an input data source or output data sink for Spark jobs. It allows you to use real-time transactional data in big data analytics and persist results for ad-hoc queries or reporting.
8 |
9 | Compared to the built-in Spark JDBC connector, this connector provides the ability to bulk insert data into SQL databases, and can outperform row-by-row insertion by 10x to 20x. The Spark connector for Azure SQL Databases and SQL Server also supports AAD authentication, allowing you to connect securely to your Azure SQL databases from Azure Databricks using your AAD account. Because it provides interfaces similar to the built-in JDBC connector, it is easy to migrate your existing Spark jobs to this new connector.
10 |
11 | ## How to connect to Spark using this library
12 | This connector uses the Microsoft SQL Server JDBC driver to move data between Spark and Azure SQL Database or SQL Server.
13 | Results are of the `DataFrame` type.
14 |
15 | All connection properties of the Microsoft JDBC Driver for SQL Server are supported in this connector.
16 | Add connection properties as fields in the `com.microsoft.azure.sqldb.spark.config.Config` object.
19 |
20 |
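For example, driver-level connection properties can be mixed into the same map alongside the connector's own keys. A minimal sketch (`encrypt` and `loginTimeout` are standard JDBC driver property names, shown here purely for illustration):

```scala
import com.microsoft.azure.sqldb.spark.config.Config

// Connector keys ("url", "databaseName", "dbTable") and plain JDBC driver
// properties ("encrypt", "loginTimeout") live in the same map.
val config = Config(Map(
  "url"          -> "mysqlserver.database.windows.net",
  "databaseName" -> "MyDatabase",
  "dbTable"      -> "dbo.Clients",
  "user"         -> "username",
  "password"     -> "*********",
  "encrypt"      -> "true",
  "loginTimeout" -> "30"
))
```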
21 | ### Reading from Azure SQL Database or SQL Server
22 | ```scala
23 | import com.microsoft.azure.sqldb.spark.config.Config
24 | import com.microsoft.azure.sqldb.spark.connect._
25 |
26 | val config = Config(Map(
27 | "url" -> "mysqlserver.database.windows.net",
28 | "databaseName" -> "MyDatabase",
29 | "dbTable" -> "dbo.Clients"
30 | "user" -> "username",
31 | "password" -> "*********",
32 | "connectTimeout" -> "5", //seconds
33 | "queryTimeout" -> "5" //seconds
34 | ))
35 |
36 | val collection = sqlContext.read.sqlDB(config)
37 | collection.show()
38 |
39 | ```
40 |
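Because the result is an ordinary Spark `DataFrame`, the usual transformations apply afterwards. A small illustration (the `PostalCode`, `FirstName`, and `LastName` columns are assumed from the samples elsewhere in this repo):

```scala
// Standard DataFrame operations work on the returned collection.
val filtered = collection.filter(collection("PostalCode") === 98074)
filtered.select("FirstName", "LastName").show()
```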
41 | ### Writing to Azure SQL Database or SQL Server
42 | ```scala
43 | import com.microsoft.azure.sqldb.spark.config.Config
44 | import com.microsoft.azure.sqldb.spark.connect._
45 |
46 | // Acquire a DataFrame collection (val collection)
47 |
48 | val config = Config(Map(
49 | "url" -> "mysqlserver.database.windows.net",
50 | "databaseName" -> "MyDatabase",
51 | "dbTable" -> "dbo.Clients"
52 | "user" -> "username",
53 | "password" -> "*********"
54 | ))
55 |
56 | import org.apache.spark.sql.SaveMode
57 | collection.write.mode(SaveMode.Append).sqlDB(config)
58 |
59 | ```
60 | ### Pushdown query to Azure SQL Database or SQL Server
61 | For SELECT queries with expected return results, please use
62 | [Reading from Azure SQL Database or SQL Server](#reading-from-azure-sql-database-or-sql-server).
63 | ```scala
64 | import com.microsoft.azure.sqldb.spark.config.Config
65 | import com.microsoft.azure.sqldb.spark.query._
66 | val query = """
67 | |UPDATE Customers
68 | |SET ContactName = 'Alfred Schmidt', City= 'Frankfurt'
69 | |WHERE CustomerID = 1;
70 | """.stripMargin
71 |
72 | val config = Config(Map(
73 | "url" -> "mysqlserver.database.windows.net",
74 | "databaseName" -> "MyDatabase",
75 | "user" -> "username",
76 | "password" -> "*********",
77 | "queryCustom" -> query
78 | ))
79 |
80 | sqlContext.sqlDBQuery(config)
81 | ```
82 | ### Bulk Copy to Azure SQL Database or SQL Server
83 | ```scala
84 | import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata
85 | import com.microsoft.azure.sqldb.spark.config.Config
86 | import com.microsoft.azure.sqldb.spark.connect._
87 |
88 | /**
89 | Add column Metadata.
90 |   If not specified, metadata is extracted automatically
91 |   from the destination table, which can hurt performance.
92 | */
93 | var bulkCopyMetadata = new BulkCopyMetadata
94 | bulkCopyMetadata.addColumnMetadata(1, "Title", java.sql.Types.NVARCHAR, 128, 0)
95 | bulkCopyMetadata.addColumnMetadata(2, "FirstName", java.sql.Types.NVARCHAR, 50, 0)
96 | bulkCopyMetadata.addColumnMetadata(3, "LastName", java.sql.Types.NVARCHAR, 50, 0)
97 |
98 | val bulkCopyConfig = Config(Map(
99 | "url" -> "mysqlserver.database.windows.net",
100 | "databaseName" -> "MyDatabase",
101 | "user" -> "username",
102 | "password" -> "*********",
103 | "databaseName" -> "MyDatabase",
104 | "dbTable" -> "dbo.Clients",
105 | "bulkCopyBatchSize" -> "2500",
106 | "bulkCopyTableLock" -> "true",
107 | "bulkCopyTimeout" -> "600"
108 | ))
109 |
110 | df.bulkCopyToSqlDB(bulkCopyConfig, bulkCopyMetadata)
111 | //df.bulkCopyToSqlDB(bulkCopyConfig) if no metadata is specified.
112 | ```
113 |
114 | ## Requirements
115 | Officially supported versions
116 |
117 | | Component | Versions Supported |
118 | | --------- | ------------------ |
119 | | Apache Spark | 2.0.2 or later |
120 | | Scala | 2.10 or later |
121 | | Microsoft JDBC Driver for SQL Server | 6.2 to 7.4^ |
122 | | Microsoft SQL Server | SQL Server 2008 or later |
123 | | Azure SQL Databases | Supported |
124 |
125 | ^ Driver version 8.x not tested
126 |
127 | ## Download
128 | ### Download from Maven
129 | You can download the latest version from [here](https://search.maven.org/search?q=a:azure-sqldb-spark)
130 |
131 | You can also use the following coordinate to import the library into Azure Databricks:
132 | `com.microsoft.azure:azure-sqldb-spark:1.0.2`
133 |
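If you build with sbt instead, the same coordinate would be declared as follows (a sketch; adjust the version to the release you want):

```scala
// build.sbt
libraryDependencies += "com.microsoft.azure" % "azure-sqldb-spark" % "1.0.2"
```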
134 | ### Build this project
135 | Currently, the connector project uses Maven. To build the connector without dependencies, you can run:
136 | ```sh
137 | mvn clean package
138 | ```
139 |
140 | ## Contributing & Feedback
141 |
142 | This project has adopted the [Microsoft Open Source Code of
143 | Conduct](https://opensource.microsoft.com/codeofconduct/). For more information
144 | see the [Code of Conduct
145 | FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact
146 | [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional
147 | questions or comments.
148 |
149 | To give feedback and/or report an issue, open a [GitHub
150 | Issue](https://help.github.com/articles/creating-an-issue/).
151 |
152 |
153 | *Apache®, Apache Spark, and Spark® are either registered trademarks or
154 | trademarks of the Apache Software Foundation in the United States and/or other
155 | countries.*
156 |
--------------------------------------------------------------------------------
/docs/AzureDocument.md:
--------------------------------------------------------------------------------
1 | # Accelerate real-time big data analytics with Spark connector for Azure SQL Database and SQL Server
2 |
3 | The Spark connector for Azure SQL Database and SQL Server enables SQL databases, including Azure SQL Database and SQL Server, to act as an input data source or output data sink for Spark jobs. It allows you to use real-time transactional data in big data analytics and persist results for ad-hoc queries or reporting. Compared to the built-in JDBC connector, this connector provides the ability to bulk insert data into SQL databases, and can outperform row-by-row insertion by 10x to 20x. It also supports AAD authentication, allowing you to connect securely to your Azure SQL database from Azure Databricks using your AAD account. Because it provides interfaces similar to the built-in JDBC connector, it is easy to migrate your existing Spark jobs to this new connector.
4 |
5 | ## Download
6 | To get started, download the Spark to SQL DB connector from the [azure-sqldb-spark repository](https://github.com/Azure/azure-sqldb-spark) on GitHub.
7 |
8 | ## Official Supported Versions
9 |
10 | | Component | Version |
11 | | :----------------------------------- | :----------------------- |
12 | | Apache Spark | 2.0.2 or later |
13 | | Scala | 2.10 or later |
14 | | Microsoft JDBC Driver for SQL Server | 6.2 or later |
15 | | Microsoft SQL Server | SQL Server 2008 or later |
16 | | Azure SQL Database | Supported |
17 |
18 | The Spark connector for Azure SQL Database and SQL Server utilizes the Microsoft JDBC Driver for SQL Server to move data between Spark worker nodes and SQL databases:
19 |
20 | The data flow is as follows:
21 | 1. The Spark master node connects to SQL Server or Azure SQL Database and loads data from a specific table or using a specific SQL query.
22 | 2. The Spark master node distributes data to worker nodes for transformation.
23 | 3. Worker nodes connect to SQL Server or Azure SQL Database and write data to the database. Users can choose row-by-row insertion or bulk insert.
24 |
25 | ### Build the Spark to SQL DB connector
26 | Currently, the connector project uses Maven. To build the connector without dependencies, you can run:
27 |     mvn clean package
28 | You can also download the latest version of the JAR from the releases folder.
29 | Include the SQL DB Spark JAR in your Spark job's classpath.
30 |
31 | ## Connect Spark to SQL DB using the connector
32 | You can connect to Azure SQL Database or SQL Server from Spark jobs to read or write data. You can also run a DML or DDL query against an Azure SQL database or SQL Server database.
33 |
34 | ### Read data from Azure SQL Database or SQL Server
35 |
36 | ```scala
37 | import com.microsoft.azure.sqldb.spark.config.Config
38 | import com.microsoft.azure.sqldb.spark.connect._
39 |
40 | val config = Config(Map(
41 | "url" -> "mysqlserver.database.windows.net",
42 | "databaseName" -> "MyDatabase",
43 | "dbTable" -> "dbo.Clients"
44 | "user" -> "username",
45 | "password" -> "*********",
46 | "connectTimeout" -> "5", //seconds
47 | "queryTimeout" -> "5" //seconds
48 | ))
49 |
50 | val collection = sqlContext.read.sqlDB(config)
51 | collection.show()
52 | ```
53 | ### Reading data from Azure SQL Database or SQL Server with specified SQL query
54 | ```scala
55 | import com.microsoft.azure.sqldb.spark.config.Config
56 | import com.microsoft.azure.sqldb.spark.connect._
57 |
58 | val config = Config(Map(
59 | "url" -> "mysqlserver.database.windows.net",
60 | "databaseName" -> "MyDatabase",
61 | "queryCustom" -> "SELECT TOP 100 * FROM dbo.Clients WHERE PostalCode = 98074" //Sql query
62 | "user" -> "username",
63 | "password" -> "*********",
64 | ))
65 |
66 | //Run the custom query and return the result as a DataFrame
67 | val collection = sqlContext.read.sqlDB(config)
68 | collection.show()
69 | ```
70 |
71 | ### Write data to Azure SQL Database or SQL Server
72 | ```scala
73 | import com.microsoft.azure.sqldb.spark.config.Config
74 | import com.microsoft.azure.sqldb.spark.connect._
75 |
76 | // Acquire a DataFrame collection (val collection)
77 |
78 | val config = Config(Map(
79 | "url" -> "mysqlserver.database.windows.net",
80 | "databaseName" -> "MyDatabase",
81 | "dbTable" -> "dbo.Clients"
82 | "user" -> "username",
83 | "password" -> "*********"
84 | ))
85 |
86 | import org.apache.spark.sql.SaveMode
87 | collection.write.mode(SaveMode.Append).sqlDB(config)
88 | ```
89 |
90 | ### Run DML or DDL query in Azure SQL Database or SQL Server
91 | ```scala
92 | import com.microsoft.azure.sqldb.spark.config.Config
93 | import com.microsoft.azure.sqldb.spark.query._
94 | val query = """
95 | |UPDATE Customers
96 | |SET ContactName = 'Alfred Schmidt', City = 'Frankfurt'
97 | |WHERE CustomerID = 1;
98 | """.stripMargin
99 |
100 | val config = Config(Map(
101 | "url" -> "mysqlserver.database.windows.net",
102 | "databaseName" -> "MyDatabase",
103 | "user" -> "username",
104 | "password" -> "*********",
105 | "queryCustom" -> query
106 | ))
107 |
108 | sqlContext.sqlDBQuery(config)
109 | ```
110 |
111 | ## Connect Spark to Azure SQL Database using AAD authentication
112 | You can connect to Azure SQL Database using Azure Active Directory (AAD) authentication. Use AAD authentication to centrally manage identities of database users and as an alternative to SQL Server authentication.
113 | ### Connecting using ActiveDirectoryPassword Authentication Mode
114 | #### Setup Requirement
115 | If you are using the ActiveDirectoryPassword authentication mode, you will need to download [azure-activedirectory-library-for-java](https://github.com/AzureAD/azure-activedirectory-library-for-java) and its dependencies, and include them in the Java build path.
116 |
117 | ```scala
118 | import com.microsoft.azure.sqldb.spark.config.Config
119 | import com.microsoft.azure.sqldb.spark.connect._
120 |
121 | val config = Config(Map(
122 | "url" -> "mysqlserver.database.windows.net",
123 | "databaseName" -> "MyDatabase",
124 | "user" -> "username",
125 | "password" -> "*********",
126 | "authentication" -> "ActiveDirectoryPassword",
127 | "encrypt" -> "true"
128 | ))
129 |
130 | val collection = sqlContext.read.sqlDB(config)
131 | collection.show()
132 | ```
133 |
134 | ### Connecting using Access Token
135 | #### Setup Requirement
136 | If you are using the access token based authentication mode, you will need to download [azure-activedirectory-library-for-java](https://github.com/AzureAD/azure-activedirectory-library-for-java) and its dependencies, and include them in the Java build path.
137 |
138 | See [Use Azure Active Directory Authentication for authentication with SQL Database](https://docs.microsoft.com/en-us/azure/sql-database/sql-database-aad-authentication) to learn how to get an access token for your Azure SQL database.
139 |
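As a rough sketch of how such a token can be obtained with the adal4j library (which this repo declares as a dependency): the tenant, client id, and secret below are placeholders you must supply from your own AAD application registration.

```scala
import java.util.concurrent.Executors
import com.microsoft.aad.adal4j.{AuthenticationContext, ClientCredential}

// Placeholder AAD values; replace with your own registration details.
val authority = "https://login.windows.net/<your-tenant-id>"
val resource  = "https://database.windows.net/" // Azure SQL Database resource URI
val service   = Executors.newSingleThreadExecutor()

val context    = new AuthenticationContext(authority, true, service)
val credential = new ClientCredential("<client-id>", "<client-secret>")
val accessToken = context.acquireToken(resource, credential, null).get().getAccessToken
service.shutdown()
```

The resulting string is what the `accessToken` field in the config below expects.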
140 | ```scala
141 | import com.microsoft.azure.sqldb.spark.config.Config
142 | import com.microsoft.azure.sqldb.spark.connect._
143 |
144 | val config = Config(Map(
145 | "url" -> "mysqlserver.database.windows.net",
146 | "databaseName" -> "MyDatabase",
147 | "accessToken" -> "access_token",
148 | "hostNameInCertificate" -> "*.database.windows.net",
149 | "encrypt" -> "true"
150 | ))
151 |
152 | val collection = sqlContext.read.sqlDB(config)
153 | collection.show()
154 | ```
155 |
156 | ## Write data to Azure SQL database or SQL Server using Bulk Insert
157 | The traditional JDBC connector writes data into Azure SQL database or SQL Server using row-by-row insertion. You can use the Spark to SQL DB connector to write data to SQL databases using bulk insert, which significantly improves write performance when loading large data sets or loading data into tables where a column store index is used.
158 |
159 | ```scala
160 | import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata
161 | import com.microsoft.azure.sqldb.spark.config.Config
162 | import com.microsoft.azure.sqldb.spark.connect._
163 |
164 | /**
165 | Add column Metadata.
166 |   If not specified, metadata is extracted automatically
167 |   from the destination table, which can hurt performance.
168 | */
169 | var bulkCopyMetadata = new BulkCopyMetadata
170 | bulkCopyMetadata.addColumnMetadata(1, "Title", java.sql.Types.NVARCHAR, 128, 0)
171 | bulkCopyMetadata.addColumnMetadata(2, "FirstName", java.sql.Types.NVARCHAR, 50, 0)
172 | bulkCopyMetadata.addColumnMetadata(3, "LastName", java.sql.Types.NVARCHAR, 50, 0)
173 |
174 | val bulkCopyConfig = Config(Map(
175 | "url" -> "mysqlserver.database.windows.net",
176 | "databaseName" -> "MyDatabase",
177 | "user" -> "username",
178 | "password" -> "*********",
179 | "databaseName" -> "zeqisql",
180 | "dbTable" -> "dbo.Clients",
181 | "bulkCopyBatchSize" -> "2500",
182 | "bulkCopyTableLock" -> "true",
183 | "bulkCopyTimeout" -> "600"
184 | ))
185 |
186 | df.bulkCopyToSqlDB(bulkCopyConfig, bulkCopyMetadata)
187 | //df.bulkCopyToSqlDB(bulkCopyConfig) if no metadata is specified.
188 | ```
189 |
190 | ## Next steps
191 | If you haven't already, download the Spark connector for Azure SQL Database and SQL Server from [azure-sqldb-spark GitHub repository](https://github.com/Azure/azure-sqldb-spark) and explore the additional resources in the repo:
192 |
193 | - [Sample Azure Databricks notebooks](https://github.com/Azure/azure-sqldb-spark/tree/master/samples/notebooks)
194 | - [Sample scripts (Scala)](https://github.com/Azure/azure-sqldb-spark/tree/master/samples/scripts)
195 |
196 | You might also want to review the [Apache Spark SQL, DataFrames, and Datasets Guide](http://spark.apache.org/docs/latest/sql-programming-guide.html) and the [Azure Databricks documentation](https://docs.microsoft.com/en-us/azure/azure-databricks/).
197 |
198 |
--------------------------------------------------------------------------------
/docs/images/spark_sqldb_dataflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/azure-sqldb-spark/47f5cdc19f3b7c4990118cca4f3091ce0fd201d8/docs/images/spark_sqldb_dataflow.png
--------------------------------------------------------------------------------
/lib/mssql-jdbc-6.2.2.jre8.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/azure-sqldb-spark/47f5cdc19f3b7c4990118cca4f3091ce0fd201d8/lib/mssql-jdbc-6.2.2.jre8.jar
--------------------------------------------------------------------------------
/lib/scalastyle_config.xml:
--------------------------------------------------------------------------------
1 | [Scalastyle standard configuration: the XML rule definitions were stripped during extraction and are not recoverable from this dump. One custom rule message survives: "Avoid print line, use logger."]
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <modelVersion>4.0.0</modelVersion>
6 |   <groupId>com.microsoft.azure</groupId>
7 |   <artifactId>azure-sqldb-spark</artifactId>
8 |   <packaging>jar</packaging>
9 |   <version>1.0.0</version>
10 |   <name>${project.groupId}:${project.artifactId}</name>
11 |   <description>Spark Connector for Microsoft Azure SQL Database and SQL Server</description>
12 |
13 |   <licenses>
14 |     <license>
15 |       <name>MIT License</name>
16 |       <url>http://www.opensource.org/licenses/mit-license.php</url>
17 |     </license>
18 |   </licenses>
19 |
20 |   <properties>
21 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
22 |     <scala.binary.version>2.11</scala.binary.version>
23 |     <scala.version>2.11.8</scala.version>
24 |     <spark.version>2.2.2</spark.version>
25 |   </properties>
26 |
27 |   <dependencies>
28 |     <dependency>
29 |       <groupId>org.scala-lang</groupId>
30 |       <artifactId>scala-library</artifactId>
31 |       <version>${scala.version}</version>
32 |     </dependency>
33 |     <dependency>
34 |       <groupId>junit</groupId>
35 |       <artifactId>junit</artifactId>
36 |       <version>4.8.1</version>
37 |       <scope>test</scope>
38 |     </dependency>
39 |     <dependency>
40 |       <groupId>org.apache.spark</groupId>
41 |       <artifactId>spark-core_${scala.binary.version}</artifactId>
42 |       <version>${spark.version}</version>
43 |       <scope>provided</scope>
44 |     </dependency>
45 |     <dependency>
46 |       <groupId>org.apache.spark</groupId>
47 |       <artifactId>spark-sql_${scala.binary.version}</artifactId>
48 |       <version>${spark.version}</version>
49 |       <scope>provided</scope>
50 |     </dependency>
51 |     <dependency>
52 |       <groupId>org.scalactic</groupId>
53 |       <artifactId>scalactic_${scala.binary.version}</artifactId>
54 |       <version>3.0.4</version>
55 |     </dependency>
56 |     <dependency>
57 |       <groupId>org.scalatest</groupId>
58 |       <artifactId>scalatest_${scala.binary.version}</artifactId>
59 |       <version>3.0.4</version>
60 |       <scope>test</scope>
61 |     </dependency>
62 |     <dependency>
63 |       <groupId>com.microsoft.azure</groupId>
64 |       <artifactId>adal4j</artifactId>
65 |       <version>1.2.0</version>
66 |     </dependency>
67 |     <dependency>
68 |       <groupId>com.microsoft.sqlserver</groupId>
69 |       <artifactId>mssql-jdbc</artifactId>
70 |       <version>6.4.0.jre8</version>
71 |     </dependency>
72 |   </dependencies>
73 |
74 |   <developers>
75 |     <developer>
76 |       <name>Azure SQL DB Devs</name>
77 |       <organization>Microsoft</organization>
78 |       <organizationUrl>http://www.microsoft.com/</organizationUrl>
79 |     </developer>
80 |   </developers>
81 |
82 |   <distributionManagement>
83 |     <snapshotRepository>
84 |       <id>ossrh</id>
85 |       <url>https://oss.sonatype.org/content/repositories/snapshots</url>
86 |     </snapshotRepository>
87 |     <repository>
88 |       <id>ossrh</id>
89 |       <url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
90 |     </repository>
91 |   </distributionManagement>
92 |
93 |   <build>
94 |     <plugins>
95 |       <plugin>
96 |         <groupId>org.codehaus.mojo</groupId>
97 |         <artifactId>build-helper-maven-plugin</artifactId>
98 |         <version>3.0.0</version>
99 |         <executions>
100 |           <execution>
101 |             <phase>generate-sources</phase>
102 |             <goals>
103 |               <goal>add-source</goal>
104 |             </goals>
105 |             <configuration>
106 |               <sources>
107 |                 <source>src/main/java</source>
108 |                 <source>src/main/scala</source>
109 |               </sources>
110 |             </configuration>
111 |           </execution>
112 |           <execution>
113 |             <id>add-test-source</id>
114 |             <phase>generate-test-sources</phase>
115 |             <goals>
116 |               <goal>add-test-source</goal>
117 |             </goals>
118 |             <configuration>
119 |               <sources>
120 |                 <source>src/test/java</source>
121 |                 <source>src/test/scala</source>
122 |               </sources>
123 |             </configuration>
124 |           </execution>
125 |         </executions>
126 |       </plugin>
127 |       <plugin>
128 |         <groupId>org.apache.maven.plugins</groupId>
129 |         <artifactId>maven-assembly-plugin</artifactId>
130 |         <version>3.0.0</version>
131 |         <configuration>
132 |           <descriptorRefs>
133 |             <descriptorRef>jar-with-dependencies</descriptorRef>
134 |           </descriptorRefs>
135 |         </configuration>
136 |         <executions>
137 |           <execution>
138 |             <id>make-assembly</id>
139 |             <phase>package</phase>
140 |             <goals>
141 |               <goal>single</goal>
142 |             </goals>
143 |           </execution>
144 |         </executions>
145 |       </plugin>
146 |       <plugin>
147 |         <groupId>org.apache.maven.plugins</groupId>
148 |         <artifactId>maven-javadoc-plugin</artifactId>
149 |       </plugin>
150 |       <plugin>
151 |         <groupId>org.apache.maven.plugins</groupId>
152 |         <artifactId>maven-source-plugin</artifactId>
153 |         <version>3.0.1</version>
154 |         <executions>
155 |           <execution>
156 |             <id>attach-sources</id>
157 |             <phase>verify</phase>
158 |             <goals>
159 |               <goal>jar-no-fork</goal>
160 |             </goals>
161 |           </execution>
162 |         </executions>
163 |       </plugin>
164 |       <plugin>
165 |         <groupId>org.sonatype.plugins</groupId>
166 |         <artifactId>nexus-staging-maven-plugin</artifactId>
167 |         <version>1.6.8</version>
168 |         <extensions>true</extensions>
169 |         <configuration>
170 |           <serverId>ossrh</serverId>
171 |           <nexusUrl>https://oss.sonatype.org/</nexusUrl>
172 |           <autoReleaseAfterClose>false</autoReleaseAfterClose>
173 |         </configuration>
174 |       </plugin>
175 |       <plugin>
176 |         <groupId>org.scalastyle</groupId>
177 |         <artifactId>scalastyle-maven-plugin</artifactId>
178 |         <version>1.0.0</version>
179 |         <configuration>
180 |           <verbose>false</verbose>
181 |           <failOnViolation>true</failOnViolation>
182 |           <includeTestSourceDirectory>true</includeTestSourceDirectory>
183 |           <failOnWarning>false</failOnWarning>
184 |           <sourceDirectory>${project.basedir}/src/main/scala</sourceDirectory>
185 |           <testSourceDirectory>${project.basedir}/src/test/scala</testSourceDirectory>
186 |           <configLocation>${project.basedir}/lib/scalastyle_config.xml</configLocation>
187 |           <outputEncoding>UTF-8</outputEncoding>
188 |         </configuration>
189 |         <executions>
190 |           <execution>
191 |             <goals>
192 |               <goal>check</goal>
193 |             </goals>
194 |           </execution>
195 |         </executions>
196 |       </plugin>
197 |       <plugin>
198 |         <artifactId>maven-compiler-plugin</artifactId>
199 |         <version>3.7.0</version>
200 |         <configuration>
201 |           <source>1.8</source>
202 |           <target>1.8</target>
203 |         </configuration>
204 |       </plugin>
205 |       <plugin>
206 |         <groupId>net.alchim31.maven</groupId>
207 |         <artifactId>scala-maven-plugin</artifactId>
208 |         <version>3.2.2</version>
209 |         <configuration>
210 |           <useZincServer>false</useZincServer>
211 |           <scalaVersion>${scala.version}</scalaVersion>
212 |           <args>
213 |             <arg>-deprecation</arg>
214 |             <arg>-feature</arg>
215 |           </args>
216 |         </configuration>
217 |         <executions>
218 |           <execution>
219 |             <id>scala-compile-first</id>
220 |             <phase>process-resources</phase>
221 |             <goals>
222 |               <goal>add-source</goal>
223 |             </goals>
224 |           </execution>
225 |           <execution>
226 |             <id>scala-compile</id>
227 |             <goals>
228 |               <goal>compile</goal>
229 |             </goals>
230 |             <phase>compile</phase>
231 |           </execution>
232 |           <execution>
233 |             <id>test-compile</id>
234 |             <goals>
235 |               <goal>testCompile</goal>
236 |             </goals>
237 |             <phase>test-compile</phase>
238 |           </execution>
239 |           <execution>
240 |             <id>scala-doc</id>
241 |             <phase>prepare-package</phase>
242 |             <goals>
243 |               <goal>doc</goal>
244 |               <goal>doc-jar</goal>
245 |             </goals>
246 |           </execution>
247 |         </executions>
248 |       </plugin>
249 |       <plugin>
250 |         <groupId>org.scalatest</groupId>
251 |         <artifactId>scalatest-maven-plugin</artifactId>
252 |         <version>1.0</version>
253 |         <configuration>
254 |           <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
255 |           <junitxml>.</junitxml>
256 |           <filereports>TestSuite.txt</filereports>
257 |           <stdout>W</stdout>
258 |         </configuration>
259 |         <executions>
260 |           <execution>
261 |             <id>scala-test</id>
262 |             <goals>
263 |               <goal>test</goal>
264 |             </goals>
265 |           </execution>
266 |         </executions>
267 |       </plugin>
268 |     </plugins>
269 |   </build>
270 | </project>
--------------------------------------------------------------------------------
/releases/azure-sqldb-spark-1.0.0/azure-sqldb-spark-1.0.0-uber.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/azure-sqldb-spark/47f5cdc19f3b7c4990118cca4f3091ce0fd201d8/releases/azure-sqldb-spark-1.0.0/azure-sqldb-spark-1.0.0-uber.jar
--------------------------------------------------------------------------------
/releases/azure-sqldb-spark-1.0.0/azure-sqldb-spark-1.0.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Azure/azure-sqldb-spark/47f5cdc19f3b7c4990118cca4f3091ce0fd201d8/releases/azure-sqldb-spark-1.0.0/azure-sqldb-spark-1.0.0.jar
--------------------------------------------------------------------------------
/samples/notebooks/Spark Connector for Azure SQL Databases and SQL Server.html:
--------------------------------------------------------------------------------
1 | [Databricks notebook export titled "Spark Connector for Azure SQL Databases and SQL Server - Databricks". The notebook's HTML markup was stripped during extraction and is not recoverable from this dump.]
--------------------------------------------------------------------------------
/samples/scripts/BulkCopySample.scala:
--------------------------------------------------------------------------------
1 | // Import libraries
2 | import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata
3 | import com.microsoft.azure.sqldb.spark.config.Config
4 | import com.microsoft.azure.sqldb.spark.connect._
5 |
6 | val url = "[Enter your url here]"
7 | val databaseName = "[Enter your database name here]"
8 | val dbTable = "[Enter your database table here]"
9 |
10 | val user = "[Enter your username here]"
11 | val password = "[Enter your password here]"
12 |
13 | // Acquire data to be written.
14 | // df could be acquired in any way.
15 | val localTable = "[Enter your local persisted table here]"
16 | val df = spark.sql(s"SELECT * FROM $localTable")
17 |
18 | val writeConfig = Config(Map(
19 | "url" -> url,
20 | "databaseName" -> databaseName,
21 | "dbTable" -> dbTable,
22 | "user" -> user,
23 | "password" -> password,
24 | "connectTimeout" -> "5",
25 | "bulkCopyBatchSize" -> "100000",
26 | "bulkCopyTableLock" -> "true",
27 | "bulkCopyTimeout" -> "600"
28 | ))
29 |
30 | df.bulkCopyToSqlDB(writeConfig)
31 |
32 | /**
33 | For better performance, specify the column metadata of the table
34 |
35 | var bulkCopyMetadata = new BulkCopyMetadata
36 | bulkCopyMetadata.addColumnMetadata(1, "Title", java.sql.Types.NVARCHAR, 128, 0)
37 | bulkCopyMetadata.addColumnMetadata(2, "FirstName", java.sql.Types.NVARCHAR, 128, 0)
38 | bulkCopyMetadata.addColumnMetadata(3, "MiddleName", java.sql.Types.NVARCHAR, 128, 0)
39 | bulkCopyMetadata.addColumnMetadata(4, "LastName", java.sql.Types.NVARCHAR, 128, 0)
40 | ..........
41 |
42 | df.bulkCopyToSqlDB(writeConfig, bulkCopyMetadata)
43 | **/
--------------------------------------------------------------------------------
/samples/scripts/ReadSample.scala:
--------------------------------------------------------------------------------
1 | // Import libraries
2 | import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata
3 | import com.microsoft.azure.sqldb.spark.config.Config
4 | import com.microsoft.azure.sqldb.spark.connect._
5 |
6 | val url = "[Enter your url here]"
7 | val databaseName = "[Enter your database name here]"
8 | val dbTable = "[Enter your database table here]"
9 |
10 | val user = "[Enter your username here]"
11 | val password = "[Enter your password here]"
12 |
13 | // READ FROM CONFIG
14 | val readConfig = Config(Map(
15 | "url" -> url,
16 | "databaseName" -> databaseName,
17 | "user" -> user,
18 | "password" -> password,
19 | "connectTimeout" -> "5",
20 | "queryTimeout" -> "5",
21 | "dbTable" -> dbTable
22 | ))
23 |
24 | val df = sqlContext.read.sqlDB(readConfig)
25 | println("Total rows: " + df.count)
26 | df.show()
27 |
28 | // TRADITIONAL SYNTAX
29 | import java.util.Properties
30 |
31 | val properties = new Properties()
32 | properties.put("databaseName", databaseName)
33 | properties.put("user", user)
34 | properties.put("password", password)
35 | properties.put("connectTimeout", "5")
36 | properties.put("queryTimeout", "5")
37 |
38 | val df = sqlContext.read.sqlDB(url, dbTable, properties)
39 | println("Total rows: " + df.count)
40 | df.show()
41 |
--------------------------------------------------------------------------------
/samples/scripts/WriteSample.scala:
--------------------------------------------------------------------------------
1 | // Import libraries
2 | import com.microsoft.azure.sqldb.spark.config.Config
3 | import com.microsoft.azure.sqldb.spark.connect._
4 | import org.apache.spark.sql.SaveMode // needed for SaveMode.Append below
5 | val url = "[Enter your url here]"
6 | val databaseName = "[Enter your database name here]"
7 | val dbTable = "[Enter your database table here]"
8 |
9 | val user = "[Enter your username here]"
10 | val password = "[Enter your password here]"
11 |
12 | // Acquire data to be written.
13 | // df could be acquired in any way.
14 | val localTable = "[Enter your local persisted table here]"
15 | val df = spark.sql(s"SELECT * FROM $localTable")
16 |
17 | // WRITE FROM CONFIG
18 | val writeConfig = Config(Map(
19 | "url" -> url,
20 | "databaseName" -> databaseName,
21 | "dbTable" -> dbTable,
22 | "user" -> user,
23 | "password" -> password,
24 | "connectTimeout" -> "5",
25 | "queryTimeout" -> "5"
26 | ))
27 |
28 | df.write.mode(SaveMode.Append).sqlDB(writeConfig)
29 |
30 | /** TRADITIONAL SYNTAX
31 |
32 | import java.util.Properties
33 |
34 | val properties = new Properties()
35 |
36 | properties.put("databaseName", databaseName)
37 | properties.put("user", user)
38 | properties.put("password", password)
39 | properties.put("connectTimeout", "5")
40 | properties.put("queryTimeout", "5")
41 |
42 | df.write.mode(SaveMode.Append).sqlDB(url, dbTable, properties)
43 |
44 | **/
--------------------------------------------------------------------------------
/src/main/java/com/microsoft/azure/sqldb/spark/bulkcopy/BulkCopyMetadata.java:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.bulkcopy;
24 |
25 | import java.io.Serializable;
26 | import java.time.format.DateTimeFormatter;
27 | import java.util.HashMap;
28 | import java.util.Map;
29 |
30 | /**
31 | * Public class for users to add column metadata manually
32 | */
33 | public class BulkCopyMetadata implements Serializable {
34 |
35 |     private Map<Integer, ColumnMetadata> metadata;
36 |
37 | public BulkCopyMetadata() {
38 | this.metadata = new HashMap<>();
39 | }
40 |
41 | public void addColumnMetadata(
42 | int column,
43 | String name,
44 | int jdbcType,
45 | int precision,
46 | int scale) {
47 | addColumnMetadataInternal(column, name, jdbcType, precision, scale, null);
48 | }
49 |
50 | public void addColumnMetadata(
51 | int column,
52 | String name,
53 | int jdbcType,
54 | int precision,
55 | int scale,
56 | DateTimeFormatter dateTimeFormatter) {
57 | addColumnMetadataInternal(column, name, jdbcType, precision, scale, dateTimeFormatter);
58 | }
59 |
60 |     Map<Integer, ColumnMetadata> getMetadata() {
61 | return metadata;
62 | }
63 |
64 | private void addColumnMetadataInternal(
65 | int column,
66 | String name,
67 | int jdbcType,
68 | int precision,
69 | int scale,
70 | DateTimeFormatter dateTimeFormatter) {
71 |
72 | switch (jdbcType) {
73 |             /*
74 |              * SQL Server supports numerous string literal formats for temporal types, hence they are sent as varchar with the approximate
75 |              * precision (length) needed to hold the supported string literals. The string literal formats supported by temporal types are
76 |              * listed on the MSDN data types page.
77 |              */
78 | case java.sql.Types.DATE:
79 | case java.sql.Types.TIME:
80 | case java.sql.Types.TIMESTAMP:
81 | case microsoft.sql.Types.DATETIMEOFFSET:
82 | // The precision is just a number long enough to hold all types of temporal data, doesn't need to be exact.
83 | metadata.put(column, new ColumnMetadata(name, jdbcType, 50, scale, dateTimeFormatter));
84 | break;
85 |
86 | // Redirect SQLXML as LONGNVARCHAR, SQLXML is not valid type in TDS
87 | case java.sql.Types.SQLXML:
88 | metadata.put(column, new ColumnMetadata(name, java.sql.Types.LONGNVARCHAR, precision, scale, dateTimeFormatter));
89 | break;
90 |
91 | // Redirecting Float as Double based on data type mapping
92 | case java.sql.Types.FLOAT:
93 | metadata.put(column, new ColumnMetadata(name, java.sql.Types.DOUBLE, precision, scale, dateTimeFormatter));
94 | break;
95 |
96 | // Redirecting BOOLEAN as BIT
97 | case java.sql.Types.BOOLEAN:
98 | metadata.put(column, new ColumnMetadata(name, java.sql.Types.BIT, precision, scale, dateTimeFormatter));
99 | break;
100 |
101 | default:
102 | metadata.put(column, new ColumnMetadata(name, jdbcType, precision, scale, dateTimeFormatter));
103 | }
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/src/main/java/com/microsoft/azure/sqldb/spark/bulkcopy/ColumnMetadata.java:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.bulkcopy;
24 |
25 | import java.io.Serializable;
26 | import java.time.format.DateTimeFormatter;
27 |
28 | /**
29 | * Class to represent the column metadata
30 | */
31 | class ColumnMetadata implements Serializable {
32 | private String columnName;
33 | private int columnType;
34 | private int precision;
35 | private int scale;
36 | private DateTimeFormatter dateTimeFormatter;
37 |
38 | ColumnMetadata(String name, int type, int precision, int scale, DateTimeFormatter dateTimeFormatter) {
39 | this.columnName = name;
40 | this.columnType = type;
41 | this.precision = precision;
42 | this.scale = scale;
43 | this.dateTimeFormatter = dateTimeFormatter;
44 | }
45 |
46 | String getColumnName(){
47 | return columnName;
48 | }
49 |
50 | int getColumnType(){
51 | return columnType;
52 | }
53 |
54 | int getPrecision(){
55 | return precision;
56 | }
57 |
58 | int getScale(){
59 | return scale;
60 | }
61 |
62 | DateTimeFormatter getDateTimeFormatter(){
63 | return dateTimeFormatter;
64 | }
65 | }
--------------------------------------------------------------------------------
/src/main/java/com/microsoft/azure/sqldb/spark/bulkcopy/SQLServerBulkDataFrameFileRecord.java:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.bulkcopy;
24 |
25 | import com.microsoft.sqlserver.jdbc.ISQLServerBulkRecord;
26 | import com.microsoft.sqlserver.jdbc.SQLServerException;
27 | import com.microsoft.sqlserver.jdbc.SQLServerResource;
28 | import org.apache.spark.sql.Row;
29 | import scala.collection.Iterator;
30 |
31 | import java.sql.JDBCType;
32 | import java.sql.Types;
33 | import java.text.MessageFormat;
34 | import java.time.OffsetTime;
35 | import java.time.format.DateTimeFormatter;
36 | import java.util.Map;
37 | import java.util.Set;
38 |
39 | /**
40 | * Class to allow SQLServerBulkCopy to write data to SQL Server Tables from Spark DataFrames
41 | */
42 | public class SQLServerBulkDataFrameFileRecord implements ISQLServerBulkRecord, java.lang.AutoCloseable {
43 |
44 |     private Iterator<Row> iterator;
45 |
46 |     private Map<Integer, ColumnMetadata> columnMetadata;
47 |
48 |     public SQLServerBulkDataFrameFileRecord(Iterator<Row> iterator, BulkCopyMetadata metadata) {
49 | this.iterator = iterator;
50 | this.columnMetadata = metadata.getMetadata();
51 | }
52 |
53 | public DateTimeFormatter getDateTimeFormatter(int column) {
54 | return columnMetadata.get(column).getDateTimeFormatter();
55 | }
56 |
57 | @Override
58 | public void close() throws SQLServerException {
59 | // nothing to close
60 | }
61 |
62 | @Override
63 | public String getColumnName(int column) {
64 | return columnMetadata.get(column).getColumnName();
65 | }
66 |
67 | @Override
68 |     public Set<Integer> getColumnOrdinals() {
69 | return columnMetadata.keySet();
70 | }
71 |
72 | @Override
73 | public int getColumnType(int column) {
74 | return columnMetadata.get(column).getColumnType();
75 | }
76 |
77 | @Override
78 | public int getPrecision(int column) {
79 | return columnMetadata.get(column).getPrecision();
80 | }
81 |
82 | @Override
83 | public Object[] getRowData() throws SQLServerException {
84 | Row row = iterator.next();
85 | Object[] rowData = new Object[row.length()];
86 |
87 |         // Keys of columnMetadata are database table column ordinals, starting from 1.
88 |         // rowData is an array indexed from 0.
89 |         for (Map.Entry<Integer, ColumnMetadata> pair : columnMetadata.entrySet()) {
90 | ColumnMetadata cm = pair.getValue();
91 |
92 | try {
93 | switch (cm.getColumnType()){
94 | case Types.TIME_WITH_TIMEZONE:
95 | case Types.TIMESTAMP_WITH_TIMEZONE: {
96 | OffsetTime offsetTimeValue;
97 |
98 | if (cm.getDateTimeFormatter() != null)
99 | offsetTimeValue = OffsetTime.parse(row.get(pair.getKey() - 1).toString(), cm.getDateTimeFormatter());
100 | else
101 | offsetTimeValue = OffsetTime.parse(row.get(pair.getKey() - 1).toString());
102 |
103 | rowData[pair.getKey() - 1] = offsetTimeValue;
104 | break;
105 | }
106 |
107 | case Types.NULL: {
108 | rowData[pair.getKey() - 1] = null;
109 | break;
110 | }
111 |
112 | default: {
113 | rowData[pair.getKey() - 1] = row.get(pair.getKey() - 1);
114 | break;
115 | }
116 | }
117 | } catch (IllegalArgumentException illegalArgumentException) {
118 | String value = "'" + row.get(pair.getKey() - 1) + "'";
119 | MessageFormat form = new MessageFormat(getSQLServerExceptionErrorMsg("R_errorConvertingValue"));
120 | String errText = form.format(new Object[]{value, JDBCType.valueOf(cm.getColumnType()).getName()});
121 |
122 | try {
123 | throw SQLServerExceptionReflection.throwSQLServerException(errText, null, 0, illegalArgumentException);
124 | } catch (Exception e) {
125 | throw new IllegalArgumentException(errText, illegalArgumentException);
126 | }
127 | } catch (ArrayIndexOutOfBoundsException arrayOutOfBoundsException) {
128 | String errText = getSQLServerExceptionErrorMsg("R_schemaMismatch");
129 |
130 | try {
131 | throw SQLServerExceptionReflection.throwSQLServerException(errText, arrayOutOfBoundsException);
132 | } catch (Exception e) {
133 | throw new ArrayIndexOutOfBoundsException(errText);
134 | }
135 | }
136 | }
137 |
138 | return rowData;
139 | }
140 |
141 | @Override
142 | public int getScale(int column) {
143 | return columnMetadata.get(column).getScale();
144 | }
145 |
146 | @Override
147 | public boolean isAutoIncrement(int column) {
148 | return false;
149 | }
150 |
151 | @Override
152 | public boolean next() throws SQLServerException {
153 | return iterator.hasNext();
154 | }
155 |
156 | private String getSQLServerExceptionErrorMsg(String type) {
157 | return SQLServerResource.getBundle("com.microsoft.sqlserver.jdbc.SQLServerResource").getString(type);
158 | }
159 | }
160 |
--------------------------------------------------------------------------------
/src/main/java/com/microsoft/azure/sqldb/spark/bulkcopy/SQLServerExceptionReflection.java:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.bulkcopy;
24 |
25 | import com.microsoft.sqlserver.jdbc.SQLServerException;
26 | import java.lang.reflect.Constructor;
27 |
28 | /**
29 | * ISQLServerBulkRecord requires some inherited methods to throw SQLServerException.
30 | * Prior to MS SQL JDBC v6.4, the SQLServerException class was only package accessible.
31 | * This class uses reflection in order to access SQLServerException for earlier versions of the JDBC driver.
32 | */
33 | public class SQLServerExceptionReflection {
34 |
35 | public static SQLServerException throwSQLServerException(String errText, Throwable clause) throws Exception {
36 |         Constructor<SQLServerException> constructor
37 | = SQLServerException.class.getDeclaredConstructor(String.class, Throwable.class);
38 | constructor.setAccessible(true);
39 | return constructor.newInstance(errText, clause);
40 | }
41 |
42 | public static SQLServerException throwSQLServerException(
43 | String errText,
44 | String errState,
45 | int errNum,
46 | Throwable clause) throws Exception {
47 |
48 |         Constructor<SQLServerException> constructor
49 | = SQLServerException.class.getDeclaredConstructor(String.class, String.class, int.class, Throwable.class);
50 | constructor.setAccessible(true);
51 | return constructor.newInstance(errText, errState, errNum, clause);
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/Logging.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark
24 |
25 | private[spark] abstract class Logging extends LoggingTrait
26 |
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/LoggingTrait.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark
24 |
25 | import org.slf4j.{Logger, LoggerFactory}
26 |
27 | private[spark] trait LoggingTrait {
28 |
29 | // Make the log field transient so that objects with Logging can
30 | // be serialized and used on another machine
31 | @transient private var log_ : Logger = null // scalastyle:ignore
32 |
33 | // Method to get the logger name for this object
34 | protected def logName = {
35 | // Ignore trailing $'s in the class names for Scala objects
36 | this.getClass.getName.stripSuffix("$")
37 | }
38 |
39 | // Method to get or create the logger for this object
40 | protected def log: Logger = {
41 | if (log_ == null) {
42 | // scalastyle:ignore
43 | log_ = LoggerFactory.getLogger(logName)
44 | }
45 | log_
46 | }
47 |
48 | // Log methods that take only a String
49 | protected def logInfo(msg: => String) {
50 | if (log.isInfoEnabled) log.info(msg)
51 | }
52 |
53 | protected def logDebug(msg: => String) {
54 | if (log.isDebugEnabled) log.debug(msg)
55 | }
56 |
57 | protected def logTrace(msg: => String) {
58 | if (log.isTraceEnabled) log.trace(msg)
59 | }
60 |
61 | protected def logWarning(msg: => String) {
62 | if (log.isWarnEnabled) log.warn(msg)
63 | }
64 |
65 | protected def logError(msg: => String) {
66 | if (log.isErrorEnabled) log.error(msg)
67 | }
68 |
69 | // Log methods that take Throwables (Exceptions/Errors) too
70 | protected def logInfo(msg: => String, throwable: Throwable) {
71 | if (log.isInfoEnabled) log.info(msg, throwable)
72 | }
73 |
74 | protected def logDebug(msg: => String, throwable: Throwable) {
75 | if (log.isDebugEnabled) log.debug(msg, throwable)
76 | }
77 |
78 | protected def logTrace(msg: => String, throwable: Throwable) {
79 | if (log.isTraceEnabled) log.trace(msg, throwable)
80 | }
81 |
82 | protected def logWarning(msg: => String, throwable: Throwable) {
83 | if (log.isWarnEnabled) log.warn(msg, throwable)
84 | }
85 |
86 | protected def logError(msg: => String, throwable: Throwable) {
87 | if (log.isErrorEnabled) log.error(msg, throwable)
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/bulk/BulkCopyUtils.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.bulk
24 |
25 | import java.sql.{Connection, ResultSetMetaData}
26 |
27 | import com.microsoft.azure.sqldb.spark.LoggingTrait
28 | import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata
29 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig}
30 | import com.microsoft.sqlserver.jdbc.SQLServerBulkCopyOptions
31 |
32 | import scala.util.control.NonFatal
33 |
34 | /**
35 | * Helper and utility methods used for setting up a Bulk Copy transaction.
36 | */
37 | private[spark] object BulkCopyUtils extends LoggingTrait {
38 |
39 | /**
40 | * Database table columns start at index 1.
41 | */
42 | val ColumnStartingIndex = 1
43 |
44 | /**
45 | * Extracts column names, types, precision and scale from a [[ResultSetMetaData]]
46 | * and creates a [[BulkCopyMetadata]] object.
47 | *
48 | * @param resultSetMetaData metadata from an external database table.
49 | * @return BulkCopyMetadata with the extracted column metadata.
50 | */
51 | def createBulkCopyMetadata(resultSetMetaData: ResultSetMetaData): BulkCopyMetadata = {
52 | val bulkCopyMetadata = new BulkCopyMetadata
53 |
54 | for (column <- ColumnStartingIndex to resultSetMetaData.getColumnCount) {
55 | bulkCopyMetadata.addColumnMetadata(
56 | column,
57 | resultSetMetaData.getColumnName(column),
58 | resultSetMetaData.getColumnType(column),
59 | resultSetMetaData.getPrecision(column),
60 | resultSetMetaData.getScale(column)
61 | )
62 | }
63 |
64 | bulkCopyMetadata
65 | }
66 |
67 | /**
68 | * Extracts Bulk Copy properties from Config and creates [[SQLServerBulkCopyOptions]].
69 | * Will use default values if not specified.
70 | *
71 | * @param config the Config object with specified bulk copy properties
72 | * @return [[SQLServerBulkCopyOptions]] for the JDBC Bulk Copy API
73 | */
74 | def getBulkCopyOptions(config: Config): SQLServerBulkCopyOptions = {
75 | val copyOptions = new SQLServerBulkCopyOptions
76 |
77 | copyOptions.setBatchSize(
78 | config.get[String](SqlDBConfig.BulkCopyBatchSize)
79 | .getOrElse(SqlDBConfig.BulkCopyBatchSizeDefault.toString).toInt
80 | )
81 | copyOptions.setBulkCopyTimeout(
82 | config.get[String](SqlDBConfig.BulkCopyTimeout)
83 | .getOrElse(SqlDBConfig.BulkCopyTimeoutDefault.toString).toInt
84 | )
85 | copyOptions.setCheckConstraints(
86 | config.get[String](SqlDBConfig.BulkCopyCheckConstraints)
87 | .getOrElse(SqlDBConfig.BulkCopyCheckConstraintsDefault.toString).toBoolean
88 | )
89 | copyOptions.setFireTriggers(
90 | config.get[String](SqlDBConfig.BulkCopyFireTriggers)
91 | .getOrElse(SqlDBConfig.BulkCopyFireTriggersDefault.toString).toBoolean
92 | )
93 | copyOptions.setKeepIdentity(
94 | config.get[String](SqlDBConfig.BulkCopyKeepIdentity)
95 | .getOrElse(SqlDBConfig.BulkCopyKeepIdentityDefault.toString).toBoolean
96 | )
97 | copyOptions.setKeepNulls(
98 | config.get[String](SqlDBConfig.BulkCopyKeepNulls)
99 | .getOrElse(SqlDBConfig.BulkCopyKeepNullsDefault.toString).toBoolean
100 | )
101 | copyOptions.setTableLock(
102 | config.get[String](SqlDBConfig.BulkCopyTableLock)
103 | .getOrElse(SqlDBConfig.BulkCopyTableLockDefault.toString).toBoolean
104 | )
105 | copyOptions.setUseInternalTransaction(
106 | config.get[String](SqlDBConfig.BulkCopyUseInternalTransaction)
107 | .getOrElse(SqlDBConfig.BulkCopyUseInternalTransactionDefault.toString).toBoolean
108 | )
109 | copyOptions.setAllowEncryptedValueModifications(
110 | config.get[String](SqlDBConfig.BulkCopyAllowEncryptedValueModifications)
111 | .getOrElse(SqlDBConfig.BulkCopyAllowEncryptedValueModificationsDefault.toString).toBoolean
112 | )
113 |
114 | copyOptions
115 | }
116 |
117 | /**
118 |    * Retrieves table column metadata from the remote database.
119 |    *
120 |    * @param table the table from which to retrieve column metadata
121 | * @param connection the active JDBC connection
122 | * @return the [[ResultSetMetaData]] of the executed query.
123 | */
124 | def getTableColumns(table: String, connection: Connection): ResultSetMetaData = {
125 |     // SELECT TOP 0 returns no rows but still exposes the table's column metadata.
126 | val statement = s"SELECT TOP 0 * FROM $table"
127 |
128 | connection.createStatement().executeQuery(statement).getMetaData
129 | }
130 |
131 | /**
132 |    * Determines whether the remote database supports transactions.
133 |    *
134 |    * @param connection the active JDBC connection
135 |    * @return true if the connected database supports transactions, false otherwise
136 | */
137 |   def getTransactionSupport(connection: Connection): Boolean = {
138 |     var isolationLevel = Connection.TRANSACTION_NONE
139 |     try {
140 |       val metadata = connection.getMetaData
141 |       if (metadata.supportsTransactions) {
142 | isolationLevel = metadata.getDefaultTransactionIsolation
143 | }
144 | } catch {
145 | case NonFatal(e) => logWarning("Exception while detecting transaction support", e)
146 | }
147 |
148 | isolationLevel != Connection.TRANSACTION_NONE
149 | }
150 | }
151 |
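Usage note: a minimal sketch of how these helpers compose during a bulk copy. `config`, `connection`, and `table` are hypothetical placeholders, and because the object is `private[spark]` this only compiles from inside the connector's own package; it is illustrative, not part of the public API.

```scala
import java.sql.Connection
import com.microsoft.azure.sqldb.spark.bulk.BulkCopyUtils
import com.microsoft.azure.sqldb.spark.config.Config

def prepareBulkCopy(config: Config, connection: Connection, table: String): Unit = {
  // One SELECT TOP 0 round trip: fetch column names, types, precision and scale.
  val metaData = BulkCopyUtils.getTableColumns(table, connection)
  val bulkCopyMetadata = BulkCopyUtils.createBulkCopyMetadata(metaData)

  // Resolve user-supplied bulk copy settings, falling back to the defaults.
  val options = BulkCopyUtils.getBulkCopyOptions(config)
  // bulkCopyMetadata and options are then handed to SQLServerBulkCopy by the connector.
}
```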
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/config/Config.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.config
24 |
25 | import com.microsoft.azure.sqldb.spark.config.Config.Property
26 | import org.apache.spark.sql.SparkSession
27 | import org.apache.spark.{SparkConf, SparkContext}
28 |
29 | import scala.reflect.ClassTag
30 |
31 | /**
32 |  * Abstract config builder, used to set a number of properties and
33 |  * build a config object from them.
34 | *
35 | * @param properties Map of any-type properties.
36 | * @tparam Builder Current Builder implementation type.
37 | */
38 | abstract class ConfigBuilder[Builder <: ConfigBuilder[Builder]](val properties: Map[Property, Any] = Map()) extends Serializable {
39 | builder =>
40 |
41 | /**
42 |    * Required properties to build an Azure SQL DB config object.
43 |    * At build time, if these properties are not set, an
44 |    * IllegalArgumentException will be thrown.
45 | */
46 | val requiredProperties: List[Property]
47 |
48 | /**
49 | * Instantiate a brand new Builder from given properties map
50 | *
51 | * @param props Map of any-type properties.
52 | * @return The new builder
53 | */
54 | def apply(props: Map[Property, Any]): Builder
55 |
56 | /**
57 | * Set (override if exists) a single property value given a new one.
58 | *
59 | * @param property Property to be set
60 | * @param value New value for given property
61 | * @tparam T Property type
62 | * @return A new builder that includes new value of the specified property
63 | */
64 | def set[T](property: Property, value: T): Builder =
65 | apply(properties + (property -> value))
66 |
67 | /**
68 | * Build the config object from current builder properties.
69 | *
70 |    * @return The SQL DB configuration object.
71 | */
72 | def build(): Config = new Config {
73 |
74 | val properties: Map[Property, Any] = builder.properties.map { case (k, v) => k.toLowerCase -> v }
75 | val reqProperties: List[Property] = requiredProperties.map(_.toLowerCase)
76 |
77 | require(
78 | reqProperties.forall(properties.isDefinedAt),
79 | s"Not all properties are defined! : ${reqProperties.diff(properties.keys.toList.intersect(requiredProperties))}"
80 | )
81 |
82 | require(
83 | properties.isDefinedAt(SqlDBConfig.QueryCustom.toLowerCase) ^
84 | properties.isDefinedAt(SqlDBConfig.DBTable.toLowerCase),
85 | "QueryCustom and dbTable both or none defined. Please define one property."
86 | )
87 |
88 | /**
89 | * Compare if two Configs have the same properties.
90 | *
91 | * @param other Object to compare
92 | * @return Boolean
93 | */
94 | override def equals(other: Any): Boolean = other match {
95 | case that: Config =>
96 | properties == that.properties
97 | case _ => false
98 | }
99 |
100 | override def hashCode(): Int = {
101 | val state = Seq(properties)
102 | state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
103 | }
104 | }
105 | }
106 |
107 | /**
108 | * SQL DB standard configuration object
109 | */
110 | trait Config extends Serializable {
111 |
112 | /**
113 | * Contained properties in configuration object
114 | */
115 | val properties: Map[Property, Any]
116 |
117 | def asOptions: collection.Map[String, String] = {
118 | properties.map { case (x, v) => x -> v.toString }
119 | }
120 |
121 | /** Returns the value associated with a key, or a default value if the key is not contained in the configuration object.
122 | *
123 | * @param key Desired property.
124 | * @param default Value in case no binding for `key` is found in the map.
125 | * @tparam T Result type of the default computation.
126 | * @return the value associated with `key` if it exists,
127 | * otherwise the result of the `default` computation.
128 | */
129 | def getOrElse[T](key: Property, default: => T): T = properties.get(key) match {
130 | case Some(v) => v.asInstanceOf[T]
131 | case None => default
132 | }
133 |
134 | /**
135 | * Gets specified property from current configuration object
136 | *
137 | * @param property Desired property
138 | * @tparam T Property expected value type.
139 | * @return An optional value of expected type
140 | */
141 | def get[T: ClassTag](property: Property): Option[T] =
142 | properties.get(property.toLowerCase).map(_.asInstanceOf[T])
143 |
144 | /**
145 |    * Gets all property keys in the current configuration object
146 |    *
147 |    * @return The set of all configured property keys
148 | */
149 | def getAllKeys: Set[_] = properties.keySet
150 |
151 | /**
152 | * Gets specified property from current configuration object.
153 | * It will fail if property is not previously set.
154 | *
155 | * @param property Desired property
156 | * @tparam T Property expected value type
157 | * @return Expected type value
158 | */
159 | def apply[T: ClassTag](property: Property): T = {
160 | get[T](property).get
161 | }
162 | }
163 |
164 | object Config {
165 |
166 | val configPrefix = "spark.sqldb."
167 |
168 | type Property = String
169 |
170 | /**
171 | * Defines how to act in case any parameter is not set
172 | *
173 | * @param key Key that couldn't be obtained
174 | * @tparam T Expected type (used to fit in 'getOrElse' cases).
175 | * @return Throws an IllegalStateException.
176 | */
177 | def notFound[T](key: String): T =
178 | throw new IllegalStateException(s"Parameter $key not specified")
179 |
180 | /**
181 | * Create a configuration from the `sparkContext`
182 | *
183 | * Uses the prefixed properties that are set in the Spark configuration to create the config.
184 | *
185 | * @see [[configPrefix]]
186 | * @param sparkContext the spark context
187 | * @return the configuration
188 | */
189 | def apply(sparkContext: SparkContext): Config = apply(sparkContext.getConf)
190 |
191 | /**
192 |    * Create a configuration from the `sparkSession`
193 | *
194 | * Uses the prefixed properties that are set in the Spark configuration to create the config.
195 | *
196 | * @see [[configPrefix]]
197 | * @param sparkSession the SparkSession
198 | * @return the configuration
199 | */
200 | def apply(sparkSession: SparkSession): Config = apply(sparkSession.sparkContext.getConf)
201 |
202 | /**
203 | * Create a configuration from the `sparkConf`
204 | *
205 | * Uses the prefixed properties that are set in the Spark configuration to create the config.
206 | *
207 | * @see [[configPrefix]]
208 | * @param sparkConf the spark configuration
209 | * @return the configuration
210 | */
211 | def apply(sparkConf: SparkConf): Config = apply(sparkConf, Map.empty[String, String])
212 |
213 | /**
214 | * Create a configuration from the `sparkConf`
215 | *
216 | * Uses the prefixed properties that are set in the Spark configuration to create the config.
217 | *
218 | * @see [[configPrefix]]
219 | * @param sparkConf the spark configuration
220 |    * @param options additional options that override matching Spark configuration settings
221 | * @return the configuration
222 | */
223 | def apply(sparkConf: SparkConf, options: collection.Map[String, String]): Config =
224 | apply(getOptionsFromConf(sparkConf) ++ stripPrefix(options))
225 |
226 | /**
227 | * Create a configuration from the values in the `Map`
228 | *
229 | * '''Note:''' Values in the map do not need to be prefixed with the [[configPrefix]].
230 | *
231 | * @param options a map of properties and their string values
232 | * @return the configuration
233 | */
234 | def apply(options: collection.Map[String, String]): Config = {
235 | apply(options, None)
236 | }
237 |
238 | /**
239 | * Create a configuration from the values in the `Map`, using the optional default configuration for any default values.
240 | *
241 | * '''Note:''' Values in the map do not need to be prefixed with the [[configPrefix]].
242 | *
243 | * @param options a map of properties and their string values
244 | * @param default the optional default configuration, used for determining the default values for the properties
245 | * @return the configuration
246 | */
247 | def apply(options: collection.Map[String, String], default: Option[Config]): Config = {
248 |     val combined = {
249 |       default match {
250 |         case Some(value) => value.asOptions
251 |         case None => Map.empty[String, String]
252 |       }
253 |     } ++ options // explicitly passed options take precedence over the defaults
254 |     val builder = SqlDBConfigBuilder(combined.asInstanceOf[Map[String, Any]])
255 |
256 | builder.build()
257 | }
258 |
259 | /**
260 | * Strip the prefix from options
261 | *
262 | * @param options options that may contain the prefix
263 |    * @return the options without the prefix
264 | */
265 | def stripPrefix(options: collection.Map[String, String]): collection.Map[String, String] =
266 | options.map(kv => (kv._1.toLowerCase.stripPrefix(configPrefix), kv._2))
267 |
268 | /**
269 | * Gets an options map from the `SparkConf`
270 | *
271 | * @param sparkConf the SparkConf
272 | * @return the options
273 | */
274 | def getOptionsFromConf(sparkConf: SparkConf): collection.Map[String, String] =
275 | stripPrefix(sparkConf.getAll.filter(_._1.startsWith(configPrefix)).toMap)
276 |
277 | protected def getInt(newValue: Option[String], existingValue: Option[Int] = None, defaultValue: Int): Int = {
278 | newValue match {
279 | case Some(value) => value.toInt
280 | case None => existingValue.getOrElse(defaultValue)
281 | }
282 | }
283 | }
284 |
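Usage note: a minimal sketch of building a Config directly from an options map (server, credentials and table below are placeholders). `build()` lowercases every key, checks that the required properties are present, and enforces that exactly one of `dbTable` / `QueryCustom` is set.

```scala
import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig}

val config = Config(Map(
  "url"          -> "myserver.database.windows.net", // required
  "databaseName" -> "MyDatabase",                    // required
  "dbTable"      -> "dbo.Clients",                   // exactly one of dbTable / QueryCustom
  "user"         -> "admin",
  "password"     -> "**********"
))

// Lookups are case-insensitive because keys were lowercased at build time.
val table: Option[String] = config.get[String](SqlDBConfig.DBTable)
```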
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/config/SqlDBConfig.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.config
24 |
25 | /**
26 |  * Values and functions for accessing and parsing the configuration parameters
27 | */
28 | object SqlDBConfig {
29 |
30 | // Parameter names (from JDBC connection properties)
31 | val AccessToken = "accessToken"
32 | val ApplicationIntent = "applicationIntent"
33 | val ApplicationName = "applicationName"
34 | val Authentication = "authentication"
35 | val AuthenticationScheme = "authenticationScheme"
36 | val ColumnEncryptionSetting = "columnEncryptionSetting"
37 | val ConnectTimeout = "connectTimeout"
38 | val Database = "database"
39 | val DatabaseName = "databaseName"
40 | val DBTable = "dbTable"
41 | val DisableStatementPooling = "disableStatementPooling"
42 | val Driver = "driver"
43 | val EnablePrepareOnFirstPreparedStatementCall = "enablePrepareOnFirstPreparedStatementCall"
44 | val Encrypt = "encrypt"
45 | val FailoverPartner = "failoverPartner"
46 | val Fips = "fips"
47 | val FipsProvider = "fipsProvider"
48 | val GSSCredential = "gsscredential"
49 | val HostNameInCertificate = "hostNameInCertificate"
50 | val InstanceName = "instanceName"
51 | val IntegratedSecurity = "integratedSecurity"
52 | val JaasConfigurationName = "jaasConfigurationName"
53 | val KeyStoreAuthentication = "keyStoreAuthentication"
54 | val KeyStoreLocation = "keyStoreLocation"
55 | val KeyStoreSecret = "keyStoreSecret"
56 | val LastUpdateCount = "lastUpdateCount"
57 | val LockTimeout = "lockTimeout"
58 | val LoginTimeout = "loginTimeout"
59 | val MultiSubnetFailover = "multiSubnetFailover"
60 | val PacketSize = "packetSize"
61 | val Password = "password"
62 | val PortNumber = "portNumber"
63 | val Port = "port"
64 | val QueryTimeout = "queryTimeout"
65 | val ResponseBuffering = "responseBuffering"
66 | val SelectMethod = "selectMethod"
67 | val SendStringParametersAsUnicode = "sendStringParametersAsUnicode"
68 | val SendTimeAsDatetime = "sendTimeAsDatetime"
69 | val ServerName = "serverName"
70 | val Server = "server"
71 | val ServerNameAsACE = "serverNameAsACE"
72 | val ServerPreparedStatementDiscardThreshold = "serverPreparedStatementDiscardThreshold"
73 | val ServerSpn = "serverSpn"
74 | val SocketTimeout = "socketTimeout"
75 | val TransparentNetworkIPResolution = "transparentNetworkIPResolution"
76 | val TrustServerCertificate = "trustServerCertificate"
77 | val TrustStore = "trustStore"
78 | val TrustStorePassword = "trustStorePassword"
79 | val TrustStoreType = "trustStoreType"
80 | val URL = "url"
81 | val User = "user"
82 | val WorkstationID = "workstationID"
83 | val XopenStates = "xopenStates"
84 |
85 | // Bulk Copy API Options
86 | val BulkCopyBatchSize = "bulkCopyBatchSize"
87 | val BulkCopyTimeout = "bulkCopyTimeout"
88 | val BulkCopyCheckConstraints = "bulkCopyCheckConstraints"
89 | val BulkCopyFireTriggers = "bulkCopyFireTriggers"
90 | val BulkCopyKeepIdentity = "bulkCopyKeepIdentity"
91 | val BulkCopyKeepNulls = "bulkCopyKeepNulls"
92 | val BulkCopyTableLock = "bulkCopyTableLock"
93 | val BulkCopyUseInternalTransaction = "bulkCopyUseInternalTransaction"
94 | val BulkCopyAllowEncryptedValueModifications = "bulkCopyAllowEncryptedValueModifications"
95 |
96 | // Bulk Copy API Default Settings
97 | val BulkCopyBatchSizeDefault = 0
98 |   val BulkCopyTimeoutDefault = 60 // seconds
99 | val BulkCopyCheckConstraintsDefault = false
100 | val BulkCopyFireTriggersDefault = false
101 | val BulkCopyKeepIdentityDefault = false
102 | val BulkCopyKeepNullsDefault = false
103 | val BulkCopyTableLockDefault = false
104 | val BulkCopyUseInternalTransactionDefault = false
105 | val BulkCopyAllowEncryptedValueModificationsDefault = false
106 |
107 | // Extra constants
108 | val JDBCUrlPrefix = "jdbc:sqlserver://"
109 | val QueryCustom = "QueryCustom"
110 | val SQLjdbcDriver = "com.microsoft.sqlserver.jdbc.SQLServerDriver"
111 |
112 | // Mandatory fields to construct a Config
113 | val required = List(
114 | DatabaseName,
115 | URL
116 | )
117 | }
118 |
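Usage note: a hedged sketch of overriding the bulk copy defaults above. The option values are strings because `BulkCopyUtils.getBulkCopyOptions` reads them back as strings before converting; the connection details are placeholders.

```scala
import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig}

val bulkConfig = Config(Map(
  SqlDBConfig.URL               -> "myserver.database.windows.net",
  SqlDBConfig.DatabaseName      -> "MyDatabase",
  SqlDBConfig.DBTable           -> "dbo.Clients",
  SqlDBConfig.BulkCopyBatchSize -> "2500", // default 0: all rows in a single batch
  SqlDBConfig.BulkCopyTableLock -> "true"  // default false
))
```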
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/config/SqlDBConfigBuilder.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.config
24 |
25 | import com.microsoft.azure.sqldb.spark.config.Config.Property
26 |
27 | case class SqlDBConfigBuilder(props: Map[Property, Any] = Map()) extends {
28 |
29 | override val properties = Map() ++ props
30 |
31 | } with ConfigBuilder[SqlDBConfigBuilder](properties) {
32 |
33 | val requiredProperties: List[Property] = SqlDBConfig.required
34 |
35 | def apply(props: Map[Property, Any]) = SqlDBConfigBuilder(props)
36 | }
37 |
38 |
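Usage note: the builder can also be used directly instead of `Config(Map(...))`; `set` returns a new builder each time, and `build()` runs the same validation. The connection details below are placeholders.

```scala
import com.microsoft.azure.sqldb.spark.config.{SqlDBConfig, SqlDBConfigBuilder}

val config = SqlDBConfigBuilder()
  .set(SqlDBConfig.URL, "myserver.database.windows.net")
  .set(SqlDBConfig.DatabaseName, "MyDatabase")
  .set(SqlDBConfig.DBTable, "dbo.Clients")
  .build()
```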
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/connect/ConnectionUtils.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.connect
24 |
25 | import java.sql.{Connection, DriverManager, SQLException}
26 | import java.util.Properties
27 |
28 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig}
29 |
30 | /**
31 | * Helper and utility methods used for setting up or using a connection
32 | */
33 | private[spark] object ConnectionUtils {
34 |
35 | /**
36 | * Retrieves all connection properties in the Config object
37 | * and returns them as a [[Properties]] object.
38 | *
39 | * @param config the Config object with specified connection properties.
40 | * @return A connection [[Properties]] object.
41 | */
42 | def createConnectionProperties(config: Config): Properties = {
43 | val connectionProperties = new Properties()
44 | for (key <- config.getAllKeys) {
45 | connectionProperties.put(key.toString, config.get[String](key.toString).get)
46 | }
47 | connectionProperties
48 | }
49 |
50 | /**
51 |    * Adds the "jdbc:sqlserver://" prefix to a general server url
52 | *
53 | * @param url the string url without the JDBC prefix
54 | * @return the url with the added JDBC prefix
55 | */
56 | def createJDBCUrl(url: String): String = SqlDBConfig.JDBCUrlPrefix + url
57 |
58 | /**
59 | * Gets a JDBC connection based on Config properties
60 | *
61 | * @param config any read or write Config
62 | * @return a JDBC Connection
63 | */
64 | def getConnection(config: Config): Connection = {
65 | Class.forName(SqlDBConfig.SQLjdbcDriver)
66 | DriverManager.getConnection(
67 | createJDBCUrl(config.get[String](SqlDBConfig.URL).get), createConnectionProperties(config))
68 | }
69 |
70 | /**
71 | * Retrieves the DBTable or QueryCustom specified in the config.
72 | * NOTE: only one property can exist within config.
73 | *
74 | * @param config the Config object with specified properties.
75 | * @return The specified DBTable or QueryCustom
76 | */
77 | def getTableOrQuery(config: Config): String = {
78 | config.get[String](SqlDBConfig.DBTable).getOrElse(
79 | getQueryCustom(config.get[String](SqlDBConfig.QueryCustom).get)
80 | )
81 | }
82 |
83 | /**
84 |    * The JDBC driver requires custom queries to be wrapped in parentheses and given an alias.
85 | * This adds the required syntax so users only need to specify the query.
86 | *
87 | * @param query the default query
88 | * @return the syntactically correct query to be executed by the JDBC driver.
89 | */
90 | def getQueryCustom(query: String): String = s"($query) QueryCustom"
91 |
92 | }
93 |
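Usage note: the two string helpers below are pure transformations; the snippet is illustrative only, since the object is `private[spark]` and not callable from user code.

```scala
ConnectionUtils.createJDBCUrl("myserver.database.windows.net")
// => "jdbc:sqlserver://myserver.database.windows.net"

ConnectionUtils.getQueryCustom("SELECT TOP 10 * FROM dbo.Clients")
// => "(SELECT TOP 10 * FROM dbo.Clients) QueryCustom"
```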
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/connect/DataFrameFunctions.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.connect
24 |
25 | import java.sql.{Connection, SQLException}
26 |
27 | import com.microsoft.azure.sqldb.spark.bulkcopy.{BulkCopyMetadata, SQLServerBulkDataFrameFileRecord}
28 | import com.microsoft.azure.sqldb.spark.LoggingTrait
29 | import com.microsoft.azure.sqldb.spark.bulk.BulkCopyUtils
30 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig}
31 | import com.microsoft.sqlserver.jdbc.SQLServerBulkCopy
32 | import org.apache.spark.sql.{DataFrame, Row}
33 |
34 | import scala.util.Try
35 |
36 | /**
37 | * Implicit functions for DataFrame
38 | */
39 | private[spark] case class DataFrameFunctions[T](@transient dataFrame: DataFrame) extends LoggingTrait {
40 |
41 | /**
42 | * Saves the contents of the [[DataFrame]]
43 | * to Azure SQL DB or SQL Server through the Bulk Copy API
44 |    * @param config the database connection properties and bulk copy properties
45 |    * @param metadata the metadata of the columns; inferred from the destination table if null
46 |    * @param createTable whether to create the destination table before copying
47 |    */
48 |   def bulkCopyToSqlDB(config: Config, metadata: BulkCopyMetadata = null, createTable: Boolean = false): Unit = {
49 |     // Create an empty table with the DataFrame's schema so the destination exists.
50 |     if (createTable) {
51 | dataFrame.limit(0).write.sqlDB(config)
52 | }
53 |
54 |     val actualMetadata = if (metadata == null) {
55 | getConnectionOrFail(config).recover({
56 | case e: ClassNotFoundException =>
57 | logError("JDBC driver not found in class path", e)
58 | throw e
59 | case e1: SQLException =>
60 | logError("Connection cannot be established to the database", e1)
61 | throw e1
62 | }).flatMap(conn => {
63 | inferBulkCopyMetadata(config, conn)
64 | }).recover({
65 | case e: SQLException =>
66 | logError("Column metadata not specified and cannot retrieve metadata from database", e)
67 | throw e
68 | }).get
69 | } else {
70 | metadata
71 | }
72 | dataFrame.foreachPartition(iterator => bulkCopy(config, iterator, actualMetadata))
73 | }
74 |
75 |   private def getConnectionOrFail(config: Config): Try[Connection] = {
76 | Try {
77 | ConnectionUtils.getConnection(config)
78 | }
79 | }
80 |
81 |   private def inferBulkCopyMetadata(config: Config, connection: Connection): Try[BulkCopyMetadata] = {
82 | val dbTable = config.get[String](SqlDBConfig.DBTable).get
83 | Try {
84 | val resultSetMetaData = BulkCopyUtils.getTableColumns(dbTable, connection)
85 | BulkCopyUtils.createBulkCopyMetadata(resultSetMetaData)
86 | }
87 | }
88 |
89 | /**
90 | * Uses the Bulk Copy API to copy contents of a dataframe partition to an
91 | * external database table.
92 | *
93 | * @param config any write configuration with the specified properties.
94 | * @param iterator an iterator for a dataframe partition.
95 | * @param metadata User specified bulk copy metadata.
96 | */
97 | private def bulkCopy(config: Config, iterator: Iterator[Row], metadata: BulkCopyMetadata): Unit = {
98 | var connection: Connection = null
99 | try {
100 | connection = ConnectionUtils.getConnection(config)
101 | } catch {
102 | case e: ClassNotFoundException =>
103 | logError("JDBC driver not found in class path", e)
104 | throw e
105 | case e1: SQLException =>
106 | logError("Connection cannot be established to the database", e1)
107 | throw e1
108 | }
109 |
110 | val dbTable = config.get[String](SqlDBConfig.DBTable).get
111 |
112 |     // Metadata was already inferred in bulkCopyToSqlDB when the user did not specify it.
113 | val bulkCopyMetadata = metadata
114 |
115 | var committed = false
116 | val supportsTransactions = BulkCopyUtils.getTransactionSupport(connection)
117 | try {
118 |       if (supportsTransactions) {
119 | connection.setAutoCommit(false)
120 | }
121 |
122 | val fileRecord = new SQLServerBulkDataFrameFileRecord(iterator, bulkCopyMetadata)
123 | val sqlServerBulkCopy = new SQLServerBulkCopy(connection)
124 |
125 | sqlServerBulkCopy.setDestinationTableName(dbTable)
126 | sqlServerBulkCopy.setBulkCopyOptions(BulkCopyUtils.getBulkCopyOptions(config))
127 | sqlServerBulkCopy.writeToServer(fileRecord)
128 |
129 |       if (supportsTransactions) {
130 | connection.commit()
131 | }
132 | committed = true
133 | } catch {
134 | case e: SQLException =>
135 | if (!committed && supportsTransactions){
136 | logError("An error occurred while writing to database, attempting rollback", e)
137 | }
138 | throw e
139 | } finally {
140 |       if (!committed) {
141 |         if (supportsTransactions) {
142 | connection.rollback()
143 | }
144 | connection.close()
145 | } else {
146 | try {
147 | connection.close()
148 | } catch {
149 | case e: Exception => logWarning("Transaction succeeded, but closing failed", e)
150 | }
151 | }
152 | }
153 | }
154 | }
155 |
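Usage note: a minimal sketch of calling `bulkCopyToSqlDB` from user code (compare samples/scripts/BulkCopySample.scala). `df` is an existing DataFrame and the connection details are placeholders; when no `BulkCopyMetadata` is passed, the column metadata is inferred from the destination table as shown above.

```scala
import com.microsoft.azure.sqldb.spark.config.Config
import com.microsoft.azure.sqldb.spark.connect._

val bulkCopyConfig = Config(Map(
  "url"               -> "myserver.database.windows.net",
  "databaseName"      -> "MyDatabase",
  "user"              -> "admin",
  "password"          -> "**********",
  "dbTable"           -> "dbo.Clients",
  "bulkCopyBatchSize" -> "2500",
  "bulkCopyTableLock" -> "true",
  "bulkCopyTimeout"   -> "600"
))

df.bulkCopyToSqlDB(bulkCopyConfig)
```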
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/connect/DataFrameReaderFunctions.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.connect
24 |
25 | import java.util.Properties
26 |
27 | import com.microsoft.azure.sqldb.spark.connect.ConnectionUtils._
28 | import com.microsoft.azure.sqldb.spark.LoggingTrait
29 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig}
30 | import org.apache.spark.sql.{DataFrame, DataFrameReader}
31 |
32 | /**
33 | * Implicit functions for DataFrameReader
34 | */
35 | private[spark] case class DataFrameReaderFunctions(@transient reader: DataFrameReader) extends LoggingTrait {
36 |
37 | /**
38 | * Creates a [[DataFrame]] based on the read configuration properties.
39 | *
40 | * @param readConfig any read configuration.
41 | * @return DataFrame
42 | */
43 | def sqlDB(readConfig: Config): DataFrame = {
44 | reader.jdbc(
45 | createJDBCUrl(readConfig.get[String](SqlDBConfig.URL).get),
46 | getTableOrQuery(readConfig),
47 | createConnectionProperties(readConfig)
48 | )
49 | }
50 |
51 | /**
52 | * Creates a [[DataFrame]] based on the url, table and specified properties.
53 | *
54 | * @param url the server url
55 | * @param table the external database table being read
56 | * @param properties additional supported JDBC connection properties
57 | * @return DataFrame
58 | */
59 | def sqlDB(url: String, table: String, properties: Properties): DataFrame = {
60 | reader.jdbc(createJDBCUrl(url), table, properties)
61 | }
62 |
63 | /**
64 | * Creates a [[DataFrame]] based on the url, table, predicates and specified properties.
65 | *
66 | * @param url the server url.
67 | * @param table the external database table being read.
68 | * @param predicates condition in the where clause for each partition.
69 | * @param properties additional supported JDBC connection properties.
70 | * @return DataFrame
71 | */
72 | def sqlDB(url: String, table: String, predicates: Array[String], properties: Properties): DataFrame = {
73 | reader.jdbc(createJDBCUrl(url), table, predicates, properties)
74 | }
75 |
76 | /**
77 | * Creates a partitioned [[DataFrame]] based on the url, table and specified properties.
78 | *
79 | * @param url the server url
80 | * @param table the external database table being read
81 | * @param columnName name of a column, used for partitioning.
82 | * @param lowerBound minimum value of the field in `columnName`
83 | * @param upperBound maximum value of the field in `columnName`
84 | * @param numPartitions the number of partitions of the dataframe
85 | * @param properties additional supported JDBC connection properties
86 | * @return DataFrame
87 | */
88 | def sqlDB(url: String, table: String, columnName: String, lowerBound: Long,
89 | upperBound: Long, numPartitions: Int, properties: Properties): DataFrame = {
90 | reader.jdbc(createJDBCUrl(url), table, columnName, lowerBound, upperBound, numPartitions, properties)
91 | }
92 | }
93 |
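Usage note: a minimal read sketch (compare samples/scripts/ReadSample.scala). `spark` is an existing SparkSession and the connection details are placeholders; importing the `connect` package object makes `sqlDB` available on DataFrameReader implicitly.

```scala
import com.microsoft.azure.sqldb.spark.config.Config
import com.microsoft.azure.sqldb.spark.connect._

val readConfig = Config(Map(
  "url"          -> "myserver.database.windows.net",
  "databaseName" -> "MyDatabase",
  "user"         -> "admin",
  "password"     -> "**********",
  "dbTable"      -> "dbo.Clients"
))

val clients = spark.read.sqlDB(readConfig)
clients.show()
```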
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/connect/DataFrameWriterFunctions.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.connect
24 |
25 | import java.util.Properties
26 |
27 | import scala.language.existentials
28 | import com.microsoft.azure.sqldb.spark.connect.ConnectionUtils._
29 | import com.microsoft.azure.sqldb.spark.LoggingTrait
30 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig}
31 | import org.apache.spark.sql.DataFrameWriter
32 |
33 | /**
34 | * Implicit functions for DataFrameWriter
35 | */
36 | private[spark] case class DataFrameWriterFunctions(@transient writer: DataFrameWriter[_]) extends LoggingTrait {
37 |
38 | /**
39 |    * Saves the contents of the `DataFrame` to Azure SQL DB or SQL Server.
40 | *
41 | * @param writeConfig the [[com.microsoft.azure.sqldb.spark.config.Config]] to use
42 | */
43 | def sqlDB(writeConfig: Config): Unit = {
44 | val url = writeConfig.get[String](SqlDBConfig.URL).get
45 | val properties = createConnectionProperties(writeConfig)
46 | val table = writeConfig.get[String](SqlDBConfig.DBTable).getOrElse(
47 |       throw new IllegalArgumentException("DBTable not specified in Config")
48 | )
49 |
50 | sqlDB(url, table, properties)
51 | }
52 |
53 | /**
54 |    * Saves the contents of the `DataFrame` to Azure SQL DB or SQL Server.
55 | *
56 | * @param url the url of the server
57 | * @param table the database table being written to.
58 | * @param properties any additional connection properties handled by the jdbc driver
59 | */
60 | def sqlDB(url: String, table: String, properties: Properties): Unit = writer.jdbc(createJDBCUrl(url), table, properties)
61 |
62 | }
63 |
64 |
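Usage note: a minimal write sketch (compare samples/scripts/WriteSample.scala). `df` is an existing DataFrame and the connection details are placeholders.

```scala
import com.microsoft.azure.sqldb.spark.config.Config
import com.microsoft.azure.sqldb.spark.connect._
import org.apache.spark.sql.SaveMode

val writeConfig = Config(Map(
  "url"          -> "myserver.database.windows.net",
  "databaseName" -> "MyDatabase",
  "user"         -> "admin",
  "password"     -> "**********",
  "dbTable"      -> "dbo.Clients"
))

df.write.mode(SaveMode.Append).sqlDB(writeConfig)
```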
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/connect/package.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark
24 |
25 | import scala.language.implicitConversions
26 | import org.apache.spark.annotation.DeveloperApi
27 | import org.apache.spark.sql._
28 |
29 | /**
30 | * Implicit functions added to DataFrameReader, DataFrameWriter and DataFrame objects
31 | */
32 | package object connect {
33 |
34 | /**
35 | * :: DeveloperApi ::
36 | *
37 | * Helper to implicitly add SQL DB based functions to a DataFrameReader
38 | *
39 | * @param reader the DataFrameReader
40 | * @return the SQL DB based DataFrameReader
41 | */
42 | @DeveloperApi
43 | implicit def toDataFrameReaderFunctions(reader: DataFrameReader): DataFrameReaderFunctions =
44 | DataFrameReaderFunctions(reader)
45 |
46 | /**
47 | * :: DeveloperApi ::
48 | *
49 | * Helper to implicitly add SQL DB based functions to a DataFrameWriter
50 | *
51 | * @param writer the DataFrameWriter
52 | * @return the SQL DB based DataFrameWriter
53 | */
54 | @DeveloperApi
55 | implicit def toDataFrameWriterFunctions(writer: DataFrameWriter[_]): DataFrameWriterFunctions =
56 | DataFrameWriterFunctions(writer)
57 |
58 | /**
59 | * :: DeveloperApi ::
60 | *
61 | * Helper to implicitly add SQL DB based functions to a DataFrame
62 | *
63 | * @param ds the dataframe/dataset
64 | * @return the SQL DB based DataFrame
65 | */
66 | @DeveloperApi
67 | implicit def toDataFrameFunctions[T](ds: Dataset[T]): DataFrameFunctions[Row] = DataFrameFunctions[Row](ds.toDF())
68 |
69 | }
70 |
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/query/QueryFunctions.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.query
24 |
25 | import java.sql.{Connection, SQLException}
26 |
27 | import com.microsoft.azure.sqldb.spark.connect.ConnectionUtils._
28 | import com.microsoft.azure.sqldb.spark.LoggingTrait
29 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig}
30 | import com.microsoft.azure.sqldb.spark.connect._
31 | import org.apache.spark.sql.{DataFrame, SQLContext}
32 |
33 | /**
34 | * Implicit functions for SQLContext
35 | */
36 | private[spark] case class QueryFunctions(@transient sqlContext: SQLContext) extends LoggingTrait {
37 |
38 | /**
39 |    * Executes a custom query on the external database server and returns either
40 |    * a DataFrame with the results or a Boolean indicating whether execution succeeded.
41 | *
42 | * @param config any general configuration
43 | * @return Either of DataFrame or Boolean based on query stated in config
44 | */
45 | def sqlDBQuery(config: Config): Either[DataFrame, Boolean] = {
46 |
47 | var connection: Connection = null
48 |
49 | val sql = config.get[String](SqlDBConfig.QueryCustom).getOrElse(
50 | throw new IllegalArgumentException("Query not found in QueryCustom in Config")
51 | )
52 |
53 | try {
54 | connection = getConnection(config)
55 | val statement = connection.createStatement()
56 |
57 | if (statement.execute(sql)) {
58 | Left(sqlContext.read.sqlDB(config))
59 | }
60 | else {
61 | Right(true)
62 | }
63 | }
64 | catch {
65 |       case sqlException: SQLException => {
66 |         logError("Failed to execute the query against the database", sqlException)
67 |         Right(false)
68 |       }
69 |       case exception: Exception => {
70 |         logError("Failed to execute the query against the database", exception)
71 |         Right(false)
72 |       }
73 | }
74 | finally {
75 |       if (connection != null) connection.close() // connection may be null if getConnection failed
76 | }
77 | }
78 | }
79 |
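Usage note: a hedged sketch of running a custom statement through `sqlDBQuery`. `spark` is an existing SparkSession and the connection details are placeholders; statements that produce a result set come back as `Left(DataFrame)`, everything else as `Right(Boolean)`.

```scala
import com.microsoft.azure.sqldb.spark.config.Config
import com.microsoft.azure.sqldb.spark.query._

val queryConfig = Config(Map(
  "url"          -> "myserver.database.windows.net",
  "databaseName" -> "MyDatabase",
  "user"         -> "admin",
  "password"     -> "**********",
  "queryCustom"  -> "UPDATE dbo.Clients SET name = 'Tom' WHERE id = 1"
))

spark.sqlContext.sqlDBQuery(queryConfig) match {
  case Left(resultDf)   => resultDf.show()
  case Right(succeeded) => println(s"Statement succeeded: $succeeded")
}
```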
--------------------------------------------------------------------------------
/src/main/scala/com/microsoft/azure/sqldb/spark/query/package.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark
24 |
25 | import scala.language.implicitConversions
26 | import org.apache.spark.annotation.DeveloperApi
27 | import org.apache.spark.sql.SQLContext
28 |
29 | /**
30 | * Implicit querying functions added to SQLContext
31 | */
32 | package object query {
33 |
34 | /**
35 | * :: DeveloperApi ::
36 | *
37 | * Helper to implicitly add SQL DB based functions to a SQLContext
38 | *
39 | * @param sqlContext the SQLContext
40 | * @return the Azure SQL based SQLContext
41 | */
42 | @DeveloperApi
43 | implicit def toQueryFunctions(sqlContext: SQLContext): QueryFunctions = QueryFunctions(sqlContext)
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/src/test/java/com/microsoft/azure/sqldb/spark/bulkcopy/BulkCopyMetadataTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.bulkcopy;
24 |
25 | import org.junit.Before;
26 | import org.junit.Test;
27 |
28 | import java.sql.Types;
29 | import java.time.format.DateTimeFormatter;
30 |
31 | import static junit.framework.Assert.assertEquals;
32 |
33 | public class BulkCopyMetadataTest {
34 |
35 | private BulkCopyMetadata bulkCopyMetadata;
36 |
37 | @Before
38 | public void beforeEach() {
39 | bulkCopyMetadata = new BulkCopyMetadata();
40 | }
41 |
42 | @Test
43 | public void constructorTest(){
44 | assertEquals(0, bulkCopyMetadata.getMetadata().size());
45 | }
46 |
47 | @Test
48 | public void addColumnMetadataTest() {
49 | String columnName = "testColumn";
50 | int jdbcType = Types.DOUBLE;
51 | int precision = 15;
52 | int scale = 5;
53 | DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss");
54 |
55 | bulkCopyMetadata.addColumnMetadata(1, columnName, jdbcType, precision, scale);
56 | assertEquals(1, bulkCopyMetadata.getMetadata().size());
57 |
58 | bulkCopyMetadata.addColumnMetadata(2, columnName, jdbcType, precision, scale, dateTimeFormatter);
59 | assertEquals(2, bulkCopyMetadata.getMetadata().size());
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/test/java/com/microsoft/azure/sqldb/spark/bulkcopy/ColumnMetadataTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.bulkcopy;
24 |
25 | import org.junit.Test;
26 | import java.sql.Types;
27 | import java.time.format.DateTimeFormatter;
28 | import static junit.framework.Assert.assertEquals;
29 |
30 | public class ColumnMetadataTest {
31 |
32 | @Test
33 | public void constructorTest(){
34 | String columnName = "testColumn";
35 | int columnType = Types.TIME;
36 | int precision = 50;
37 | int scale = 0;
38 | DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss");
39 |
40 | ColumnMetadata columnMetadata = new ColumnMetadata(columnName, columnType, precision, scale, dateTimeFormatter);
41 |
42 | assertEquals(columnName, columnMetadata.getColumnName());
43 | assertEquals(columnType, columnMetadata.getColumnType());
44 | assertEquals(precision, columnMetadata.getPrecision());
45 | assertEquals(scale, columnMetadata.getScale());
46 | assertEquals(dateTimeFormatter, columnMetadata.getDateTimeFormatter());
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/test/java/com/microsoft/azure/sqldb/spark/bulkcopy/SQLServerBulkDataFrameFileRecordTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.bulkcopy;
24 |
25 | import org.junit.Before;
26 | import org.junit.Test;
27 |
28 | import java.sql.Types;
29 | import java.time.format.DateTimeFormatter;
30 |
31 | import static junit.framework.Assert.assertEquals;
32 |
33 | public class SQLServerBulkDataFrameFileRecordTest {
34 |
35 | private SQLServerBulkDataFrameFileRecord fileRecord;
36 |
37 | @Before
38 | public void beforeEach() {
39 | BulkCopyMetadata bulkCopyMetadata = new BulkCopyMetadata();
40 | bulkCopyMetadata.addColumnMetadata(1, "Column1", Types.NVARCHAR, 128, 0);
41 | bulkCopyMetadata.addColumnMetadata(2, "Column2", Types.DOUBLE, 20, 10);
42 | bulkCopyMetadata.addColumnMetadata(3, "Column3", Types.VARCHAR, 256, 0);
43 | bulkCopyMetadata.addColumnMetadata(4, "Column4", Types.DATE, 50, 0, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss"));
44 |
45 | fileRecord = new SQLServerBulkDataFrameFileRecord(null, bulkCopyMetadata);
46 | }
47 |
48 | @Test
49 | public void getColumnNameTest() {
50 | assertEquals("Column1", fileRecord.getColumnName(1));
51 | assertEquals("Column2", fileRecord.getColumnName(2));
52 | assertEquals("Column3", fileRecord.getColumnName(3));
53 | assertEquals("Column4", fileRecord.getColumnName(4));
54 | }
55 |
56 | @Test
57 | public void getColumnOrdinalsTest() {
58 | assertEquals(4, fileRecord.getColumnOrdinals().size());
59 | }
60 |
61 | @Test
62 | public void getColumnTypeTest() {
63 | assertEquals(Types.NVARCHAR, fileRecord.getColumnType(1));
64 | assertEquals(Types.DOUBLE, fileRecord.getColumnType(2));
65 | assertEquals(Types.VARCHAR, fileRecord.getColumnType(3));
66 | assertEquals(Types.DATE, fileRecord.getColumnType(4));
67 | }
68 |
69 | @Test
70 | public void getPrecisionTest() {
71 | assertEquals(128, fileRecord.getPrecision(1));
72 | assertEquals(20, fileRecord.getPrecision(2));
73 | assertEquals(256, fileRecord.getPrecision(3));
74 | assertEquals(50, fileRecord.getPrecision(4));
75 | }
76 |
77 | @Test
78 | public void getScaleTest() {
79 | assertEquals(0, fileRecord.getScale(1));
80 | assertEquals(10, fileRecord.getScale(2));
81 | assertEquals(0, fileRecord.getScale(3));
82 | assertEquals(0, fileRecord.getScale(4));
83 | }
84 |
85 | @Test
86 | public void isAutoIncrementTest() {
87 | assertEquals(false, fileRecord.isAutoIncrement(0));
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/src/test/java/com/microsoft/azure/sqldb/spark/bulkcopy/SQLServerExceptionReflectionTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.bulkcopy;
24 |
25 | import com.microsoft.sqlserver.jdbc.SQLServerException;
26 | import org.junit.Test;
27 |
28 | import static junit.framework.Assert.assertEquals;
29 | import static junit.framework.Assert.fail;
30 |
31 | public class SQLServerExceptionReflectionTest {
32 |
33 | @Test(expected = SQLServerException.class)
34 | public void throwSQLServerExceptionTest1() throws SQLServerException {
35 | String text = "Testing error text";
36 | String state = "Testing error state";
37 | int code = 1;
38 | Exception caughtException = new Exception();
39 |
40 | SQLServerException exception = null;
41 | try {
42 | exception = SQLServerExceptionReflection.throwSQLServerException(text, state, code, caughtException);
43 | } catch (Exception e){
44 | fail("A SQLServerException should have been successfully constructed");
45 | }
46 |
47 | assertEquals(text, exception.getMessage());
48 | assertEquals(state, exception.getSQLState());
49 | assertEquals(code, exception.getErrorCode());
50 | assertEquals(caughtException, exception.getCause());
51 |
52 | throw exception;
53 | }
54 |
55 | @Test(expected = SQLServerException.class)
56 | public void throwSQLServerExceptionTest2() throws SQLServerException {
57 | String text = "Testing error text";
58 | Exception caughtException = new Exception();
59 |
60 | SQLServerException exception = null;
61 | try {
62 | exception = SQLServerExceptionReflection.throwSQLServerException(text, caughtException);
63 | } catch (Exception e){
64 | fail("A SQLServerException should have been successfully constructed");
65 | }
66 |
67 | assertEquals(text, exception.getMessage());
68 | assertEquals(caughtException, exception.getCause());
69 |
70 | throw exception;
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
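Note: SQLServerExceptionReflection exists because the mssql-jdbc driver does not expose public SQLServerException constructors, so the helper builds the exception reflectively. A minimal Scala sketch of that pattern, assuming a non-public (String, String, int, Throwable) constructor — an assumption inferred from the assertions above, not a documented driver API:

import com.microsoft.sqlserver.jdbc.SQLServerException

def buildSqlServerException(text: String, state: String,
                            code: Int, cause: Throwable): SQLServerException = {
  // Look up the non-public constructor and make it callable.
  val ctor = classOf[SQLServerException].getDeclaredConstructor(
    classOf[String], classOf[String], Integer.TYPE, classOf[Throwable])
  ctor.setAccessible(true)
  ctor.newInstance(text, state, Int.box(code), cause)
}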
/src/test/scala/com/microsoft/azure/sqldb/spark/SqlDBSpark.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark
24 |
25 | import org.scalatest.{BeforeAndAfter, BeforeAndAfterEach, FlatSpec, Matchers}
26 |
27 | trait SqlDBSpark extends FlatSpec with Matchers with BeforeAndAfter with BeforeAndAfterEach
28 |
--------------------------------------------------------------------------------
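Note: SqlDBSpark is the shared base trait for every Scala spec in this suite; it bundles FlatSpec-style registration, Matchers, and the before/after hooks in one place. A spec opts in simply by extending it, as the specs below do (MyUtilitySpec is a hypothetical name):

import com.microsoft.azure.sqldb.spark.SqlDBSpark

class MyUtilitySpec extends SqlDBSpark {
  "someUtility" should "behave as documented" in {
    // Matchers syntax comes from the trait.
    (1 + 1) should be (2)
  }
}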
/src/test/scala/com/microsoft/azure/sqldb/spark/bulk/BulkCopyUtilsSpec.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.bulk
24 |
25 | import com.microsoft.azure.sqldb.spark.SqlDBSpark
26 | import com.microsoft.azure.sqldb.spark.config.Config
27 |
28 | class BulkCopyUtilsSpec extends SqlDBSpark {
29 | "getBulkCopyOptions" should "add the correct options from Config to SQLServerBulkCopyOptions" in {
30 | val bulkCopyBatchSize = "2500"
31 | val bulkCopyTimeout = "120"
32 | val bulkCopyCheckConstraints = "true"
33 | val bulkCopyFireTriggers = "true"
34 | val bulkCopyKeepIdentity = "true"
35 | val bulkCopyKeepNulls = "true"
36 | val bulkCopyTableLock = "true"
37 | val bulkCopyUseInternalTransaction = "true"
38 | val bulkCopyAllowEncryptedValueModifications = "true"
39 |
40 | val config = Config(Map(
41 | "url" -> "mssql.database.windows.net",
42 | "databaseName" -> "MyDatabase",
43 | "user" -> "admin@microsoft.com",
44 | "password" -> "password",
45 | "dbTable" -> "dbo.Customers",
46 | "authentication" -> "ActiveDirectoryPassword",
47 | "trustServerCertificate" -> "true",
48 | "encrypt" -> "true",
49 | "hostNameInCertificate" -> "*.database.windows.net",
50 | "bulkCopyBatchSize" -> bulkCopyBatchSize,
51 | "bulkCopyTimeout" -> bulkCopyTimeout,
52 | "bulkCopyCheckConstraints" -> bulkCopyCheckConstraints,
53 | "bulkCopyFireTriggers" -> bulkCopyFireTriggers,
54 | "bulkCopyKeepIdentity" -> bulkCopyKeepIdentity,
55 | "bulkCopyKeepNulls" -> bulkCopyKeepNulls,
56 | "bulkCopyTableLock" -> bulkCopyTableLock,
57 | "bulkCopyUseInternalTransaction" -> bulkCopyUseInternalTransaction,
58 | "bulkCopyAllowEncryptedValueModifications" -> bulkCopyAllowEncryptedValueModifications
59 | ))
60 |
61 | val bulkCopyOptions = BulkCopyUtils.getBulkCopyOptions(config)
62 | bulkCopyOptions.getBatchSize should be (bulkCopyBatchSize.toInt)
63 | bulkCopyOptions.getBulkCopyTimeout should be (bulkCopyTimeout.toInt)
64 | bulkCopyOptions.isCheckConstraints should be (bulkCopyCheckConstraints.toBoolean)
65 | bulkCopyOptions.isFireTriggers should be (bulkCopyFireTriggers.toBoolean)
66 | bulkCopyOptions.isKeepIdentity should be (bulkCopyKeepIdentity.toBoolean)
67 | bulkCopyOptions.isKeepNulls should be (bulkCopyKeepNulls.toBoolean)
68 | bulkCopyOptions.isTableLock should be (bulkCopyTableLock.toBoolean)
69 | bulkCopyOptions.isUseInternalTransaction should be (bulkCopyUseInternalTransaction.toBoolean)
70 | bulkCopyOptions.isAllowEncryptedValueModifications should be (bulkCopyAllowEncryptedValueModifications.toBoolean)
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
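Note: the spec above pins down the expected mapping from Config keys to SQLServerBulkCopyOptions. A minimal sketch of that mapping — `get` is a hypothetical accessor standing in for however Config exposes its properties; the setters mirror the getters asserted in the spec:

import com.microsoft.sqlserver.jdbc.SQLServerBulkCopyOptions

def toBulkCopyOptions(get: String => Option[String]): SQLServerBulkCopyOptions = {
  val opts = new SQLServerBulkCopyOptions
  // Each recognized key, when present, is parsed and forwarded to its setter.
  get("bulkCopyBatchSize").foreach(v => opts.setBatchSize(v.toInt))
  get("bulkCopyTimeout").foreach(v => opts.setBulkCopyTimeout(v.toInt))
  get("bulkCopyCheckConstraints").foreach(v => opts.setCheckConstraints(v.toBoolean))
  get("bulkCopyFireTriggers").foreach(v => opts.setFireTriggers(v.toBoolean))
  get("bulkCopyKeepIdentity").foreach(v => opts.setKeepIdentity(v.toBoolean))
  get("bulkCopyKeepNulls").foreach(v => opts.setKeepNulls(v.toBoolean))
  get("bulkCopyTableLock").foreach(v => opts.setTableLock(v.toBoolean))
  get("bulkCopyUseInternalTransaction").foreach(v => opts.setUseInternalTransaction(v.toBoolean))
  get("bulkCopyAllowEncryptedValueModifications").foreach(v => opts.setAllowEncryptedValueModifications(v.toBoolean))
  opts
}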
/src/test/scala/com/microsoft/azure/sqldb/spark/config/ConfigSpec.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.config
24 |
25 | import com.microsoft.azure.sqldb.spark.SqlDBSpark
26 |
27 | class ConfigSpec extends SqlDBSpark {
28 |
29 |   "Config" should "throw IllegalArgumentException if required properties are missing" in {
30 | an [IllegalArgumentException] should be thrownBy {
31 |       Config(Map("url" -> "mssql.database.windows.net"))
32 | }
33 | }
34 |
35 |   it should "throw IllegalArgumentException if queryCustom and dbTable are both specified" in {
36 | an [IllegalArgumentException] should be thrownBy {
37 | Config(Map(
38 | "url" -> "mysql.database.windows.net",
39 | "databaseName" -> "MyDatabase",
40 | "user" -> "MyUsername",
41 | "password" -> "**********",
42 | "dbTable" -> "MyDBTable",
43 | "QueryCustom" -> "MyQueryCustom"
44 | ))
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
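Note: for contrast with the two failing cases in ConfigSpec, a valid configuration supplies the required connection properties plus exactly one of dbTable or QueryCustom; this sketch mirrors the maps used throughout the other specs:

import com.microsoft.azure.sqldb.spark.config.Config

// Valid: required connection properties plus dbTable (and no QueryCustom).
val readConfig = Config(Map(
  "url"          -> "mssql.database.windows.net",
  "databaseName" -> "MyDatabase",
  "user"         -> "MyUsername",
  "password"     -> "**********",
  "dbTable"      -> "dbo.Customers"
))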
/src/test/scala/com/microsoft/azure/sqldb/spark/connect/ConnectionUtilsSpec.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * The MIT License (MIT)
3 | * Copyright (c) 2018 Microsoft Corporation
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 | package com.microsoft.azure.sqldb.spark.connect
24 |
25 | import java.util.Properties
26 |
27 | import com.microsoft.azure.sqldb.spark.SqlDBSpark
28 | import com.microsoft.azure.sqldb.spark.config.Config
29 |
30 | class ConnectionUtilsSpec extends SqlDBSpark {
31 |
32 | "createConnectionProperties" should "return all properties in configuration in a Properties object" in {
33 | val url = "mssql.database.windows.net"
34 | val database = "MyDatabase"
35 | val user = "admin"
36 | val password = "password"
37 | val dbTable = "dbo.Customers"
38 |
39 | val config = Config(Map(
40 | "url" -> url,
41 | "databaseName" -> database,
42 | "user" -> user,
43 | "password" -> password,
44 | "dbTable" -> dbTable
45 | ))
46 |
47 | val controlProperties = new Properties
48 | controlProperties.put("url", url.toLowerCase)
49 | controlProperties.put("databasename", database.toLowerCase)
50 | controlProperties.put("user", user.toLowerCase)
51 | controlProperties.put("password", password.toLowerCase)
52 | controlProperties.put("dbtable", dbTable.toLowerCase)
53 |
54 | val testProperties = ConnectionUtils.createConnectionProperties(config)
55 |     testProperties.keySet() should be (controlProperties.keySet())
56 | }
57 |
58 | "createJDBCUrl" should "return the server url with jdbc prefix" in {
59 | val url = "mssql.database.windows.net"
60 | ConnectionUtils.createJDBCUrl(url) should be ("jdbc:sqlserver://" + url)
61 | }
62 |
63 | "getQueryCustom" should "return original query in parenthesis" in {
64 | val query = "SELECT * FROM MYTABLE"
65 | ConnectionUtils.getQueryCustom(query) should be ("(" + query + ") QueryCustom")
66 | }
67 |
68 | "getTableOrQuery" should "return appropriate table or query from a config object" in {
69 | val dbTable = "dbo.Customers"
70 | val tableConfig = Config(Map(
71 | "url" -> "mssql.database.windows.net",
72 | "databaseName" -> "MyDatabase",
73 | "user" -> "admin",
74 | "password" -> "password",
75 | "dbTable" -> dbTable
76 | ))
77 | ConnectionUtils.getTableOrQuery(tableConfig) should be (dbTable)
78 |
79 | val queryCustom = "SELECT * FROM dbo.Customers"
80 | val queryConfig = Config(Map(
81 | "url" -> "mssql.database.windows.net",
82 | "databaseName" -> "MyDatabase",
83 | "user" -> "admin",
84 | "password" -> "password",
85 | "QueryCustom" -> queryCustom
86 | ))
87 | ConnectionUtils.getTableOrQuery(queryConfig) should be (ConnectionUtils.getQueryCustom(queryCustom))
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
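Note: taken together, these assertions document the observable contract of ConnectionUtils. A short usage sketch grounded directly in the expected values above (the spec calls these helpers from the same package; their visibility outside it is not shown here):

import com.microsoft.azure.sqldb.spark.connect.ConnectionUtils

val url = "mssql.database.windows.net"

// JDBC URL construction: prefix the server name.
ConnectionUtils.createJDBCUrl(url)
// => "jdbc:sqlserver://mssql.database.windows.net"

// Custom queries are wrapped as an aliased derived table, so they can be
// used anywhere a table name is expected.
ConnectionUtils.getQueryCustom("SELECT * FROM dbo.Customers")
// => "(SELECT * FROM dbo.Customers) QueryCustom"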