├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── docs ├── AzureDocument.md └── images │ └── spark_sqldb_dataflow.png ├── lib ├── mssql-jdbc-6.2.2.jre8.jar └── scalastyle_config.xml ├── pom.xml ├── releases └── azure-sqldb-spark-1.0.0 │ ├── azure-sqldb-spark-1.0.0-uber.jar │ └── azure-sqldb-spark-1.0.0.jar ├── samples ├── notebooks │ └── Spark Connector for Azure SQL Databases and SQL Server.html └── scripts │ ├── BulkCopySample.scala │ ├── ReadSample.scala │ └── WriteSample.scala └── src ├── main ├── java │ └── com │ │ └── microsoft │ │ └── azure │ │ └── sqldb │ │ └── spark │ │ └── bulkcopy │ │ ├── BulkCopyMetadata.java │ │ ├── ColumnMetadata.java │ │ ├── SQLServerBulkDataFrameFileRecord.java │ │ └── SQLServerExceptionReflection.java └── scala │ └── com │ └── microsoft │ └── azure │ └── sqldb │ └── spark │ ├── Logging.scala │ ├── LoggingTrait.scala │ ├── bulk │ └── BulkCopyUtils.scala │ ├── config │ ├── Config.scala │ ├── SqlDBConfig.scala │ └── SqlDBConfigBuilder.scala │ ├── connect │ ├── ConnectionUtils.scala │ ├── DataFrameFunctions.scala │ ├── DataFrameReaderFunctions.scala │ ├── DataFrameWriterFunctions.scala │ └── package.scala │ └── query │ ├── QueryFunctions.scala │ └── package.scala └── test ├── java └── com │ └── microsoft │ └── azure │ └── sqldb │ └── spark │ └── bulkcopy │ ├── BulkCopyMetadataTest.java │ ├── ColumnMetadataTest.java │ ├── SQLServerBulkDataFrameFileRecordTest.java │ └── SQLServerExceptionReflectionTest.java └── scala └── com └── microsoft └── azure └── sqldb └── spark ├── SqlDBSpark.scala ├── bulk └── BulkCopyUtilsSpec.scala ├── config └── ConfigSpec.scala └── connect └── ConnectionUtilsSpec.scala /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | *.iml 4 | target/ 5 | 6 | #IDE 7 | .idea/* -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | sudo: required 3 | 4 | matrix: 5 | include: 6 | - os: linux 7 | dist: trusty 8 | jdk: oraclejdk8 9 | - os: osx 10 | osx_image: xcode8 11 | 12 | script: 13 | - mvn clean package 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Updated Jun 2020: This project is not being actively maintained. Instead, [Apache Spark Connector for SQL Server and Azure SQL](https://github.com/microsoft/sql-spark-connector) is now available, with support for Python and R bindings, an easier-to-use interface for bulk inserting data, and many other improvements. We encourage you to actively evaluate and use the new connector. 2 | 3 | # Spark connector for Azure SQL Databases and SQL Server 4 | 5 | [![Build Status](https://travis-ci.org/Azure/azure-sqldb-spark.svg?branch=master)](https://travis-ci.org/Azure/azure-sqldb-spark) 6 | 7 | The Spark connector for [Azure SQL Database](https://azure.microsoft.com/en-us/services/sql-database/) and [SQL Server](https://www.microsoft.com/en-us/sql-server/default.aspx) enables SQL databases, including Azure SQL Databases and SQL Server, to act as an input data source or an output data sink for Spark jobs. It allows you to use real-time transactional data in big data analytics and to persist results for ad hoc queries or reporting. 8 | 9 | Compared to the built-in Spark connector, this connector provides the ability to bulk insert data into SQL databases. It can outperform row-by-row insertion with 10x to 20x faster performance. The Spark connector for Azure SQL Databases and SQL Server also supports AAD authentication, which allows you to connect securely to your Azure SQL databases from Azure Databricks using your AAD account. Because it provides interfaces similar to the built-in JDBC connector, it is easy to migrate your existing Spark jobs to use this new connector. 10 | 11 | ## How to connect to Spark using this library 12 | This connector uses the Microsoft SQL Server JDBC driver to read data from, and write data to, Azure SQL Database. 13 | Results are of the `DataFrame` type. 14 | 15 | All connection properties of the Microsoft JDBC Driver for SQL Server 16 | are supported in this connector. Add connection properties as fields in the `com.microsoft.azure.sqldb.spark.config.Config` object.
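For example, a minimal sketch of a `Config` that passes two standard JDBC driver connection properties alongside the connector's own fields (the server name, credentials, and property values below are placeholders):

```scala
import com.microsoft.azure.sqldb.spark.config.Config

// Any connection property understood by the Microsoft JDBC driver
// can be supplied as an additional map entry.
val config = Config(Map(
  "url"          -> "mysqlserver.database.windows.net",
  "databaseName" -> "MyDatabase",
  "dbTable"      -> "dbo.Clients",
  "user"         -> "username",
  "password"     -> "*********",
  "encrypt"      -> "true", // JDBC driver property
  "loginTimeout" -> "30"    // JDBC driver property, in seconds
))
```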
19 | 20 | 21 | ### Reading from Azure SQL Database or SQL Server 22 | ```scala 23 | import com.microsoft.azure.sqldb.spark.config.Config 24 | import com.microsoft.azure.sqldb.spark.connect._ 25 | 26 | val config = Config(Map( 27 | "url" -> "mysqlserver.database.windows.net", 28 | "databaseName" -> "MyDatabase", 29 | "dbTable" -> "dbo.Clients", 30 | "user" -> "username", 31 | "password" -> "*********", 32 | "connectTimeout" -> "5", //seconds 33 | "queryTimeout" -> "5" //seconds 34 | )) 35 | 36 | val collection = sqlContext.read.sqlDB(config) 37 | collection.show() 38 | 39 | ``` 40 | 41 | ### Writing to Azure SQL Database or SQL Server 42 | ```scala 43 | import com.microsoft.azure.sqldb.spark.config.Config 44 | import com.microsoft.azure.sqldb.spark.connect._ 45 | 46 | // Acquire a DataFrame collection (val collection) 47 | 48 | val config = Config(Map( 49 | "url" -> "mysqlserver.database.windows.net", 50 | "databaseName" -> "MyDatabase", 51 | "dbTable" -> "dbo.Clients", 52 | "user" -> "username", 53 | "password" -> "*********" 54 | )) 55 | 56 | import org.apache.spark.sql.SaveMode 57 | collection.write.mode(SaveMode.Append).sqlDB(config) 58 | 59 | ``` 60 | ### Pushdown query to Azure SQL Database or SQL Server 61 | For SELECT queries with expected return results, please use 62 | [Reading from Azure SQL Database or SQL Server](#reading-from-azure-sql-database-or-sql-server) 63 | ```scala 64 | import com.microsoft.azure.sqldb.spark.config.Config 65 | import com.microsoft.azure.sqldb.spark.query._ 66 | val query = """ 67 | |UPDATE Customers 68 | |SET ContactName = 'Alfred Schmidt', City = 'Frankfurt' 69 | |WHERE CustomerID = 1; 70 | """.stripMargin 71 | 72 | val config = Config(Map( 73 | "url" -> "mysqlserver.database.windows.net", 74 | "databaseName" -> "MyDatabase", 75 | "user" -> "username", 76 | "password" -> "*********", 77 | "queryCustom" -> query 78 | )) 79 | 80 | sqlContext.sqlDBQuery(config) 81 | ``` 82 | ### Bulk Copy to Azure SQL Database or SQL Server 83 | ```scala 84 | import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata 85 | import com.microsoft.azure.sqldb.spark.config.Config 86 | import com.microsoft.azure.sqldb.spark.connect._ 87 | 88 | /** 89 | Add column metadata. 90 | If not specified, metadata is read automatically 91 | from the destination table, which may hurt performance. 92 | */ 93 | var bulkCopyMetadata = new BulkCopyMetadata 94 | bulkCopyMetadata.addColumnMetadata(1, "Title", java.sql.Types.NVARCHAR, 128, 0) 95 | bulkCopyMetadata.addColumnMetadata(2, "FirstName", java.sql.Types.NVARCHAR, 50, 0) 96 | bulkCopyMetadata.addColumnMetadata(3, "LastName", java.sql.Types.NVARCHAR, 50, 0) 97 | 98 | val bulkCopyConfig = Config(Map( 99 | "url" -> "mysqlserver.database.windows.net", 100 | "databaseName" -> "MyDatabase", 101 | "user" -> "username", 102 | "password" -> "*********", 103 | "dbTable" -> "dbo.Clients", 104 | "bulkCopyBatchSize" -> "2500", 105 | "bulkCopyTableLock" -> "true", 106 | "bulkCopyTimeout" -> "600" 107 | )) 108 | 109 | df.bulkCopyToSqlDB(bulkCopyConfig, bulkCopyMetadata) 110 | //df.bulkCopyToSqlDB(bulkCopyConfig) if no metadata is specified.
112 | ``` 113 | 114 | ## Requirements 115 | Officially supported versions 116 | 117 | | Component | Versions Supported | 118 | | --------- | ------------------ | 119 | | Apache Spark | 2.0.2 or later | 120 | | Scala | 2.10 or later | 121 | | Microsoft JDBC Driver for SQL Server | 6.2 to 7.4 ^ | 122 | | Microsoft SQL Server | SQL Server 2008 or later | 123 | | Azure SQL Databases | Supported | 124 | 125 | ^ Driver version 8.x has not been tested 126 | 127 | ## Download 128 | ### Download from Maven 129 | You can download the latest version from [here](https://search.maven.org/search?q=a:azure-sqldb-spark). 130 | 131 | You can also use the following coordinate to import the library into Azure Databricks: 132 | `com.microsoft.azure:azure-sqldb-spark:1.0.2` 133 | 134 | ### Build this project 135 | Currently, the connector project uses Maven. To build the connector without dependencies, you can run: 136 | ```sh 137 | mvn clean package 138 | ``` 139 | 140 | ## Contributing & Feedback 141 | 142 | This project has adopted the [Microsoft Open Source Code of 143 | Conduct](https://opensource.microsoft.com/codeofconduct/). For more information 144 | see the [Code of Conduct 145 | FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact 146 | [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional 147 | questions or comments. 148 | 149 | To give feedback and/or report an issue, open a [GitHub 150 | Issue](https://help.github.com/articles/creating-an-issue/). 151 | 152 | 153 | *Apache®, Apache Spark, and Spark® are either registered trademarks or 154 | trademarks of the Apache Software Foundation in the United States and/or other 155 | countries.* 156 | -------------------------------------------------------------------------------- /docs/AzureDocument.md: -------------------------------------------------------------------------------- 1 | # Accelerate real-time big data analytics with Spark connector for Azure SQL Database and SQL Server 2 | 3 | The Spark connector for Azure SQL Database and SQL Server enables SQL databases, including Azure SQL Database and SQL Server, to act as an input data source or an output data sink for Spark jobs. It allows you to use real-time transactional data in big data analytics and to persist results for ad hoc queries or reporting. Compared to the built-in JDBC connector, this connector provides the ability to bulk insert data into SQL databases. It can outperform row-by-row insertion with 10x to 20x faster performance. The Spark connector for Azure SQL Database and SQL Server also supports AAD authentication, which allows you to connect securely to your Azure SQL database from Azure Databricks using your AAD account. Because it provides interfaces similar to the built-in JDBC connector, it is easy to migrate your existing Spark jobs to use this new connector. 4 | 5 | ## Download 6 | To get started, download the Spark to SQL DB connector from the [azure-sqldb-spark repository](https://github.com/Azure/azure-sqldb-spark) on GitHub.
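If you manage dependencies with a build tool instead, the Maven Central coordinate listed in the README (`com.microsoft.azure:azure-sqldb-spark:1.0.2`) can be added directly; as a sketch, for an sbt build:

```scala
// build.sbt: pull the connector from Maven Central
libraryDependencies += "com.microsoft.azure" % "azure-sqldb-spark" % "1.0.2"
```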
7 | 8 | ## Officially Supported Versions 9 | 10 | | Component | Version | 11 | | :----------------------------------- | :----------------------- | 12 | | Apache Spark | 2.0.2 or later | 13 | | Scala | 2.10 or later | 14 | | Microsoft JDBC Driver for SQL Server | 6.2 or later | 15 | | Microsoft SQL Server | SQL Server 2008 or later | 16 | | Azure SQL Database | Supported | 17 | 18 | The Spark connector for Azure SQL Database and SQL Server utilizes the Microsoft JDBC Driver for SQL Server to move data between Spark worker nodes and SQL databases: 19 | 20 | The dataflow is as follows: 21 | 1. The Spark master node connects to SQL Server or Azure SQL Database and loads data from a specific table or using a specific SQL query. 22 | 2. The Spark master node distributes data to worker nodes for transformation. 23 | 3. Worker nodes connect to SQL Server or Azure SQL Database and write data to the database. Users can choose between row-by-row insertion and bulk insert. 24 | 25 | ### Build the Spark to SQL DB connector 26 | Currently, the connector project uses Maven. To build the connector without dependencies, you can run: 27 |     mvn clean package 28 | You can also download the latest version of the JAR from the releases folder 29 | and include the SQL DB Spark JAR in your project. 30 | 31 | ## Connect Spark to SQL DB using the connector 32 | You can connect to Azure SQL Database or SQL Server from Spark jobs to read or write data. You can also run a DML or DDL query in an Azure SQL database or SQL Server database. 33 | 34 | ### Read data from Azure SQL Database or SQL Server 35 | 36 | ```scala 37 | import com.microsoft.azure.sqldb.spark.config.Config 38 | import com.microsoft.azure.sqldb.spark.connect._ 39 | 40 | val config = Config(Map( 41 | "url" -> "mysqlserver.database.windows.net", 42 | "databaseName" -> "MyDatabase", 43 | "dbTable" -> "dbo.Clients", 44 | "user" -> "username", 45 | "password" -> "*********", 46 | "connectTimeout" -> "5", //seconds 47 | "queryTimeout" -> "5" //seconds 48 | )) 49 | 50 | val collection = sqlContext.read.sqlDB(config) 51 | collection.show() 52 | ``` 53 | ### Reading data from Azure SQL Database or SQL Server with a specified SQL query 54 | ```scala 55 | import com.microsoft.azure.sqldb.spark.config.Config 56 | import com.microsoft.azure.sqldb.spark.connect._ 57 | 58 | val config = Config(Map( 59 | "url" -> "mysqlserver.database.windows.net", 60 | "databaseName" -> "MyDatabase", 61 | "queryCustom" -> "SELECT TOP 100 * FROM dbo.Clients WHERE PostalCode = 98074", //SQL query 62 | "user" -> "username", 63 | "password" -> "*********" 64 | )) 65 | 66 | //Read the results of the custom query 67 | val collection = sqlContext.read.sqlDB(config) 68 | collection.show() 69 | ``` 70 | 71 | ### Write data to Azure SQL Database or SQL Server 72 | ```scala 73 | import com.microsoft.azure.sqldb.spark.config.Config 74 | import com.microsoft.azure.sqldb.spark.connect._ 75 | 76 | // Acquire a DataFrame collection (val collection) 77 | 78 | val config = Config(Map( 79 | "url" -> "mysqlserver.database.windows.net", 80 | "databaseName" -> "MyDatabase", 81 | "dbTable" -> "dbo.Clients", 82 | "user" -> "username", 83 | "password" -> "*********" 84 | )) 85 | 86 | import org.apache.spark.sql.SaveMode 87 | collection.write.mode(SaveMode.Append).sqlDB(config) 88 | ``` 89 | 90 | ### Run DML or DDL query in Azure SQL Database or SQL Server 91 | ```scala 92 | import com.microsoft.azure.sqldb.spark.config.Config 93 | import com.microsoft.azure.sqldb.spark.query._ 94 | val query = """ 95 | |UPDATE Customers 96 | |SET ContactName = 'Alfred Schmidt', City = 'Frankfurt'
97 | |WHERE CustomerID = 1; 98 | """.stripMargin 99 | 100 | val config = Config(Map( 101 | "url" -> "mysqlserver.database.windows.net", 102 | "databaseName" -> "MyDatabase", 103 | "user" -> "username", 104 | "password" -> "*********", 105 | "queryCustom" -> query 106 | )) 107 | 108 | sqlContext.sqlDBQuery(config) 109 | ``` 110 | 111 | ## Connect Spark to Azure SQL Database using AAD authentication 112 | You can connect to Azure SQL Database using Azure Active Directory (AAD) authentication. Use AAD authentication to centrally manage identities of database users and as an alternative to SQL Server authentication. 113 | ### Connecting using ActiveDirectoryPassword Authentication Mode 114 | #### Setup Requirements 115 | If you are using the ActiveDirectoryPassword authentication mode, you will need to download [azure-activedirectory-library-for-java](https://github.com/AzureAD/azure-activedirectory-library-for-java) and its dependencies, and include them in the Java build path. 116 | 117 | ```scala 118 | import com.microsoft.azure.sqldb.spark.config.Config 119 | import com.microsoft.azure.sqldb.spark.connect._ 120 | 121 | val config = Config(Map( 122 | "url" -> "mysqlserver.database.windows.net", 123 | "databaseName" -> "MyDatabase", 124 | "user" -> "username", 125 | "password" -> "*********", 126 | "authentication" -> "ActiveDirectoryPassword", 127 | "encrypt" -> "true" 128 | )) 129 | 130 | val collection = sqlContext.read.sqlDB(config) 131 | collection.show() 132 | ``` 133 | 134 | ### Connecting using Access Token 135 | #### Setup Requirements 136 | If you are using the access token based authentication mode, you will need to download [azure-activedirectory-library-for-java](https://github.com/AzureAD/azure-activedirectory-library-for-java) and its dependencies, and include them in the Java build path. 137 | 138 | See [Use Azure Active Directory Authentication for authentication with SQL Database](https://docs.microsoft.com/en-us/azure/sql-database/sql-database-aad-authentication) to learn how to get an access token for your Azure SQL database. 139 | 140 | ```scala 141 | import com.microsoft.azure.sqldb.spark.config.Config 142 | import com.microsoft.azure.sqldb.spark.connect._ 143 | 144 | val config = Config(Map( 145 | "url" -> "mysqlserver.database.windows.net", 146 | "databaseName" -> "MyDatabase", 147 | "accessToken" -> "access_token", 148 | "hostNameInCertificate" -> "*.database.windows.net", 149 | "encrypt" -> "true" 150 | )) 151 | 152 | val collection = sqlContext.read.sqlDB(config) 153 | collection.show() 154 | ``` 155 | 156 | ## Write data to Azure SQL database or SQL Server using Bulk Insert 157 | The traditional JDBC connector writes data into Azure SQL Database or SQL Server using row-by-row insertion. You can use the Spark to SQL DB connector to write data to a SQL database using bulk insert instead; this significantly improves write performance when loading large data sets or loading data into tables where a columnstore index is used. 158 | 159 | ```scala 160 | import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata 161 | import com.microsoft.azure.sqldb.spark.config.Config 162 | import com.microsoft.azure.sqldb.spark.connect._ 163 | 164 | /** 165 | Add column metadata. 166 | If not specified, metadata is read automatically 167 | from the destination table, which may hurt performance.
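Specifying the metadata up front avoids that extra round trip (the connector otherwise runs a SELECT TOP 0 query against the destination table to discover its column types).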
168 | */ 169 | var bulkCopyMetadata = new BulkCopyMetadata 170 | bulkCopyMetadata.addColumnMetadata(1, "Title", java.sql.Types.NVARCHAR, 128, 0) 171 | bulkCopyMetadata.addColumnMetadata(2, "FirstName", java.sql.Types.NVARCHAR, 50, 0) 172 | bulkCopyMetadata.addColumnMetadata(3, "LastName", java.sql.Types.NVARCHAR, 50, 0) 173 | 174 | val bulkCopyConfig = Config(Map( 175 | "url" -> "mysqlserver.database.windows.net", 176 | "databaseName" -> "MyDatabase", 177 | "user" -> "username", 178 | "password" -> "*********", 179 | "dbTable" -> "dbo.Clients", 180 | "bulkCopyBatchSize" -> "2500", 181 | "bulkCopyTableLock" -> "true", 182 | "bulkCopyTimeout" -> "600" 183 | )) 184 | 185 | df.bulkCopyToSqlDB(bulkCopyConfig, bulkCopyMetadata) 186 | //df.bulkCopyToSqlDB(bulkCopyConfig) if no metadata is specified. 187 | ``` 188 | 189 | ## Next steps 190 | If you haven't already, download the Spark connector for Azure SQL Database and SQL Server from the [azure-sqldb-spark GitHub repository](https://github.com/Azure/azure-sqldb-spark) and explore the additional resources in the repo: 191 | 192 | - [Sample Azure Databricks notebooks](https://github.com/Azure/azure-sqldb-spark/tree/master/samples/notebooks) 193 | - [Sample scripts (Scala)](https://github.com/Azure/azure-sqldb-spark/tree/master/samples/scripts) 194 | 195 | You might also want to review the [Apache Spark SQL, DataFrames, and Datasets Guide](http://spark.apache.org/docs/latest/sql-programming-guide.html) and the [Azure Databricks documentation](https://docs.microsoft.com/en-us/azure/azure-databricks/). 196 | 197 | -------------------------------------------------------------------------------- /docs/images/spark_sqldb_dataflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/azure-sqldb-spark/47f5cdc19f3b7c4990118cca4f3091ce0fd201d8/docs/images/spark_sqldb_dataflow.png -------------------------------------------------------------------------------- /lib/mssql-jdbc-6.2.2.jre8.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/azure-sqldb-spark/47f5cdc19f3b7c4990118cca4f3091ce0fd201d8/lib/mssql-jdbc-6.2.2.jre8.jar -------------------------------------------------------------------------------- /lib/scalastyle_config.xml: -------------------------------------------------------------------------------- Scalastyle standard configuration (the XML rule markup was not preserved in this dump; the only other recoverable text is the custom rule message "Avoid print line, use logger.")
-------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | com.microsoft.azure 7 | azure-sqldb-spark 8 | jar 9 | 1.0.0 10 | ${project.groupId}:${project.artifactId} 11 | Spark Connector for Microsoft Azure SQL Database and SQL Server 12 | 13 | 14 | MIT License 15 | http://www.opensource.org/licenses/mit-license.php 16 | 17 | 18 | 19 | UTF-8 20 | 2.11 21 | 2.11.8 22 | 2.2.2 23 | 24 | 25 | 26 | org.scala-lang 27 | scala-library 28 | ${scala.version} 29 | 30 | 31 | junit 32 | junit 33 | 4.8.1 34 | test 35 | 36 | 37 | org.apache.spark 38 | spark-core_${scala.binary.version} 39 | ${spark.version} 40 | provided 41 | 42 | 43 | org.apache.spark 44 | spark-sql_${scala.binary.version} 45 | ${spark.version} 46 | provided 47 | 48 | 49 | org.scalactic 50 | scalactic_${scala.binary.version} 51 | 3.0.4 52 | 53 | 54 | org.scalatest 55 | scalatest_${scala.binary.version} 56 | 3.0.4 57 | test 58 | 59 | 60 | com.microsoft.azure 61 | adal4j 62 | 1.2.0 63 | 64 | 65 | com.microsoft.sqlserver 66 | mssql-jdbc 67 | 6.4.0.jre8 68 | 69 | 70 | 71 | 72 | Azure SQL DB Devs 73 | Microsoft 74 | http://www.microsoft.com/ 75 | 76 | 77 | 78 | 79 | ossrh 80 | https://oss.sonatype.org/content/repositories/snapshots 81 | 82 | 83 | ossrh 84 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 85 | 86 | 87 | 88 | 89 | 90 | org.codehaus.mojo 91 | build-helper-maven-plugin 92 | 3.0.0 93 | 94 | 95 | generate-sources 96 | 97 | add-source 98 | 99 | 100 | 101 | src/main/java 102 | src/main/scala 103 | 104 | 105 | 106 | 107 | add-test-source 108 | generate-test-sources 109 | 110 | add-test-source 111 | 112 | 113 | 114 | src/test/java 115 | src/test/scala 116 | 117 | 118 | 119 | 120 | 121 | 122 | org.apache.maven.plugins 123 | maven-assembly-plugin 124 | 3.0.0 125 | 126 | 127 | jar-with-dependencies 128 | 129 | 130 | 131 | 132 | make-assembly 133 | package 134 | 135 | single 136 | 137 | 138 | 139 | 140 | 141 | org.apache.maven.plugins 142 | maven-javadoc-plugin 143 | 144 | 145 | org.apache.maven.plugins 146 | maven-source-plugin 147 | 3.0.1 148 | 149 | 150 | attach-sources 151 | verify 152 | 153 | jar-no-fork 154 | 155 | 156 | 157 | 158 | 159 | org.sonatype.plugins 160 | nexus-staging-maven-plugin 161 | 1.6.8 162 | true 163 | 164 | ossrh 165 | https://oss.sonatype.org/ 166 | false 167 | 168 | 169 | 170 | org.scalastyle 171 | scalastyle-maven-plugin 172 | 1.0.0 173 | 174 | false 175 | true 176 | true 177 | false 178 | ${project.basedir}/src/main/scala 179 | ${project.basedir}/src/test/scala 180 | ${project.basedir}/lib/scalastyle_config.xml 181 | UTF-8 182 | 183 | 184 | 185 | 186 | check 187 | 188 | 189 | 190 | 191 | 192 | maven-compiler-plugin 193 | 3.7.0 194 | 195 | 1.8 196 | 1.8 197 | 198 | 199 | 200 | net.alchim31.maven 201 | scala-maven-plugin 202 | 3.2.2 203 | 204 | false 205 | ${scala.version} 206 | 207 | -deprecation 208 | -feature 209 | 210 | 211 | 212 | 213 | scala-compile-first 214 | process-resources 215 | 216 | add-source 217 | 218 | 219 | 220 | scala-compile 221 | 222 | compile 223 | 224 | compile 225 | 226 | 227 | test-compile 228 | 229 | testCompile 230 | 231 | test-compile 232 | 233 | 234 | scala-doc 235 | prepare-package 236 | 237 | doc 238 | doc-jar 239 | 240 | 241 | 242 | 243 | 244 | org.scalatest 245 | scalatest-maven-plugin 246 | 1.0 247 | 248 |
${project.build.directory}/surefire-reports 249 | . 250 | TestSuite.txt 251 | W 252 | 253 | 254 | 255 | scala-test 256 | 257 | test 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | -------------------------------------------------------------------------------- /releases/azure-sqldb-spark-1.0.0/azure-sqldb-spark-1.0.0-uber.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/azure-sqldb-spark/47f5cdc19f3b7c4990118cca4f3091ce0fd201d8/releases/azure-sqldb-spark-1.0.0/azure-sqldb-spark-1.0.0-uber.jar -------------------------------------------------------------------------------- /releases/azure-sqldb-spark-1.0.0/azure-sqldb-spark-1.0.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure/azure-sqldb-spark/47f5cdc19f3b7c4990118cca4f3091ce0fd201d8/releases/azure-sqldb-spark-1.0.0/azure-sqldb-spark-1.0.0.jar -------------------------------------------------------------------------------- /samples/notebooks/Spark Connector for Azure SQL Databases and SQL Server.html: -------------------------------------------------------------------------------- Exported Databricks notebook HTML (markup not preserved in this dump); document title: "Spark Connector for Azure SQL Databases and SQL Server - Databricks" -------------------------------------------------------------------------------- /samples/scripts/BulkCopySample.scala: -------------------------------------------------------------------------------- 1 | // Import libraries 2 | import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata 3 | import com.microsoft.azure.sqldb.spark.config.Config 4 | import com.microsoft.azure.sqldb.spark.connect._ 5 | 6 | val url = "[Enter your url here]" 7 | val databaseName = "[Enter your database name here]" 8 | val dbTable = "[Enter your database table here]" 9 | 10 | val user = "[Enter your username here]" 11 | val password = "[Enter your password here]" 12 | 13 | // Acquire data to be written. 14 | // df could be acquired in any way. 15 | val localTable = "[Enter your local persisted table here]" 16 | val df = spark.sql(s"SELECT * FROM $localTable") 17 | 18 | val writeConfig = Config(Map( 19 | "url" -> url, 20 | "databaseName" -> databaseName, 21 | "dbTable" -> dbTable, 22 | "user" -> user, 23 | "password" -> password, 24 | "connectTimeout" -> "5", 25 | "bulkCopyBatchSize" -> "100000", 26 | "bulkCopyTableLock" -> "true", 27 | "bulkCopyTimeout" -> "600" 28 | )) 29 | 30 | df.bulkCopyToSqlDB(writeConfig) 31 | 32 | /** 33 | For better performance, specify the column metadata of the table 34 | 35 | var bulkCopyMetadata = new BulkCopyMetadata 36 | bulkCopyMetadata.addColumnMetadata(1, "Title", java.sql.Types.NVARCHAR, 128, 0) 37 | bulkCopyMetadata.addColumnMetadata(2, "FirstName", java.sql.Types.NVARCHAR, 128, 0) 38 | bulkCopyMetadata.addColumnMetadata(3, "MiddleName", java.sql.Types.NVARCHAR, 128, 0) 39 | bulkCopyMetadata.addColumnMetadata(4, "LastName", java.sql.Types.NVARCHAR, 128, 0) 40 | ..........
41 | 42 | df.bulkCopyToSqlDB(writeConfig, bulkCopyMetadata) 43 | **/ -------------------------------------------------------------------------------- /samples/scripts/ReadSample.scala: -------------------------------------------------------------------------------- 1 | // Import libraries 2 | import com.microsoft.azure.sqldb.spark.config.Config 3 | import com.microsoft.azure.sqldb.spark.connect._ 4 | 5 | val url = "[Enter your url here]" 6 | val databaseName = "[Enter your database name here]" 7 | val dbTable = "[Enter your database table here]" 8 | 9 | val user = "[Enter your username here]" 10 | val password = "[Enter your password here]" 11 | 12 | // READ FROM CONFIG 13 | val readConfig = Config(Map( 14 | "url" -> url, 15 | "databaseName" -> databaseName, 16 | "user" -> user, 17 | "password" -> password, 18 | "connectTimeout" -> "5", 19 | "queryTimeout" -> "5", 20 | "dbTable" -> dbTable 21 | )) 22 | 23 | val df = sqlContext.read.sqlDB(readConfig) 24 | println("Total rows: " + df.count) 25 | df.show() 26 | 27 | // TRADITIONAL SYNTAX 28 | import java.util.Properties 29 | 30 | val properties = new Properties() 31 | properties.put("databaseName", databaseName) 32 | properties.put("user", user) 33 | properties.put("password", password) 34 | properties.put("connectTimeout", "5") 35 | properties.put("queryTimeout", "5") 36 | 37 | val df = sqlContext.read.sqlDB(url, dbTable, properties) 38 | println("Total rows: " + df.count) 39 | df.show() 40 | -------------------------------------------------------------------------------- /samples/scripts/WriteSample.scala: -------------------------------------------------------------------------------- 1 | // Import libraries 2 | import com.microsoft.azure.sqldb.spark.config.Config 3 | import com.microsoft.azure.sqldb.spark.connect._ 4 | import org.apache.spark.sql.SaveMode 5 | val url = "[Enter your url here]" 6 | val databaseName = "[Enter your database name here]" 7 | val dbTable = "[Enter your database table here]" 8 | 9 | val user = "[Enter your username here]" 10 | val password = "[Enter your password here]" 11 | 12 | // Acquire data to be written. 13 | // df could be acquired in any way.
14 | val localTable = "[Enter your local persisted table here]" 15 | val df = spark.sql(s"SELECT * FROM $localTable") 16 | 17 | // WRITE FROM CONFIG 18 | val writeConfig = Config(Map( 19 | "url" -> url, 20 | "databaseName" -> databaseName, 21 | "dbTable" -> dbTable, 22 | "user" -> user, 23 | "password" -> password, 24 | "connectTimeout" -> "5", 25 | "queryTimeout" -> "5" 26 | )) 27 | 28 | df.write.mode(SaveMode.Append).sqlDB(writeConfig) 29 | 30 | /** TRADITIONAL SYNTAX 31 | 32 | import java.util.Properties 33 | 34 | val properties = new Properties() 35 | 36 | properties.put("databaseName", databaseName) 37 | properties.put("user", user) 38 | properties.put("password", password) 39 | properties.put("connectTimeout", "5") 40 | properties.put("queryTimeout", "5") 41 | 42 | df.write.mode(SaveMode.Append).sqlDB(url, dbTable, properties) 43 | 44 | **/ -------------------------------------------------------------------------------- /src/main/java/com/microsoft/azure/sqldb/spark/bulkcopy/BulkCopyMetadata.java: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | package com.microsoft.azure.sqldb.spark.bulkcopy; 24 | 25 | import java.io.Serializable; 26 | import java.time.format.DateTimeFormatter; 27 | import java.util.HashMap; 28 | import java.util.Map; 29 | 30 | /** 31 | * Public class for users to add column metadata manually 32 | */ 33 | public class BulkCopyMetadata implements Serializable { 34 | 35 | private Map<Integer, ColumnMetadata> metadata; 36 | 37 | public BulkCopyMetadata() { 38 | this.metadata = new HashMap<>(); 39 | } 40 | 41 | public void addColumnMetadata( 42 | int column, 43 | String name, 44 | int jdbcType, 45 | int precision, 46 | int scale) { 47 | addColumnMetadataInternal(column, name, jdbcType, precision, scale, null); 48 | } 49 | 50 | public void addColumnMetadata( 51 | int column, 52 | String name, 53 | int jdbcType, 54 | int precision, 55 | int scale, 56 | DateTimeFormatter dateTimeFormatter) { 57 | addColumnMetadataInternal(column, name, jdbcType, precision, scale, dateTimeFormatter); 58 | } 59 | 60 | Map<Integer, ColumnMetadata> getMetadata() { 61 | return metadata; 62 | } 63 | 64 | private void addColumnMetadataInternal( 65 | int column, 66 | String name, 67 | int jdbcType, 68 | int precision, 69 | int scale, 70 | DateTimeFormatter dateTimeFormatter) { 71 | 72 | switch (jdbcType) { 73 | /* 74 | * SQL Server supports numerous string literal formats for temporal types, hence sending them as varchar with the approximate 75 | * precision (length) needed to send supported string literals. The string literal formats supported by temporal types are listed in the MSDN 76 | * page on data types. 77 | */ 78 | case java.sql.Types.DATE: 79 | case java.sql.Types.TIME: 80 | case java.sql.Types.TIMESTAMP: 81 | case microsoft.sql.Types.DATETIMEOFFSET: 82 | // The precision is just a number long enough to hold all types of temporal data; it doesn't need to be exact.
83 | metadata.put(column, new ColumnMetadata(name, jdbcType, 50, scale, dateTimeFormatter)); 84 | break; 85 | 86 | // Redirect SQLXML as LONGNVARCHAR; SQLXML is not a valid type in TDS 87 | case java.sql.Types.SQLXML: 88 | metadata.put(column, new ColumnMetadata(name, java.sql.Types.LONGNVARCHAR, precision, scale, dateTimeFormatter)); 89 | break; 90 | 91 | // Redirect FLOAT as DOUBLE based on the data type mapping 92 | case java.sql.Types.FLOAT: 93 | metadata.put(column, new ColumnMetadata(name, java.sql.Types.DOUBLE, precision, scale, dateTimeFormatter)); 94 | break; 95 | 96 | // Redirect BOOLEAN as BIT 97 | case java.sql.Types.BOOLEAN: 98 | metadata.put(column, new ColumnMetadata(name, java.sql.Types.BIT, precision, scale, dateTimeFormatter)); 99 | break; 100 | 101 | default: 102 | metadata.put(column, new ColumnMetadata(name, jdbcType, precision, scale, dateTimeFormatter)); 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/main/java/com/microsoft/azure/sqldb/spark/bulkcopy/ColumnMetadata.java: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE.
22 | */ 23 | package com.microsoft.azure.sqldb.spark.bulkcopy; 24 | 25 | import java.io.Serializable; 26 | import java.time.format.DateTimeFormatter; 27 | 28 | /** 29 | * Class to represent the column metadata 30 | */ 31 | class ColumnMetadata implements Serializable { 32 | private String columnName; 33 | private int columnType; 34 | private int precision; 35 | private int scale; 36 | private DateTimeFormatter dateTimeFormatter; 37 | 38 | ColumnMetadata(String name, int type, int precision, int scale, DateTimeFormatter dateTimeFormatter) { 39 | this.columnName = name; 40 | this.columnType = type; 41 | this.precision = precision; 42 | this.scale = scale; 43 | this.dateTimeFormatter = dateTimeFormatter; 44 | } 45 | 46 | String getColumnName(){ 47 | return columnName; 48 | } 49 | 50 | int getColumnType(){ 51 | return columnType; 52 | } 53 | 54 | int getPrecision(){ 55 | return precision; 56 | } 57 | 58 | int getScale(){ 59 | return scale; 60 | } 61 | 62 | DateTimeFormatter getDateTimeFormatter(){ 63 | return dateTimeFormatter; 64 | } 65 | } -------------------------------------------------------------------------------- /src/main/java/com/microsoft/azure/sqldb/spark/bulkcopy/SQLServerBulkDataFrameFileRecord.java: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | package com.microsoft.azure.sqldb.spark.bulkcopy; 24 | 25 | import com.microsoft.sqlserver.jdbc.ISQLServerBulkRecord; 26 | import com.microsoft.sqlserver.jdbc.SQLServerException; 27 | import com.microsoft.sqlserver.jdbc.SQLServerResource; 28 | import org.apache.spark.sql.Row; 29 | import scala.collection.Iterator; 30 | 31 | import java.sql.JDBCType; 32 | import java.sql.Types; 33 | import java.text.MessageFormat; 34 | import java.time.OffsetTime; 35 | import java.time.format.DateTimeFormatter; 36 | import java.util.Map; 37 | import java.util.Set; 38 | 39 | /** 40 | * Class to allow SQLServerBulkCopy to write data to SQL Server tables from Spark DataFrames 41 | */ 42 | public class SQLServerBulkDataFrameFileRecord implements ISQLServerBulkRecord, java.lang.AutoCloseable { 43 | 44 | private Iterator<Row> iterator; 45 | 46 | private Map<Integer, ColumnMetadata> columnMetadata; 47 | 48 | public SQLServerBulkDataFrameFileRecord(Iterator<Row> iterator, BulkCopyMetadata metadata) { 49 | this.iterator = iterator; 50 | this.columnMetadata = metadata.getMetadata(); 51 | } 52 | 53 | public DateTimeFormatter getDateTimeFormatter(int column) { 54 | return columnMetadata.get(column).getDateTimeFormatter(); 55 | } 56 | 57 | @Override 58 | public void close() throws SQLServerException { 59 | // nothing to close 60 | } 61 | 62 | @Override 63 | public String getColumnName(int column) { 64 | return columnMetadata.get(column).getColumnName(); 65 | } 66 | 67 | @Override 68 | public Set<Integer> getColumnOrdinals() { 69 | return columnMetadata.keySet(); 70 | } 71 | 72 | @Override 73 | public int getColumnType(int column) { 74 | return columnMetadata.get(column).getColumnType(); 75 | } 76 | 77 | @Override 78 | public int getPrecision(int column) { 79 | return columnMetadata.get(column).getPrecision(); 80 | } 81 | 82 | @Override 83 | public Object[] getRowData() throws SQLServerException { 84 | Row row = iterator.next(); 85 | Object[] rowData = new Object[row.length()]; 86 | 87 | // Keys of columnMetadata are database table column ordinals, starting from 1. 88 | // rowData is an array with index starting from 0.
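// Every access below therefore shifts the ordinal by one: database column pair.getKey() maps to rowData[pair.getKey() - 1].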
89 | for (Map.Entry<Integer, ColumnMetadata> pair : columnMetadata.entrySet()) { 90 | ColumnMetadata cm = pair.getValue(); 91 | 92 | try { 93 | switch (cm.getColumnType()) { 94 | case Types.TIME_WITH_TIMEZONE: 95 | case Types.TIMESTAMP_WITH_TIMEZONE: { 96 | OffsetTime offsetTimeValue; 97 | 98 | if (cm.getDateTimeFormatter() != null) 99 | offsetTimeValue = OffsetTime.parse(row.get(pair.getKey() - 1).toString(), cm.getDateTimeFormatter()); 100 | else 101 | offsetTimeValue = OffsetTime.parse(row.get(pair.getKey() - 1).toString()); 102 | 103 | rowData[pair.getKey() - 1] = offsetTimeValue; 104 | break; 105 | } 106 | 107 | case Types.NULL: { 108 | rowData[pair.getKey() - 1] = null; 109 | break; 110 | } 111 | 112 | default: { 113 | rowData[pair.getKey() - 1] = row.get(pair.getKey() - 1); 114 | break; 115 | } 116 | } 117 | } catch (IllegalArgumentException illegalArgumentException) { 118 | String value = "'" + row.get(pair.getKey() - 1) + "'"; 119 | MessageFormat form = new MessageFormat(getSQLServerExceptionErrorMsg("R_errorConvertingValue")); 120 | String errText = form.format(new Object[]{value, JDBCType.valueOf(cm.getColumnType()).getName()}); 121 | 122 | try { 123 | throw SQLServerExceptionReflection.throwSQLServerException(errText, null, 0, illegalArgumentException); 124 | } catch (Exception e) { 125 | throw new IllegalArgumentException(errText, illegalArgumentException); 126 | } 127 | } catch (ArrayIndexOutOfBoundsException arrayOutOfBoundsException) { 128 | String errText = getSQLServerExceptionErrorMsg("R_schemaMismatch"); 129 | 130 | try { 131 | throw SQLServerExceptionReflection.throwSQLServerException(errText, arrayOutOfBoundsException); 132 | } catch (Exception e) { 133 | throw new ArrayIndexOutOfBoundsException(errText); 134 | } 135 | } 136 | } 137 | 138 | return rowData; 139 | } 140 | 141 | @Override 142 | public int getScale(int column) { 143 | return columnMetadata.get(column).getScale(); 144 | } 145 | 146 | @Override 147 | public boolean isAutoIncrement(int column) { 148 | return false; 149 | } 150 | 151 | @Override 152 | public boolean next() throws SQLServerException { 153 | return iterator.hasNext(); 154 | } 155 | 156 | private String getSQLServerExceptionErrorMsg(String type) { 157 | return SQLServerResource.getBundle("com.microsoft.sqlserver.jdbc.SQLServerResource").getString(type); 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/main/java/com/microsoft/azure/sqldb/spark/bulkcopy/SQLServerExceptionReflection.java: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.bulkcopy; 24 | 25 | import com.microsoft.sqlserver.jdbc.SQLServerException; 26 | import java.lang.reflect.Constructor; 27 | 28 | /** 29 | * ISQLServerBulkRecord requires some inherited methods to throw SQLServerException. 30 | * Prior to MS SQL JDBC v6.4, the SQLServerException class was only package accessible. 31 | * This class uses reflection in order to access SQLServerException for earlier versions of the JDBC driver. 32 | */ 33 | public class SQLServerExceptionReflection { 34 | 35 | public static SQLServerException throwSQLServerException(String errText, Throwable clause) throws Exception { 36 | Constructor<SQLServerException> constructor 37 | = SQLServerException.class.getDeclaredConstructor(String.class, Throwable.class); 38 | constructor.setAccessible(true); 39 | return constructor.newInstance(errText, clause); 40 | } 41 | 42 | public static SQLServerException throwSQLServerException( 43 | String errText, 44 | String errState, 45 | int errNum, 46 | Throwable clause) throws Exception { 47 | 48 | Constructor<SQLServerException> constructor 49 | = SQLServerException.class.getDeclaredConstructor(String.class, String.class, int.class, Throwable.class); 50 | constructor.setAccessible(true); 51 | return constructor.newInstance(errText, errState, errNum, clause); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/Logging.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE.
22 | */ 23 | package com.microsoft.azure.sqldb.spark 24 | 25 | private[spark] abstract class Logging extends LoggingTrait 26 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/LoggingTrait.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark 24 | 25 | import org.slf4j.{Logger, LoggerFactory} 26 | 27 | private[spark] trait LoggingTrait { 28 | 29 | // Make the log field transient so that objects with Logging can 30 | // be serialized and used on another machine 31 | @transient private var log_ : Logger = null // scalastyle:ignore 32 | 33 | // Method to get the logger name for this object 34 | protected def logName = { 35 | // Ignore trailing $'s in the class names for Scala objects 36 | this.getClass.getName.stripSuffix("$") 37 | } 38 | 39 | // Method to get or create the logger for this object 40 | protected def log: Logger = { 41 | if (log_ == null) { 42 | // scalastyle:ignore 43 | log_ = LoggerFactory.getLogger(logName) 44 | } 45 | log_ 46 | } 47 | 48 | // Log methods that take only a String 49 | protected def logInfo(msg: => String) { 50 | if (log.isInfoEnabled) log.info(msg) 51 | } 52 | 53 | protected def logDebug(msg: => String) { 54 | if (log.isDebugEnabled) log.debug(msg) 55 | } 56 | 57 | protected def logTrace(msg: => String) { 58 | if (log.isTraceEnabled) log.trace(msg) 59 | } 60 | 61 | protected def logWarning(msg: => String) { 62 | if (log.isWarnEnabled) log.warn(msg) 63 | } 64 | 65 | protected def logError(msg: => String) { 66 | if (log.isErrorEnabled) log.error(msg) 67 | } 68 | 69 | // Log methods that take Throwables (Exceptions/Errors) too 70 | protected def logInfo(msg: => String, throwable: Throwable) { 71 | if (log.isInfoEnabled) log.info(msg, throwable) 72 | } 73 | 74 | protected def logDebug(msg: => String, throwable: Throwable) { 75 | if (log.isDebugEnabled) log.debug(msg, throwable) 76 | } 77 | 78 | protected def logTrace(msg: => String, throwable: Throwable) { 79 | if (log.isTraceEnabled) log.trace(msg, throwable) 80 | } 81 | 82 | protected def logWarning(msg: => String, throwable: Throwable) { 83 | if (log.isWarnEnabled) log.warn(msg, throwable) 84 | } 85 | 86 | protected def logError(msg: => String, 
throwable: Throwable) { 87 | if (log.isErrorEnabled) log.error(msg, throwable) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/bulk/BulkCopyUtils.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.bulk 24 | 25 | import java.sql.{Connection, ResultSetMetaData} 26 | 27 | import com.microsoft.azure.sqldb.spark.LoggingTrait 28 | import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata 29 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig} 30 | import com.microsoft.sqlserver.jdbc.SQLServerBulkCopyOptions 31 | 32 | import scala.util.control.NonFatal 33 | 34 | /** 35 | * Helper and utility methods used for setting up a Bulk Copy transaction. 36 | */ 37 | private[spark] object BulkCopyUtils extends LoggingTrait { 38 | 39 | /** 40 | * Database table columns start at index 1. 41 | */ 42 | val ColumnStartingIndex = 1 43 | 44 | /** 45 | * Extracts column names, types, precision and scale from a [[ResultSetMetaData]] 46 | * and creates a [[BulkCopyMetadata]] object. 47 | * 48 | * @param resultSetMetaData metadata from an external database table. 49 | * @return BulkCopyMetadata with the extracted column metadata. 50 | */ 51 | def createBulkCopyMetadata(resultSetMetaData: ResultSetMetaData): BulkCopyMetadata = { 52 | val bulkCopyMetadata = new BulkCopyMetadata 53 | 54 | for (column <- ColumnStartingIndex to resultSetMetaData.getColumnCount) { 55 | bulkCopyMetadata.addColumnMetadata( 56 | column, 57 | resultSetMetaData.getColumnName(column), 58 | resultSetMetaData.getColumnType(column), 59 | resultSetMetaData.getPrecision(column), 60 | resultSetMetaData.getScale(column) 61 | ) 62 | } 63 | 64 | bulkCopyMetadata 65 | } 66 | 67 | /** 68 | * Extracts Bulk Copy properties from Config and creates [[SQLServerBulkCopyOptions]]. 69 | * Will use default values if not specified. 
70 | * 71 | * @param config the Config object with specified bulk copy properties 72 | * @return [[SQLServerBulkCopyOptions]] for the JDBC Bulk Copy API 73 | */ 74 | def getBulkCopyOptions(config: Config): SQLServerBulkCopyOptions = { 75 | val copyOptions = new SQLServerBulkCopyOptions 76 | 77 | copyOptions.setBatchSize( 78 | config.get[String](SqlDBConfig.BulkCopyBatchSize) 79 | .getOrElse(SqlDBConfig.BulkCopyBatchSizeDefault.toString).toInt 80 | ) 81 | copyOptions.setBulkCopyTimeout( 82 | config.get[String](SqlDBConfig.BulkCopyTimeout) 83 | .getOrElse(SqlDBConfig.BulkCopyTimeoutDefault.toString).toInt 84 | ) 85 | copyOptions.setCheckConstraints( 86 | config.get[String](SqlDBConfig.BulkCopyCheckConstraints) 87 | .getOrElse(SqlDBConfig.BulkCopyCheckConstraintsDefault.toString).toBoolean 88 | ) 89 | copyOptions.setFireTriggers( 90 | config.get[String](SqlDBConfig.BulkCopyFireTriggers) 91 | .getOrElse(SqlDBConfig.BulkCopyFireTriggersDefault.toString).toBoolean 92 | ) 93 | copyOptions.setKeepIdentity( 94 | config.get[String](SqlDBConfig.BulkCopyKeepIdentity) 95 | .getOrElse(SqlDBConfig.BulkCopyKeepIdentityDefault.toString).toBoolean 96 | ) 97 | copyOptions.setKeepNulls( 98 | config.get[String](SqlDBConfig.BulkCopyKeepNulls) 99 | .getOrElse(SqlDBConfig.BulkCopyKeepNullsDefault.toString).toBoolean 100 | ) 101 | copyOptions.setTableLock( 102 | config.get[String](SqlDBConfig.BulkCopyTableLock) 103 | .getOrElse(SqlDBConfig.BulkCopyTableLockDefault.toString).toBoolean 104 | ) 105 | copyOptions.setUseInternalTransaction( 106 | config.get[String](SqlDBConfig.BulkCopyUseInternalTransaction) 107 | .getOrElse(SqlDBConfig.BulkCopyUseInternalTransactionDefault.toString).toBoolean 108 | ) 109 | copyOptions.setAllowEncryptedValueModifications( 110 | config.get[String](SqlDBConfig.BulkCopyAllowEncryptedValueModifications) 111 | .getOrElse(SqlDBConfig.BulkCopyAllowEncryptedValueModificationsDefault.toString).toBoolean 112 | ) 113 | 114 | copyOptions 115 | } 116 | 117 | /** 118 | * Retrieves table columns and metadata from remote database 119 | * 120 | * @param table the table to retrieve column metadata 121 | * @param connection the active JDBC connection 122 | * @return the [[ResultSetMetaData]] of the executed query. 123 | */ 124 | def getTableColumns(table: String, connection: Connection): ResultSetMetaData = { 125 | // A bit hacky, but this is the most efficient way. 
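// SELECT TOP 0 returns no rows, yet the driver still populates ResultSetMetaData for every column of the table.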
126 | val statement = s"SELECT TOP 0 * FROM $table" 127 | 128 | connection.createStatement().executeQuery(statement).getMetaData 129 | } 130 | 131 | /** 132 | * Retrieves transaction support from remote database 133 | * 134 | * @param connection the active JDBC connection 135 | * @return true if the connected database supports transactions, false otherwise 136 | */ 137 | def getTransactionSupport(connection: Connection): Boolean = { 138 | var isolationLevel = Connection.TRANSACTION_NONE 139 | try { 140 | val metadata = connection.getMetaData 141 | if (metadata.supportsTransactions) { 142 | isolationLevel = metadata.getDefaultTransactionIsolation 143 | } 144 | } catch { 145 | case NonFatal(e) => logWarning("Exception while detecting transaction support", e) 146 | } 147 | 148 | isolationLevel != Connection.TRANSACTION_NONE 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/config/Config.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.config 24 | 25 | import com.microsoft.azure.sqldb.spark.config.Config.Property 26 | import org.apache.spark.sql.SparkSession 27 | import org.apache.spark.{SparkConf, SparkContext} 28 | 29 | import scala.reflect.ClassTag 30 | 31 | /** 32 | * Abstract config builder, used to set a group of properties and build 33 | * a config object from them. 34 | * 35 | * @param properties Map of any-type properties. 36 | * @tparam Builder Current Builder implementation type. 37 | */ 38 | abstract class ConfigBuilder[Builder <: ConfigBuilder[Builder]](val properties: Map[Property, Any] = Map()) extends Serializable { 39 | builder => 40 | 41 | /** 42 | * Required properties to build an Azure SQL DB config object. 43 | * At build time, an IllegalArgumentException is thrown 44 | * if these properties are not set. 45 | */ 46 | val requiredProperties: List[Property] 47 | 48 | /** 49 | * Instantiate a brand new Builder from given properties map 50 | * 51 | * @param props Map of any-type properties. 52 | * @return The new builder 53 | */ 54 | def apply(props: Map[Property, Any]): Builder 55 | 56 | /** 57 | * Set (override if exists) a single property value given a new one.
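 * A minimal usage sketch with the concrete [[SqlDBConfigBuilder]]
 * (property values are illustrative):
 * {{{
 * val config = SqlDBConfigBuilder()
 *   .set(SqlDBConfig.URL, "myserver.database.windows.net")
 *   .set(SqlDBConfig.DatabaseName, "MyDatabase")
 *   .set(SqlDBConfig.DBTable, "dbo.Clients")
 *   .build()
 * }}}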
58 | * 59 | * @param property Property to be set 60 | * @param value New value for given property 61 | * @tparam T Property type 62 | * @return A new builder that includes the new value of the specified property 63 | */ 64 | def set[T](property: Property, value: T): Builder = 65 | apply(properties + (property -> value)) 66 | 67 | /** 68 | * Build the config object from current builder properties. 69 | * 70 | * @return The SQL DB configuration object. 71 | */ 72 | def build(): Config = new Config { 73 | 74 | val properties: Map[Property, Any] = builder.properties.map { case (k, v) => k.toLowerCase -> v } 75 | val reqProperties: List[Property] = requiredProperties.map(_.toLowerCase) 76 | 77 | require( 78 | reqProperties.forall(properties.isDefinedAt), 79 | s"Not all required properties are defined! Missing: ${reqProperties.diff(properties.keys.toList)}" 80 | ) 81 | 82 | require( 83 | properties.isDefinedAt(SqlDBConfig.QueryCustom.toLowerCase) ^ 84 | properties.isDefinedAt(SqlDBConfig.DBTable.toLowerCase), 85 | "Exactly one of QueryCustom and dbTable must be defined. Please define one and only one of the two properties." 86 | ) 87 | 88 | /** 89 | * Compare if two Configs have the same properties. 90 | * 91 | * @param other Object to compare 92 | * @return Boolean 93 | */ 94 | override def equals(other: Any): Boolean = other match { 95 | case that: Config => 96 | properties == that.properties 97 | case _ => false 98 | } 99 | 100 | override def hashCode(): Int = { 101 | val state = Seq(properties) 102 | state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b) 103 | } 104 | } 105 | } 106 | 107 | /** 108 | * SQL DB standard configuration object 109 | */ 110 | trait Config extends Serializable { 111 | 112 | /** 113 | * Contained properties in configuration object 114 | */ 115 | val properties: Map[Property, Any] 116 | 117 | def asOptions: collection.Map[String, String] = { 118 | properties.map { case (x, v) => x -> v.toString } 119 | } 120 | 121 | /** Returns the value associated with a key, or a default value if the key is not contained in the configuration object. 122 | * 123 | * @param key Desired property. 124 | * @param default Value in case no binding for `key` is found in the map. 125 | * @tparam T Result type of the default computation. 126 | * @return the value associated with `key` if it exists, 127 | * otherwise the result of the `default` computation. 128 | */ 129 | def getOrElse[T](key: Property, default: => T): T = properties.get(key) match { 130 | case Some(v) => v.asInstanceOf[T] 131 | case None => default 132 | } 133 | 134 | /** 135 | * Gets specified property from current configuration object 136 | * 137 | * @param property Desired property 138 | * @tparam T Property expected value type. 139 | * @return An optional value of expected type 140 | */ 141 | def get[T: ClassTag](property: Property): Option[T] = 142 | properties.get(property.toLowerCase).map(_.asInstanceOf[T]) 143 | 144 | /** 145 | * Gets all property keys in the current configuration object 146 | * 147 | * @return The set of all defined property keys 148 | */ 149 | def getAllKeys: Set[_] = properties.keySet 150 | 151 | /** 152 | * Gets specified property from current configuration object. 153 | * It will fail if property is not previously set.
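 * An illustrative contrast with [[get]], assuming a config that defines
 * URL but not LockTimeout:
 * {{{
 * val url  = config[String](SqlDBConfig.URL)              // returns the value
 * val lock = config.get[String](SqlDBConfig.LockTimeout)  // None, no exception
 * val boom = config[String](SqlDBConfig.LockTimeout)      // throws NoSuchElementException
 * }}}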
154 | * 155 | * @param property Desired property 156 | * @tparam T Property expected value type 157 | * @return Expected type value 158 | */ 159 | def apply[T: ClassTag](property: Property): T = { 160 | get[T](property).get 161 | } 162 | } 163 | 164 | object Config { 165 | 166 | val configPrefix = "spark.sqldb." 167 | 168 | type Property = String 169 | 170 | /** 171 | * Defines how to act in case any parameter is not set 172 | * 173 | * @param key Key that couldn't be obtained 174 | * @tparam T Expected type (used to fit in 'getOrElse' cases). 175 | * @return Throws an IllegalStateException. 176 | */ 177 | def notFound[T](key: String): T = 178 | throw new IllegalStateException(s"Parameter $key not specified") 179 | 180 | /** 181 | * Create a configuration from the `sparkContext` 182 | * 183 | * Uses the prefixed properties that are set in the Spark configuration to create the config. 184 | * 185 | * @see [[configPrefix]] 186 | * @param sparkContext the spark context 187 | * @return the configuration 188 | */ 189 | def apply(sparkContext: SparkContext): Config = apply(sparkContext.getConf) 190 | 191 | /** 192 | * Create a configuration from the `sqlContext` 193 | * 194 | * Uses the prefixed properties that are set in the Spark configuration to create the config. 195 | * 196 | * @see [[configPrefix]] 197 | * @param sparkSession the SparkSession 198 | * @return the configuration 199 | */ 200 | def apply(sparkSession: SparkSession): Config = apply(sparkSession.sparkContext.getConf) 201 | 202 | /** 203 | * Create a configuration from the `sparkConf` 204 | * 205 | * Uses the prefixed properties that are set in the Spark configuration to create the config. 206 | * 207 | * @see [[configPrefix]] 208 | * @param sparkConf the spark configuration 209 | * @return the configuration 210 | */ 211 | def apply(sparkConf: SparkConf): Config = apply(sparkConf, Map.empty[String, String]) 212 | 213 | /** 214 | * Create a configuration from the `sparkConf` 215 | * 216 | * Uses the prefixed properties that are set in the Spark configuration to create the config. 217 | * 218 | * @see [[configPrefix]] 219 | * @param sparkConf the spark configuration 220 | * @param options overloaded parameters 221 | * @return the configuration 222 | */ 223 | def apply(sparkConf: SparkConf, options: collection.Map[String, String]): Config = 224 | apply(getOptionsFromConf(sparkConf) ++ stripPrefix(options)) 225 | 226 | /** 227 | * Create a configuration from the values in the `Map` 228 | * 229 | * '''Note:''' Values in the map do not need to be prefixed with the [[configPrefix]]. 230 | * 231 | * @param options a map of properties and their string values 232 | * @return the configuration 233 | */ 234 | def apply(options: collection.Map[String, String]): Config = { 235 | apply(options, None) 236 | } 237 | 238 | /** 239 | * Create a configuration from the values in the `Map`, using the optional default configuration for any default values. 240 | * 241 | * '''Note:''' Values in the map do not need to be prefixed with the [[configPrefix]]. 
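 * For comparison, properties read from a SparkConf do carry the prefix
 * before it is stripped, e.g. (illustrative values):
 * {{{
 * val conf = new SparkConf()
 *   .set("spark.sqldb.url", "myserver.database.windows.net")
 *   .set("spark.sqldb.databaseName", "MyDatabase")
 *   .set("spark.sqldb.dbTable", "dbo.Clients")
 * val config = Config(conf) // the "spark.sqldb." prefix is stripped here
 * }}}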
242 | * 243 | * @param options a map of properties and their string values 244 | * @param default the optional default configuration, used for determining the default values for the properties 245 | * @return the configuration 246 | */ 247 | def apply(options: collection.Map[String, String], default: Option[Config]): Config = { 248 | var combine = options ++ { 249 | default match { 250 | case Some(value) => value.asOptions 251 | case None => Map.empty[String, String] 252 | } 253 | } 254 | var builder = SqlDBConfigBuilder(combine.asInstanceOf[Map[String, Any]]) 255 | 256 | builder.build() 257 | } 258 | 259 | /** 260 | * Strip the prefix from options 261 | * 262 | * @param options options that may contain the prefix 263 | * @return prefixLess options 264 | */ 265 | def stripPrefix(options: collection.Map[String, String]): collection.Map[String, String] = 266 | options.map(kv => (kv._1.toLowerCase.stripPrefix(configPrefix), kv._2)) 267 | 268 | /** 269 | * Gets an options map from the `SparkConf` 270 | * 271 | * @param sparkConf the SparkConf 272 | * @return the options 273 | */ 274 | def getOptionsFromConf(sparkConf: SparkConf): collection.Map[String, String] = 275 | stripPrefix(sparkConf.getAll.filter(_._1.startsWith(configPrefix)).toMap) 276 | 277 | protected def getInt(newValue: Option[String], existingValue: Option[Int] = None, defaultValue: Int): Int = { 278 | newValue match { 279 | case Some(value) => value.toInt 280 | case None => existingValue.getOrElse(defaultValue) 281 | } 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/config/SqlDBConfig.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | package com.microsoft.azure.sqldb.spark.config 24 | 25 | /** 26 | * Values and functions for accessing and parsing the configuration parameters 27 | */ 28 | object SqlDBConfig { 29 | 30 | // Parameter names (from JDBC connection properties) 31 | val AccessToken = "accessToken" 32 | val ApplicationIntent = "applicationIntent" 33 | val ApplicationName = "applicationName" 34 | val Authentication = "authentication" 35 | val AuthenticationScheme = "authenticationScheme" 36 | val ColumnEncryptionSetting = "columnEncryptionSetting" 37 | val ConnectTimeout = "connectTimeout" 38 | val Database = "database" 39 | val DatabaseName = "databaseName" 40 | val DBTable = "dbTable" 41 | val DisableStatementPooling = "disableStatementPooling" 42 | val Driver = "driver" 43 | val EnablePrepareOnFirstPreparedStatementCall = "enablePrepareOnFirstPreparedStatementCall" 44 | val Encrypt = "encrypt" 45 | val FailoverPartner = "failoverPartner" 46 | val Fips = "fips" 47 | val FipsProvider = "fipsProvider" 48 | val GSSCredential = "gsscredential" 49 | val HostNameInCertificate = "hostNameInCertificate" 50 | val InstanceName = "instanceName" 51 | val IntegratedSecurity = "integratedSecurity" 52 | val JaasConfigurationName = "jaasConfigurationName" 53 | val KeyStoreAuthentication = "keyStoreAuthentication" 54 | val KeyStoreLocation = "keyStoreLocation" 55 | val KeyStoreSecret = "keyStoreSecret" 56 | val LastUpdateCount = "lastUpdateCount" 57 | val LockTimeout = "lockTimeout" 58 | val LoginTimeout = "loginTimeout" 59 | val MultiSubnetFailover = "multiSubnetFailover" 60 | val PacketSize = "packetSize" 61 | val Password = "password" 62 | val PortNumber = "portNumber" 63 | val Port = "port" 64 | val QueryTimeout = "queryTimeout" 65 | val ResponseBuffering = "responseBuffering" 66 | val SelectMethod = "selectMethod" 67 | val SendStringParametersAsUnicode = "sendStringParametersAsUnicode" 68 | val SendTimeAsDatetime = "sendTimeAsDatetime" 69 | val ServerName = "serverName" 70 | val Server = "server" 71 | val ServerNameAsACE = "serverNameAsACE" 72 | val ServerPreparedStatementDiscardThreshold = "serverPreparedStatementDiscardThreshold" 73 | val ServerSpn = "serverSpn" 74 | val SocketTimeout = "socketTimeout" 75 | val TransparentNetworkIPResolution = "transparentNetworkIPResolution" 76 | val TrustServerCertificate = "trustServerCertificate" 77 | val TrustStore = "trustStore" 78 | val TrustStorePassword = "trustStorePassword" 79 | val TrustStoreType = "trustStoreType" 80 | val URL = "url" 81 | val User = "user" 82 | val WorkstationID = "workstationID" 83 | val XopenStates = "xopenStates" 84 | 85 | // Bulk Copy API Options 86 | val BulkCopyBatchSize = "bulkCopyBatchSize" 87 | val BulkCopyTimeout = "bulkCopyTimeout" 88 | val BulkCopyCheckConstraints = "bulkCopyCheckConstraints" 89 | val BulkCopyFireTriggers = "bulkCopyFireTriggers" 90 | val BulkCopyKeepIdentity = "bulkCopyKeepIdentity" 91 | val BulkCopyKeepNulls = "bulkCopyKeepNulls" 92 | val BulkCopyTableLock = "bulkCopyTableLock" 93 | val BulkCopyUseInternalTransaction = "bulkCopyUseInternalTransaction" 94 | val BulkCopyAllowEncryptedValueModifications = "bulkCopyAllowEncryptedValueModifications" 95 | 96 | // Bulk Copy API Default Settings 97 | val BulkCopyBatchSizeDefault = 0 98 | val BulkCopyTimeoutDefault = 60 // seconds 99 | val BulkCopyCheckConstraintsDefault = false 100 | val BulkCopyFireTriggersDefault = false 101 | val BulkCopyKeepIdentityDefault = false 102 | val BulkCopyKeepNullsDefault = false 103 | val BulkCopyTableLockDefault = false 104 | val
BulkCopyUseInternalTransactionDefault = false 105 | val BulkCopyAllowEncryptedValueModificationsDefault = false 106 | 107 | // Extra constants 108 | val JDBCUrlPrefix = "jdbc:sqlserver://" 109 | val QueryCustom = "QueryCustom" 110 | val SQLjdbcDriver = "com.microsoft.sqlserver.jdbc.SQLServerDriver" 111 | 112 | // Mandatory fields to construct a Config 113 | val required = List( 114 | DatabaseName, 115 | URL 116 | ) 117 | } 118 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/config/SqlDBConfigBuilder.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.config 24 | 25 | import com.microsoft.azure.sqldb.spark.config.Config.Property 26 | 27 | case class SqlDBConfigBuilder(props: Map[Property, Any] = Map()) extends { 28 | 29 | override val properties = Map() ++ props 30 | 31 | } with ConfigBuilder[SqlDBConfigBuilder](properties) { 32 | 33 | val requiredProperties: List[Property] = SqlDBConfig.required 34 | 35 | def apply(props: Map[Property, Any]) = SqlDBConfigBuilder(props) 36 | } 37 | 38 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/connect/ConnectionUtils.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.connect 24 | 25 | import java.sql.{Connection, DriverManager, SQLException} 26 | import java.util.Properties 27 | 28 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig} 29 | 30 | /** 31 | * Helper and utility methods used for setting up or using a connection 32 | */ 33 | private[spark] object ConnectionUtils { 34 | 35 | /** 36 | * Retrieves all connection properties in the Config object 37 | * and returns them as a [[Properties]] object. 38 | * 39 | * @param config the Config object with specified connection properties. 40 | * @return A connection [[Properties]] object. 41 | */ 42 | def createConnectionProperties(config: Config): Properties = { 43 | val connectionProperties = new Properties() 44 | for (key <- config.getAllKeys) { 45 | connectionProperties.put(key.toString, config.get[String](key.toString).get) 46 | } 47 | connectionProperties 48 | } 49 | 50 | /** 51 | * Adds the "jdbc:sqlserver://" prefix to a general server url 52 | * 53 | * @param url the string url without the JDBC prefix 54 | * @return the url with the added JDBC prefix 55 | */ 56 | def createJDBCUrl(url: String): String = SqlDBConfig.JDBCUrlPrefix + url 57 | 58 | /** 59 | * Gets a JDBC connection based on Config properties 60 | * 61 | * @param config any read or write Config 62 | * @return a JDBC Connection 63 | */ 64 | def getConnection(config: Config): Connection = { 65 | Class.forName(SqlDBConfig.SQLjdbcDriver) 66 | DriverManager.getConnection( 67 | createJDBCUrl(config.get[String](SqlDBConfig.URL).get), createConnectionProperties(config)) 68 | } 69 | 70 | /** 71 | * Retrieves the DBTable or QueryCustom specified in the config. 72 | * NOTE: only one of the two properties can exist within a config. 73 | * 74 | * @param config the Config object with specified properties. 75 | * @return The specified DBTable or QueryCustom 76 | */ 77 | def getTableOrQuery(config: Config): String = { 78 | config.get[String](SqlDBConfig.DBTable).getOrElse( 79 | getQueryCustom(config.get[String](SqlDBConfig.QueryCustom).get) 80 | ) 81 | } 82 | 83 | /** 84 | * The JDBC driver requires parentheses and a table alias around any custom query. 85 | * This adds the required syntax so users only need to specify the query. 86 | * 87 | * @param query the user's custom query 88 | * @return the syntactically correct query to be executed by the JDBC driver.
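 * For example:
 * {{{
 * getQueryCustom("SELECT name FROM dbo.Clients WHERE id = 1")
 * // yields: (SELECT name FROM dbo.Clients WHERE id = 1) QueryCustom
 * }}}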
89 | */ 90 | def getQueryCustom(query: String): String = s"($query) QueryCustom" 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/connect/DataFrameFunctions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.connect 24 | 25 | import java.sql.{Connection, SQLException} 26 | 27 | import com.microsoft.azure.sqldb.spark.bulkcopy.{BulkCopyMetadata, SQLServerBulkDataFrameFileRecord} 28 | import com.microsoft.azure.sqldb.spark.LoggingTrait 29 | import com.microsoft.azure.sqldb.spark.bulk.BulkCopyUtils 30 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig} 31 | import com.microsoft.sqlserver.jdbc.SQLServerBulkCopy 32 | import org.apache.spark.sql.{DataFrame, Row} 33 | 34 | import scala.util.Try 35 | 36 | /** 37 | * Implicit functions for DataFrame 38 | */ 39 | private[spark] case class DataFrameFunctions[T](@transient dataFrame: DataFrame) extends LoggingTrait { 40 | 41 | /** 42 | * Saves the contents of the [[DataFrame]] 43 | * to Azure SQL DB or SQL Server through the Bulk Copy API 44 | * 45 | * @param config the database connection properties and bulk copy properties 46 | * @param metadata the metadata of the columns - will be null if not specified * @param createTable whether to first create the destination table from the DataFrame schema 47 | */ 48 | def bulkCopyToSqlDB(config: Config, metadata: BulkCopyMetadata = null, createTable: Boolean = false): Unit = { 49 | // Ensure the destination table exists by writing the schema (zero rows) first 50 | if (createTable) { 51 | dataFrame.limit(0).write.sqlDB(config) 52 | } 53 | 54 | val actualMetadata = if (metadata == null) { 55 | getConnectionOrFail(config).recover({ 56 | case e: ClassNotFoundException => 57 | logError("JDBC driver not found in class path", e) 58 | throw e 59 | case e1: SQLException => 60 | logError("Connection cannot be established to the database", e1) 61 | throw e1 62 | }).flatMap(conn => { 63 | inferBulkCopyMetadata(config, conn) 64 | }).recover({ 65 | case e: SQLException => 66 | logError("Column metadata not specified and cannot retrieve metadata from database", e) 67 | throw e 68 | }).get 69 | } else { 70 | metadata 71 | } 72 | dataFrame.foreachPartition(iterator => bulkCopy(config, iterator, actualMetadata)) 73 | } 74 | 75 | private def
getConnectionOrFail(config: Config): Try[Connection] = { 76 | Try { 77 | ConnectionUtils.getConnection(config) 78 | } 79 | } 80 | 81 | private def inferBulkCopyMetadata(config: Config, connection: Connection): Try[BulkCopyMetadata] = { 82 | val dbTable = config.get[String](SqlDBConfig.DBTable).get 83 | Try { 84 | val resultSetMetaData = BulkCopyUtils.getTableColumns(dbTable, connection) 85 | BulkCopyUtils.createBulkCopyMetadata(resultSetMetaData) 86 | } 87 | } 88 | 89 | /** 90 | * Uses the Bulk Copy API to copy contents of a dataframe partition to an 91 | * external database table. 92 | * 93 | * @param config any write configuration with the specified properties. 94 | * @param iterator an iterator for a dataframe partition. 95 | * @param metadata resolved bulk copy metadata (user-specified or inferred). 96 | */ 97 | private def bulkCopy(config: Config, iterator: Iterator[Row], metadata: BulkCopyMetadata): Unit = { 98 | var connection: Connection = null 99 | try { 100 | connection = ConnectionUtils.getConnection(config) 101 | } catch { 102 | case e: ClassNotFoundException => 103 | logError("JDBC driver not found in class path", e) 104 | throw e 105 | case e1: SQLException => 106 | logError("Connection cannot be established to the database", e1) 107 | throw e1 108 | } 109 | 110 | val dbTable = config.get[String](SqlDBConfig.DBTable).get 111 | 112 | // Column metadata has already been resolved by the caller: user-specified or inferred from the table. 113 | val bulkCopyMetadata = metadata 114 | 115 | var committed = false 116 | val supportsTransactions = BulkCopyUtils.getTransactionSupport(connection) 117 | try { 118 | if (supportsTransactions) { 119 | connection.setAutoCommit(false) 120 | } 121 | 122 | val fileRecord = new SQLServerBulkDataFrameFileRecord(iterator, bulkCopyMetadata) 123 | val sqlServerBulkCopy = new SQLServerBulkCopy(connection) 124 | 125 | sqlServerBulkCopy.setDestinationTableName(dbTable) 126 | sqlServerBulkCopy.setBulkCopyOptions(BulkCopyUtils.getBulkCopyOptions(config)) 127 | sqlServerBulkCopy.writeToServer(fileRecord) 128 | 129 | if (supportsTransactions) { 130 | connection.commit() 131 | } 132 | committed = true 133 | } catch { 134 | case e: SQLException => 135 | if (!committed && supportsTransactions) { 136 | logError("An error occurred while writing to database, attempting rollback", e) 137 | } 138 | throw e 139 | } finally { 140 | if (!committed) { 141 | if (supportsTransactions) { 142 | connection.rollback() 143 | } 144 | connection.close() 145 | } else { 146 | try { 147 | connection.close() 148 | } catch { 149 | case e: Exception => logWarning("Transaction succeeded, but closing failed", e) 150 | } 151 | } 152 | } 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/connect/DataFrameReaderFunctions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included
in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.connect 24 | 25 | import java.util.Properties 26 | 27 | import com.microsoft.azure.sqldb.spark.connect.ConnectionUtils._ 28 | import com.microsoft.azure.sqldb.spark.LoggingTrait 29 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig} 30 | import org.apache.spark.sql.{DataFrame, DataFrameReader} 31 | 32 | /** 33 | * Implicit functions for DataFrameReader 34 | */ 35 | private[spark] case class DataFrameReaderFunctions(@transient reader: DataFrameReader) extends LoggingTrait { 36 | 37 | /** 38 | * Creates a [[DataFrame]] based on the read configuration properties. 39 | * 40 | * @param readConfig any read configuration. 41 | * @return DataFrame 42 | */ 43 | def sqlDB(readConfig: Config): DataFrame = { 44 | reader.jdbc( 45 | createJDBCUrl(readConfig.get[String](SqlDBConfig.URL).get), 46 | getTableOrQuery(readConfig), 47 | createConnectionProperties(readConfig) 48 | ) 49 | } 50 | 51 | /** 52 | * Creates a [[DataFrame]] based on the url, table and specified properties. 53 | * 54 | * @param url the server url 55 | * @param table the external database table being read 56 | * @param properties additional supported JDBC connection properties 57 | * @return DataFrame 58 | */ 59 | def sqlDB(url: String, table: String, properties: Properties): DataFrame = { 60 | reader.jdbc(createJDBCUrl(url), table, properties) 61 | } 62 | 63 | /** 64 | * Creates a [[DataFrame]] based on the url, table, predicates and specified properties. 65 | * 66 | * @param url the server url. 67 | * @param table the external database table being read. 68 | * @param predicates condition in the where clause for each partition. 69 | * @param properties additional supported JDBC connection properties. 70 | * @return DataFrame 71 | */ 72 | def sqlDB(url: String, table: String, predicates: Array[String], properties: Properties): DataFrame = { 73 | reader.jdbc(createJDBCUrl(url), table, predicates, properties) 74 | } 75 | 76 | /** 77 | * Creates a partitioned [[DataFrame]] based on the url, table and specified properties. 78 | * 79 | * @param url the server url 80 | * @param table the external database table being read 81 | * @param columnName name of a column, used for partitioning. 
82 | * @param lowerBound minimum value of the field in `columnName` 83 | * @param upperBound maximum value of the field in `columnName` 84 | * @param numPartitions the number of partitions of the dataframe 85 | * @param properties additional supported JDBC connection properties 86 | * @return DataFrame 87 | */ 88 | def sqlDB(url: String, table: String, columnName: String, lowerBound: Long, 89 | upperBound: Long, numPartitions: Int, properties: Properties): DataFrame = { 90 | reader.jdbc(createJDBCUrl(url), table, columnName, lowerBound, upperBound, numPartitions, properties) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/connect/DataFrameWriterFunctions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.connect 24 | 25 | import java.util.Properties 26 | 27 | import scala.language.existentials 28 | import com.microsoft.azure.sqldb.spark.connect.ConnectionUtils._ 29 | import com.microsoft.azure.sqldb.spark.LoggingTrait 30 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig} 31 | import org.apache.spark.sql.DataFrameWriter 32 | 33 | /** 34 | * Implicit functions for DataFrameWriter 35 | */ 36 | private[spark] case class DataFrameWriterFunctions(@transient writer: DataFrameWriter[_]) extends LoggingTrait { 37 | 38 | /** 39 | * Saves the contents of the `DataFrame` to Azure SQL DB or SQL Server. 40 | * 41 | * @param writeConfig the [[com.microsoft.azure.sqldb.spark.config.Config]] to use 42 | */ 43 | def sqlDB(writeConfig: Config): Unit = { 44 | val url = writeConfig.get[String](SqlDBConfig.URL).get 45 | val properties = createConnectionProperties(writeConfig) 46 | val table = writeConfig.get[String](SqlDBConfig.DBTable).getOrElse( 47 | throw new IllegalArgumentException("Table not found in DBTable in Config") 48 | ) 49 | 50 | sqlDB(url, table, properties) 51 | } 52 | 53 | /** 54 | * Saves the contents of the `DataFrame` to Azure SQL DB or SQL Server. 55 | * 56 | * @param url the url of the server 57 | * @param table the database table being written to.
58 | * @param properties any additional connection properties handled by the jdbc driver 59 | */ 60 | def sqlDB(url: String, table: String, properties: Properties): Unit = writer.jdbc(createJDBCUrl(url), table, properties) 61 | 62 | } 63 | 64 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/connect/package.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark 24 | 25 | import scala.language.implicitConversions 26 | import org.apache.spark.annotation.DeveloperApi 27 | import org.apache.spark.sql._ 28 | 29 | /** 30 | * Implicit functions added to DataFrameReader, DataFrameWriter and DataFrame objects 31 | */ 32 | package object connect { 33 | 34 | /** 35 | * :: DeveloperApi :: 36 | * 37 | * Helper to implicitly add SQL DB based functions to a DataFrameReader 38 | * 39 | * @param reader the DataFrameReader 40 | * @return the SQL DB based DataFrameReader 41 | */ 42 | @DeveloperApi 43 | implicit def toDataFrameReaderFunctions(reader: DataFrameReader): DataFrameReaderFunctions = 44 | DataFrameReaderFunctions(reader) 45 | 46 | /** 47 | * :: DeveloperApi :: 48 | * 49 | * Helper to implicitly add SQL DB based functions to a DataFrameWriter 50 | * 51 | * @param writer the DataFrameWriter 52 | * @return the SQL DB based DataFrameWriter 53 | */ 54 | @DeveloperApi 55 | implicit def toDataFrameWriterFunctions(writer: DataFrameWriter[_]): DataFrameWriterFunctions = 56 | DataFrameWriterFunctions(writer) 57 | 58 | /** 59 | * :: DeveloperApi :: 60 | * 61 | * Helper to implicitly add SQL DB based functions to a DataFrame 62 | * 63 | * @param ds the dataframe/dataset 64 | * @return the SQL DB based DataFrame 65 | */ 66 | @DeveloperApi 67 | implicit def toDataFrameFunctions[T](ds: Dataset[T]): DataFrameFunctions[Row] = DataFrameFunctions[Row](ds.toDF()) 68 | 69 | } 70 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/query/QueryFunctions.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to 
any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.query 24 | 25 | import java.sql.{Connection, SQLException} 26 | 27 | import com.microsoft.azure.sqldb.spark.connect.ConnectionUtils._ 28 | import com.microsoft.azure.sqldb.spark.LoggingTrait 29 | import com.microsoft.azure.sqldb.spark.config.{Config, SqlDBConfig} 30 | import com.microsoft.azure.sqldb.spark.connect._ 31 | import org.apache.spark.sql.{DataFrame, SQLContext} 32 | 33 | /** 34 | * Implicit functions for SQLContext 35 | */ 36 | private[spark] case class QueryFunctions(@transient sqlContext: SQLContext) extends LoggingTrait { 37 | 38 | /** 39 | * Executes a custom query on the external database server which returns 40 | * either a dataframe or a boolean specifying successful/unsuccessful execution. 
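 * A usage sketch covering both outcomes (connection details are illustrative
 * placeholders; requires `import com.microsoft.azure.sqldb.spark.query._`):
 * {{{
 * val queryConfig = Config(Map(
 *   "url"          -> "myserver.database.windows.net",
 *   "databaseName" -> "MyDatabase",
 *   "user"         -> "username",
 *   "password"     -> "**********",
 *   "queryCustom"  -> "UPDATE dbo.Clients SET name = 'Tom' WHERE id = 1"
 * ))
 * sqlContext.sqlDBQuery(queryConfig) match {
 *   case Left(df)       => df.show()        // the query returned a result set
 *   case Right(success) => println(success) // DML/DDL: true on success, false on error
 * }
 * }}}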
41 | * 42 | * @param config any general configuration 43 | * @return Left with a DataFrame of results when the query returns a result set; otherwise Right(true) on successful execution or Right(false) on failure 44 | */ 45 | def sqlDBQuery(config: Config): Either[DataFrame, Boolean] = { 46 | 47 | var connection: Connection = null 48 | 49 | val sql = config.get[String](SqlDBConfig.QueryCustom).getOrElse( 50 | throw new IllegalArgumentException("Query not found in QueryCustom in Config") 51 | ) 52 | 53 | try { 54 | connection = getConnection(config) 55 | val statement = connection.createStatement() 56 | 57 | if (statement.execute(sql)) { 58 | Left(sqlContext.read.sqlDB(config)) 59 | } 60 | else { 61 | Right(true) 62 | } 63 | } 64 | catch { 65 | case sqlException: SQLException => { 66 | logError("Error executing custom query", sqlException) 67 | Right(false) 68 | } 69 | case exception: Exception => { 70 | logError("Error executing custom query", exception) 71 | Right(false) 72 | } 73 | } 74 | finally { 75 | if (connection != null) connection.close() 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/scala/com/microsoft/azure/sqldb/spark/query/package.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE.
22 | */ 23 | package com.microsoft.azure.sqldb.spark 24 | 25 | import scala.language.implicitConversions 26 | import org.apache.spark.annotation.DeveloperApi 27 | import org.apache.spark.sql.SQLContext 28 | 29 | /** 30 | * Implicit querying functions added to SQLContext 31 | */ 32 | package object query { 33 | 34 | /** 35 | * :: DeveloperApi :: 36 | * 37 | * Helper to implicitly add SQL DB based functions to a SQLContext 38 | * 39 | * @param sqlContext the SQLContext 40 | * @return the Azure SQL based SQLContext 41 | */ 42 | @DeveloperApi 43 | implicit def toQueryFunctions(sqlContext: SQLContext): QueryFunctions = QueryFunctions(sqlContext) 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/test/java/com/microsoft/azure/sqldb/spark/bulkcopy/BulkCopyMetadataTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | package com.microsoft.azure.sqldb.spark.bulkcopy; 24 | 25 | import org.junit.Before; 26 | import org.junit.Test; 27 | 28 | import java.sql.Types; 29 | import java.time.format.DateTimeFormatter; 30 | 31 | import static junit.framework.Assert.assertEquals; 32 | 33 | public class BulkCopyMetadataTest { 34 | 35 | private BulkCopyMetadata bulkCopyMetadata; 36 | 37 | @Before 38 | public void beforeEach() { 39 | bulkCopyMetadata = new BulkCopyMetadata(); 40 | } 41 | 42 | @Test 43 | public void constructorTest(){ 44 | assertEquals(0, bulkCopyMetadata.getMetadata().size()); 45 | } 46 | 47 | @Test 48 | public void addColumnMetadataTest() { 49 | String columnName = "testColumn"; 50 | int jdbcType = Types.DOUBLE; 51 | int precision = 15; 52 | int scale = 5; 53 | DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss"); 54 | 55 | bulkCopyMetadata.addColumnMetadata(1, columnName, jdbcType, precision, scale); 56 | assertEquals(1, bulkCopyMetadata.getMetadata().size()); 57 | 58 | bulkCopyMetadata.addColumnMetadata(2, columnName, jdbcType, precision, scale, dateTimeFormatter); 59 | assertEquals(2, bulkCopyMetadata.getMetadata().size()); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/test/java/com/microsoft/azure/sqldb/spark/bulkcopy/ColumnMetadataTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | package com.microsoft.azure.sqldb.spark.bulkcopy; 24 | 25 | import org.junit.Test; 26 | import java.sql.Types; 27 | import java.time.format.DateTimeFormatter; 28 | import static junit.framework.Assert.assertEquals; 29 | 30 | public class ColumnMetadataTest { 31 | 32 | @Test 33 | public void constructorTest(){ 34 | String columnName = "testColumn"; 35 | int columnType = Types.TIME; 36 | int precision = 50; 37 | int scale = 0; 38 | DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss"); 39 | 40 | ColumnMetadata columnMetadata = new ColumnMetadata(columnName, columnType, precision, scale, dateTimeFormatter); 41 | 42 | assertEquals(columnName, columnMetadata.getColumnName()); 43 | assertEquals(columnType, columnMetadata.getColumnType()); 44 | assertEquals(precision, columnMetadata.getPrecision()); 45 | assertEquals(scale, columnMetadata.getScale()); 46 | assertEquals(dateTimeFormatter, columnMetadata.getDateTimeFormatter()); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/test/java/com/microsoft/azure/sqldb/spark/bulkcopy/SQLServerBulkDataFrameFileRecordTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | package com.microsoft.azure.sqldb.spark.bulkcopy; 24 | 25 | import org.junit.Before; 26 | import org.junit.Test; 27 | 28 | import java.sql.Types; 29 | import java.time.format.DateTimeFormatter; 30 | 31 | import static junit.framework.Assert.assertEquals; 32 | 33 | public class SQLServerBulkDataFrameFileRecordTest { 34 | 35 | private SQLServerBulkDataFrameFileRecord fileRecord; 36 | 37 | @Before 38 | public void beforeEach() { 39 | BulkCopyMetadata bulkCopyMetadata = new BulkCopyMetadata(); 40 | bulkCopyMetadata.addColumnMetadata(1, "Column1", Types.NVARCHAR, 128, 0); 41 | bulkCopyMetadata.addColumnMetadata(2, "Column2", Types.DOUBLE, 20, 10); 42 | bulkCopyMetadata.addColumnMetadata(3, "Column3", Types.VARCHAR, 256, 0); 43 | bulkCopyMetadata.addColumnMetadata(4, "Column4", Types.DATE, 50, 0, DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss")); 44 | 45 | fileRecord = new SQLServerBulkDataFrameFileRecord(null, bulkCopyMetadata); 46 | } 47 | 48 | @Test 49 | public void getColumnNameTest() { 50 | assertEquals("Column1", fileRecord.getColumnName(1)); 51 | assertEquals("Column2", fileRecord.getColumnName(2)); 52 | assertEquals("Column3", fileRecord.getColumnName(3)); 53 | assertEquals("Column4", fileRecord.getColumnName(4)); 54 | } 55 | 56 | @Test 57 | public void getColumnOrdinalsTest() { 58 | assertEquals(4, fileRecord.getColumnOrdinals().size()); 59 | } 60 | 61 | @Test 62 | public void getColumnTypeTest() { 63 | assertEquals(Types.NVARCHAR, fileRecord.getColumnType(1)); 64 | assertEquals(Types.DOUBLE, fileRecord.getColumnType(2)); 65 | assertEquals(Types.VARCHAR, fileRecord.getColumnType(3)); 66 | assertEquals(Types.DATE, fileRecord.getColumnType(4)); 67 | } 68 | 69 | @Test 70 | public void getPrecisionTest() { 71 | assertEquals(128, fileRecord.getPrecision(1)); 72 | assertEquals(20, fileRecord.getPrecision(2)); 73 | assertEquals(256, fileRecord.getPrecision(3)); 74 | assertEquals(50, fileRecord.getPrecision(4)); 75 | } 76 | 77 | @Test 78 | public void getScaleTest() { 79 | assertEquals(0, fileRecord.getScale(1)); 80 | assertEquals(10, fileRecord.getScale(2)); 81 | assertEquals(0, fileRecord.getScale(3)); 82 | assertEquals(0, fileRecord.getScale(4)); 83 | } 84 | 85 | @Test 86 | public void isAutoIncrementTest() { 87 | assertEquals(false, fileRecord.isAutoIncrement(0)); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/test/java/com/microsoft/azure/sqldb/spark/bulkcopy/SQLServerExceptionReflectionTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.bulkcopy; 24 | 25 | import com.microsoft.sqlserver.jdbc.SQLServerException; 26 | import org.junit.Test; 27 | 28 | import static junit.framework.Assert.assertEquals; 29 | import static junit.framework.Assert.fail; 30 | 31 | public class SQLServerExceptionReflectionTest { 32 | 33 | @Test(expected = SQLServerException.class) 34 | public void throwSQLServerExceptionTest1() throws SQLServerException { 35 | String text = "Testing error text"; 36 | String state = "Testing error state"; 37 | int code = 1; 38 | Exception caughtException = new Exception(); 39 | 40 | SQLServerException exception = null; 41 | try { 42 | exception = SQLServerExceptionReflection.throwSQLServerException(text, state, code, caughtException); 43 | } catch (Exception e){ 44 | fail("A SQLServerException should have been successfully constructed"); 45 | } 46 | 47 | assertEquals(text, exception.getMessage()); 48 | assertEquals(state, exception.getSQLState()); 49 | assertEquals(code, exception.getErrorCode()); 50 | assertEquals(caughtException, exception.getCause()); 51 | 52 | throw exception; 53 | } 54 | 55 | @Test(expected = SQLServerException.class) 56 | public void throwSQLServerExceptionTest2() throws SQLServerException { 57 | String text = "Testing error text"; 58 | Exception caughtException = new Exception(); 59 | 60 | SQLServerException exception = null; 61 | try { 62 | exception = SQLServerExceptionReflection.throwSQLServerException(text, caughtException); 63 | } catch (Exception e){ 64 | fail("A SQLServerException should have been successfully constructed"); 65 | } 66 | 67 | assertEquals(text, exception.getMessage()); 68 | assertEquals(caughtException, exception.getCause()); 69 | 70 | throw exception; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/test/scala/com/microsoft/azure/sqldb/spark/SqlDBSpark.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | package com.microsoft.azure.sqldb.spark 24 | 25 | import org.scalatest.{BeforeAndAfter, BeforeAndAfterEach, FlatSpec, Matchers} 26 | 27 | trait SqlDBSpark extends FlatSpec with Matchers with BeforeAndAfter with BeforeAndAfterEach 28 | -------------------------------------------------------------------------------- /src/test/scala/com/microsoft/azure/sqldb/spark/bulk/BulkCopyUtilsSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | package com.microsoft.azure.sqldb.spark.bulk 24 | 25 | import com.microsoft.azure.sqldb.spark.SqlDBSpark 26 | import com.microsoft.azure.sqldb.spark.config.Config 27 | 28 | class BulkCopyUtilsSpec extends SqlDBSpark { 29 | "getBulkCopyOptions" should "add the correct options from Config to SQLServerBulkCopyOptions" in { 30 | val bulkCopyBatchSize = "2500" 31 | val bulkCopyTimeout = "120" 32 | val bulkCopyCheckConstraints = "true" 33 | val bulkCopyFireTriggers = "true" 34 | val bulkCopyKeepIdentity = "true" 35 | val bulkCopyKeepNulls = "true" 36 | val bulkCopyTableLock = "true" 37 | val bulkCopyUseInternalTransaction = "true" 38 | val bulkCopyAllowEncryptedValueModifications = "true" 39 | 40 | val config = Config(Map( 41 | "url" -> "mssql.database.windows.net", 42 | "databaseName" -> "MyDatabase", 43 | "user" -> "admin@microsoft.com", 44 | "password" -> "password", 45 | "dbTable" -> "dbo.Customers", 46 | "authentication" -> "ActiveDirectoryPassword", 47 | "trustServerCertificate" -> "true", 48 | "encrypt" -> "true", 49 | "hostNameInCertificate" -> "*.database.windows.net", 50 | "bulkCopyBatchSize" -> bulkCopyBatchSize, 51 | "bulkCopyTimeout" -> bulkCopyTimeout, 52 | "bulkCopyCheckConstraints" -> bulkCopyCheckConstraints, 53 | "bulkCopyFireTriggers" -> bulkCopyFireTriggers, 54 | "bulkCopyKeepIdentity" -> bulkCopyKeepIdentity, 55 | "bulkCopyKeepNulls" -> bulkCopyKeepNulls, 56 | "bulkCopyTableLock" -> bulkCopyTableLock, 57 | "bulkCopyUseInternalTransaction" -> bulkCopyUseInternalTransaction, 58 | "bulkCopyAllowEncryptedValueModifications" -> bulkCopyAllowEncryptedValueModifications 59 | )) 60 | 61 | val bulkCopyOptions = BulkCopyUtils.getBulkCopyOptions(config) 62 | bulkCopyOptions.getBatchSize should be (bulkCopyBatchSize.toInt) 63 | bulkCopyOptions.getBulkCopyTimeout should be (bulkCopyTimeout.toInt)
64 | bulkCopyOptions.isCheckConstraints should be (bulkCopyCheckConstraints.toBoolean) 65 | bulkCopyOptions.isFireTriggers should be (bulkCopyFireTriggers.toBoolean) 66 | bulkCopyOptions.isKeepIdentity should be (bulkCopyKeepIdentity.toBoolean) 67 | bulkCopyOptions.isKeepNulls should be (bulkCopyKeepNulls.toBoolean) 68 | bulkCopyOptions.isTableLock should be (bulkCopyTableLock.toBoolean) 69 | bulkCopyOptions.isUseInternalTransaction should be (bulkCopyUseInternalTransaction.toBoolean) 70 | bulkCopyOptions.isAllowEncryptedValueModifications should be (bulkCopyAllowEncryptedValueModifications.toBoolean) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/test/scala/com/microsoft/azure/sqldb/spark/config/ConfigSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE.
22 | */ 23 | package com.microsoft.azure.sqldb.spark.config 24 | 25 | import com.microsoft.azure.sqldb.spark.SqlDBSpark 26 | 27 | class ConfigSpec extends SqlDBSpark { 28 | 29 | it should "throw IllegalArgumentException if required properties aren't met" in { 30 | an [IllegalArgumentException] should be thrownBy { 31 | Config(Map("url" -> "mssql.database.windows.net")) 32 | } 33 | } 34 | 35 | it should "throw IllegalArgumentException if queryCustom and dbTable are both specified" in { 36 | an [IllegalArgumentException] should be thrownBy { 37 | Config(Map( 38 | "url" -> "mssql.database.windows.net", 39 | "databaseName" -> "MyDatabase", 40 | "user" -> "MyUsername", 41 | "password" -> "**********", 42 | "dbTable" -> "MyDBTable", 43 | "QueryCustom" -> "MyQueryCustom" 44 | )) 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/test/scala/com/microsoft/azure/sqldb/spark/connect/ConnectionUtilsSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * The MIT License (MIT) 3 | * Copyright (c) 2018 Microsoft Corporation 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE.
22 | */ 23 | package com.microsoft.azure.sqldb.spark.connect 24 | 25 | import java.util.Properties 26 | 27 | import com.microsoft.azure.sqldb.spark.SqlDBSpark 28 | import com.microsoft.azure.sqldb.spark.config.Config 29 | 30 | class ConnectionUtilsSpec extends SqlDBSpark { 31 | 32 | "createConnectionProperties" should "return all properties in configuration in a Properties object" in { 33 | val url = "mssql.database.windows.net" 34 | val database = "MyDatabase" 35 | val user = "admin" 36 | val password = "password" 37 | val dbTable = "dbo.Customers" 38 | 39 | val config = Config(Map( 40 | "url" -> url, 41 | "databaseName" -> database, 42 | "user" -> user, 43 | "password" -> password, 44 | "dbTable" -> dbTable 45 | )) 46 | 47 | val controlProperties = new Properties 48 | controlProperties.put("url", url.toLowerCase) 49 | controlProperties.put("databasename", database.toLowerCase) 50 | controlProperties.put("user", user.toLowerCase) 51 | controlProperties.put("password", password.toLowerCase) 52 | controlProperties.put("dbtable", dbTable.toLowerCase) 53 | 54 | val testProperties = ConnectionUtils.createConnectionProperties(config) 55 | testProperties.keySet() should contain theSameElementsAs controlProperties.keySet() 56 | } 57 | 58 | "createJDBCUrl" should "return the server url with jdbc prefix" in { 59 | val url = "mssql.database.windows.net" 60 | ConnectionUtils.createJDBCUrl(url) should be ("jdbc:sqlserver://" + url) 61 | } 62 | 63 | "getQueryCustom" should "return original query in parentheses" in { 64 | val query = "SELECT * FROM MYTABLE" 65 | ConnectionUtils.getQueryCustom(query) should be ("(" + query + ") QueryCustom") 66 | } 67 | 68 | "getTableOrQuery" should "return appropriate table or query from a config object" in { 69 | val dbTable = "dbo.Customers" 70 | val tableConfig = Config(Map( 71 | "url" -> "mssql.database.windows.net", 72 | "databaseName" -> "MyDatabase", 73 | "user" -> "admin", 74 | "password" -> "password", 75 | "dbTable" -> dbTable 76 | )) 77 | ConnectionUtils.getTableOrQuery(tableConfig) should be (dbTable) 78 | 79 | val queryCustom = "SELECT * FROM dbo.Customers" 80 | val queryConfig = Config(Map( 81 | "url" -> "mssql.database.windows.net", 82 | "databaseName" -> "MyDatabase", 83 | "user" -> "admin", 84 | "password" -> "password", 85 | "QueryCustom" -> queryCustom 86 | )) 87 | ConnectionUtils.getTableOrQuery(queryConfig) should be (ConnectionUtils.getQueryCustom(queryCustom)) 88 | } 89 | } 90 | --------------------------------------------------------------------------------
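
For orientation, the pieces exercised by these specs fit together as follows in user code: Config validates and normalizes the option map (ConfigSpec), ConnectionUtils derives the JDBC URL and connection properties from it (ConnectionUtilsSpec), and the bulkCopy* keys plus BulkCopyMetadata drive bulk insertion (BulkCopyUtilsSpec and the Java bulkcopy tests). The sketch below is assembled from the repository's samples and README conventions rather than from the test files themselves; the server, database, table, and credential values are placeholders, and a running SparkSession is assumed.

import java.sql.Types
import org.apache.spark.sql.{SaveMode, SparkSession}
import com.microsoft.azure.sqldb.spark.bulkcopy.BulkCopyMetadata
import com.microsoft.azure.sqldb.spark.config.Config
import com.microsoft.azure.sqldb.spark.connect._

val spark = SparkSession.builder.getOrCreate()

// All values below are placeholders; as the ConnectionUtilsSpec shows,
// Config lowercases keys, so option-name casing is forgiving.
val config = Config(Map(
  "url"          -> "mssql.database.windows.net",
  "databaseName" -> "MyDatabase",
  "dbTable"      -> "dbo.Customers",
  "user"         -> "admin",
  "password"     -> "password"
))

// Read: the sqlDB reader comes from the implicits in the connect package.
val df = spark.sqlContext.read.sqlDB(config)

// Row-by-row write through the standard JDBC path.
df.write.mode(SaveMode.Append).sqlDB(config)

// Bulk copy: the bulkCopy* keys are the ones BulkCopyUtilsSpec shows being
// translated into SQLServerBulkCopyOptions, and BulkCopyMetadata registers
// per-column type information (ordinal, name, JDBC type, precision, scale).
val bulkCopyConfig = Config(Map(
  "url"               -> "mssql.database.windows.net",
  "databaseName"      -> "MyDatabase",
  "dbTable"           -> "dbo.Customers",
  "user"              -> "admin",
  "password"          -> "password",
  "bulkCopyBatchSize" -> "2500",
  "bulkCopyTableLock" -> "true",
  "bulkCopyTimeout"   -> "600"
))
val bulkCopyMetadata = new BulkCopyMetadata
bulkCopyMetadata.addColumnMetadata(1, "Name", Types.NVARCHAR, 128, 0)
df.bulkCopyToSqlDB(bulkCopyConfig, bulkCopyMetadata)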