├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── _config.yml ├── pom.xml ├── scalastyle-config.xml └── src ├── main ├── resources │ └── META-INF │ │ └── services │ │ └── org.apache.spark.sql.sources.DataSourceRegister └── scala │ └── org │ └── apache │ └── spark │ └── sql │ └── execution │ └── datasources │ └── greenplum │ ├── DefaultSource.scala │ ├── DefaultSource15.scala │ ├── GreenplumOptions.scala │ ├── GreenplumRelation.scala │ ├── GreenplumUtils.scala │ └── PartitionCopyFailureException.scala └── test └── scala └── org └── apache └── spark └── sql └── execution └── datasources └── greenplum ├── GreenplumOptionsSuite.scala └── GreenplumUtilsSuite.scala /.gitignore: -------------------------------------------------------------------------------- 1 | *#*# 2 | *.#* 3 | *.iml 4 | *.ipr 5 | *.iws 6 | *.pyc 7 | *.pyo 8 | *.swp 9 | *~ 10 | .DS_Store 11 | .cache 12 | .classpath 13 | .ensime 14 | .ensime_cache/ 15 | .ensime_lucene 16 | .generated-mima* 17 | .idea/ 18 | .idea_modules/ 19 | .project 20 | .pydevproject 21 | .scala_dependencies 22 | .settings 23 | /lib/ 24 | R-unit-tests.log 25 | R/unit-tests.out 26 | R/cran-check.out 27 | R/pkg/vignettes/sparkr-vignettes.html 28 | R/pkg/tests/fulltests/Rplots.pdf 29 | build/*.jar 30 | build/apache-maven* 31 | build/scala* 32 | build/zinc* 33 | cache 34 | checkpoint 35 | conf/*.cmd 36 | conf/*.conf 37 | conf/*.properties 38 | conf/*.sh 39 | conf/*.xml 40 | conf/java-opts 41 | conf/slaves 42 | dependency-reduced-pom.xml 43 | derby.log 44 | dev/create-release/*final 45 | dev/create-release/*txt 46 | dev/pr-deps/ 47 | dist/ 48 | docs/_site 49 | docs/api 50 | sql/docs 51 | sql/site 52 | lib_managed/ 53 | lint-r-report.log 54 | log/ 55 | logs/ 56 | out/ 57 | project/boot/ 58 | project/build/target/ 59 | project/plugins/lib_managed/ 60 | project/plugins/project/build.properties 61 | project/plugins/src_managed/ 62 | project/plugins/target/ 63 | python/lib/pyspark.zip 64 | python/deps 65 | python/pyspark/python 66 | reports/ 67 | scalastyle-on-compile.generated.xml 68 | scalastyle-output.xml 69 | scalastyle.txt 70 | spark-*-bin-*.tgz 71 | spark-tests.log 72 | src_managed/ 73 | streaming-tests.log 74 | target/ 75 | unit-tests.log 76 | work/ 77 | 78 | # For Hive 79 | TempStatsStore/ 80 | metastore/ 81 | metastore_db/ 82 | sql/hive-thriftserver/test_warehouses 83 | warehouse/ 84 | spark-warehouse/ 85 | 86 | # For R session data 87 | .RData 88 | .RHistory 89 | .Rhistory 90 | *.Rproj 91 | *.Rproj.* 92 | 93 | .Rproj.user 94 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - 2.11.8 4 | 5 | cache: 6 | directories: 7 | - $HOME/.m2 8 | - ./build 9 | 10 | before_deploy: 11 | - mvn clean package -DskipTests 12 | 13 | deploy: 14 | - provider: pages 15 | skip_cleanup: true 16 | github_token: $GITHUB_TOKEN 17 | email: yaooqinn@hotmail.com 18 | name: Kent Yao 19 | on: 20 | branch: master 21 | - provider: releases 22 | api_key: $GITHUB_TOKEN 23 | file_glob: true 24 | file: target/spark-greenplum*.jar 25 | skip_cleanup: true 26 | on: 27 | tags: true 28 | 29 | jobs: 30 | include: 31 | - stage: spark2.4 32 | language: scala 33 | script: mvn clean install -Pspark-2.4 -Dmaven.javadoc.skip=true -B -V 34 | - stage: spark2.3 35 | language: scala 36 | script: mvn clean install -Dmaven.javadoc.skip=true -B -V 37 | 38 | after_success: 39 | - bash <(curl -s https://codecov.io/bash) 40 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | 204 | ======================================================================= 205 | Apache Spark Subcomponents: 206 | 207 | The Apache Spark project contains subcomponents with separate copyright 208 | notices and license terms. Your use of the source code for the these 209 | subcomponents is subject to the terms and conditions of the following 210 | licenses. 211 | 212 | 213 | ======================================================================== 214 | For heapq (pyspark/heapq3.py): 215 | ======================================================================== 216 | 217 | See license/LICENSE-heapq.txt 218 | 219 | ======================================================================== 220 | For SnapTree: 221 | ======================================================================== 222 | 223 | See license/LICENSE-SnapTree.txt 224 | 225 | ======================================================================== 226 | For jbcrypt: 227 | ======================================================================== 228 | 229 | See license/LICENSE-jbcrypt.txt 230 | 231 | ======================================================================== 232 | BSD-style licenses 233 | ======================================================================== 234 | 235 | The following components are provided under a BSD-style license. See project link for details. 236 | The text of each license is also included at licenses/LICENSE-[project].txt. 
237 | 238 | (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core) 239 | (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model) 240 | (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/) 241 | (BSD License) ANTLR 4.5.2-1 (org.antlr:antlr4:4.5.2-1 - http://wwww.antlr.org/) 242 | (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org) 243 | (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org) 244 | (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org) 245 | (BSD) JLine (jline:jline:0.9.94 - http://jline.sourceforge.net) 246 | (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.3 - http://paranamer.codehaus.org/paranamer) 247 | (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.6 - http://paranamer.codehaus.org/paranamer) 248 | (BSD 3 Clause) Scala (http://www.scala-lang.org/download/#License) 249 | (Interpreter classes (all .scala files in repl/src/main/scala 250 | except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala), 251 | and for SerializableMapWrapper in JavaUtils.scala) 252 | (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.7 - http://www.scala-lang.org/) 253 | (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.7 - http://www.scala-lang.org/) 254 | (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.7 - http://www.scala-lang.org/) 255 | (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.7 - http://www.scala-lang.org/) 256 | (BSD-like) Scalap (org.scala-lang:scalap:2.11.7 - http://www.scala-lang.org/) 257 | (BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org) 258 | (BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org) 259 | (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org) 260 | (New BSD License) Kryo (com.esotericsoftware:kryo:3.0.3 - https://github.com/EsotericSoftware/kryo) 261 | (New BSD License) MinLog (com.esotericsoftware:minlog:1.3.0 - https://github.com/EsotericSoftware/minlog) 262 | (New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java:2.5.0 - http://code.google.com/p/protobuf) 263 | (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf) 264 | (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net) 265 | (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) 266 | (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.4 - http://py4j.sourceforge.net/) 267 | (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/) 268 | (BSD licence) sbt and sbt-launch-lib.bash 269 | (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE) 270 | (BSD 3 Clause) DPark (https://github.com/douban/dpark/blob/master/LICENSE) 271 | (BSD 3 Clause) CloudPickle (https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE) 272 | 273 | ======================================================================== 274 | MIT licenses 275 | ======================================================================== 276 | 277 | The following components are provided under the MIT License. See project link for details. 
278 | The text of each license is also included at licenses/LICENSE-[project].txt. 279 | 280 | (MIT License) JCL 1.1.1 implemented over SLF4J (org.slf4j:jcl-over-slf4j:1.7.5 - http://www.slf4j.org) 281 | (MIT License) JUL to SLF4J bridge (org.slf4j:jul-to-slf4j:1.7.5 - http://www.slf4j.org) 282 | (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.5 - http://www.slf4j.org) 283 | (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.5 - http://www.slf4j.org) 284 | (MIT License) pyrolite (org.spark-project:pyrolite:2.0.1 - http://pythonhosted.org/Pyro4/) 285 | (MIT License) scopt (com.github.scopt:scopt_2.11:3.2.0 - https://github.com/scopt/scopt) 286 | (The MIT License) Mockito (org.mockito:mockito-core:1.9.5 - http://www.mockito.org) 287 | (MIT License) jquery (https://jquery.org/license/) 288 | (MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs) 289 | (MIT License) graphlib-dot (https://github.com/cpettitt/graphlib-dot) 290 | (MIT License) dagre-d3 (https://github.com/cpettitt/dagre-d3) 291 | (MIT License) sorttable (https://github.com/stuartlangridge/sorttable) 292 | (MIT License) boto (https://github.com/boto/boto/blob/develop/LICENSE) 293 | (MIT License) datatables (http://datatables.net/license) 294 | (MIT License) mustache (https://github.com/mustache/mustache/blob/master/LICENSE) 295 | (MIT License) cookies (http://code.google.com/p/cookies/wiki/License) 296 | (MIT License) blockUI (http://jquery.malsup.com/block/) 297 | (MIT License) RowsGroup (http://datatables.net/license/mit) 298 | (MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html) 299 | (MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE) 300 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PostgreSQL & GreenPlum Data Source for Apache Spark [![License](https://img.shields.io/badge/license-Apache%202-4EB1BA.svg)](https://www.apache.org/licenses/LICENSE-2.0.html) [![](https://tokei.rs/b1/github/yaooqinn/spark-greenplum)](https://github.com/yaooqinn/spark-greenplum) [![GitHub release](https://img.shields.io/github/release/yaooqinn/spark-greenplum.svg)](https://github.com/yaooqinn/spark-greenplum/releases) [![codecov](https://codecov.io/gh/yaooqinn/spark-greenplum/branch/master/graph/badge.svg)](https://codecov.io/gh/yaooqinn/spark-greenplum) [![Build Status](https://travis-ci.com/yaooqinn/spark-greenplum.svg?branch=master)](https://travis-ci.com/yaooqinn/spark-greenplum) [![HitCount](http://hits.dwyl.io/yaooqinn/spark-greenplum.svg)](http://hits.dwyl.io/yaooqinn/spark-greenplum) 2 | 3 | A library for reading data from and transferring data to Greenplum databases with Apache Spark, for Spark SQL and DataFrames. 4 | 5 | This library is **100x faster** than Apache Spark's JDBC DataSource while transferring data from Spark to Greenplum databases. 6 | 7 | Also, this library is fully **transactional**. 8 | 9 | ## Try it now!
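### DataFrame API

The SQL examples below drive the data source through `CREATE TABLE ... USING greenplum`. Because the source is registered under the short name `greenplum`, it can also be used from the DataFrame reader/writer API. The snippet below is only an illustrative sketch, not an example taken from this repository: the connection values (`url`, `user`, `password`, schema and table names) are placeholders, and `spark` / `df` stand for an existing `SparkSession` and `DataFrame`.

```scala
// Options accepted by the greenplum data source; connection values below are placeholders.
val gpOptions = Map(
  "url"            -> "jdbc:postgresql://greenplum:5432/",
  "delimiter"      -> "\t",    // single-character delimiter used for the COPY stream
  "dbschema"       -> "gptest",
  "dbtable"        -> "store_sales",
  "user"           -> "gptest",
  "password"       -> "test",
  "transactionOn"  -> "true",  // coalesce to one partition so appends copy atomically
  "copyTimeout"    -> "2h",    // per-partition copy timeout (default "1h")
  "maxConnections" -> "100"    // max tasks copying to Greenplum concurrently (default 100)
)

// Write a DataFrame to Greenplum, then read the table back.
df.write.format("greenplum").options(gpOptions).mode("overwrite").save()
val gpDf = spark.read.format("greenplum").options(gpOptions).load()
```

`transactionOn`, `copyTimeout`, and `maxConnections` are optional and are spelled out here only to make their defaults visible.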
10 | 11 | ### CTAS 12 | ```genericsql 13 | CREATE TABLE tbl 14 | USING greenplum 15 | options ( 16 | url "jdbc:postgresql://greenplum:5432/", 17 | delimiter "\t", 18 | dbschema "gptest", 19 | dbtable "store_sales", 20 | user 'gptest', 21 | password 'test') 22 | AS 23 | SELECT * FROM tpcds_100g.store_sales WHERE ss_sold_date_sk<=2451537 AND ss_sold_date_sk> 2451520; 24 | ``` 25 | 26 | ### View & Insert 27 | 28 | ```genericsql 29 | CREATE TEMPORARY TABLE tbl 30 | USING greenplum 31 | options ( 32 | url "jdbc:postgresql://greenplum:5432/", 33 | delimiter "\t", 34 | dbschema "gptest", 35 | dbtable "store_sales", 36 | user 'gptest', 37 | password 'test') 38 | 39 | INSERT INTO TABLE tbl SELECT * FROM tpcds_100g.store_sales WHERE ss_sold_date_sk<=2451537 AND ss_sold_date_sk> 2451520; 40 | 41 | ``` 42 | 43 | Please refer to [Spark SQL Guide - JDBC To Other Databases](http://spark.apache.org/docs/latest/sql-data-sources-jdbc.html) to learn more about the similar usage. 44 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-leap-day 2 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.apache.spark 8 | spark-greenplum 9 | 1.0-SNAPSHOT 10 | jar 11 | 12 | 13 | 14 | yaooqinn 15 | Kent Yao 16 | NetEase Corp. 17 | yaooqinn@hotmail.com 18 | 19 | 20 | 21 | NetEase Corp. 22 | https://bigdata.163yun.com/mammut 23 | 24 | 25 | 26 | The Apache Software License, Version 2.0 27 | http://www.apache.org/licenses/LICENSE-2.0.txt 28 | manual 29 | 30 | 31 | 32 | 33 | 42.3.8 34 | 2.11 35 | 2.11.8 36 | 3.0.3 37 | 2.3.3 38 | 39 | 40 | 41 | 42 | org.scala-lang 43 | scala-library 44 | ${scala.ver} 45 | provided 46 | 47 | 48 | org.apache.spark 49 | spark-sql_${scala.binary.ver} 50 | ${spark.ver} 51 | provided 52 | 53 | 54 | org.postgresql 55 | postgresql 56 | ${postgresql.ver} 57 | compile 58 | 59 | 60 | 61 | 62 | org.ostermiller 63 | utils 64 | 1.07.00 65 | compile 66 | 67 | 68 | 69 | 70 | 71 | org.scalatest 72 | scalatest_${scala.binary.ver} 73 | ${scalatest.ver} 74 | test 75 | 76 | 77 | org.apache.spark 78 | spark-core_${scala.binary.ver} 79 | ${spark.ver} 80 | test 81 | test-jar 82 | 83 | 84 | 85 | io.airlift 86 | testing-postgresql-server 87 | 9.6.3-3 88 | test 89 | 90 | 91 | org.mockito 92 | mockito-core 93 | 1.10.19 94 | test 95 | 96 | 97 | 98 | target/scala-${scala.binary.ver}/classes 99 | target/scala-${scala.binary.ver}/test-classes 100 | 101 | 102 | ${project.basedir}/src/main/resources 103 | 104 | 105 | 106 | ${project.build.directory}/extra-resources 107 | true 108 | 109 | 110 | 111 | 112 | 113 | org.apache.maven.plugins 114 | maven-compiler-plugin 115 | 3.5.1 116 | 117 | ${java.version} 118 | ${java.version} 119 | UTF-8 120 | 1024m 121 | true 122 | 123 | -Xlint:all,-serial,-path 124 | 125 | 126 | 127 | 128 | 129 | net.alchim31.maven 130 | scala-maven-plugin 131 | 3.3.1 132 | 133 | 134 | eclipse-add-source 135 | 136 | add-source 137 | 138 | 139 | 140 | scala-compile-first 141 | 142 | compile 143 | 144 | 145 | 146 | scala-test-compile-first 147 | 148 | testCompile 149 | 150 | 151 | 152 | 153 | ${scala.ver} 154 | incremental 155 | true 156 | 157 | -unchecked 158 | -deprecation 159 | -feature 160 | -explaintypes 161 | -Yno-adapted-args 162 | 163 | 164 | -Xms1024m 165 | -Xmx1024m 166 | -XX:ReservedCodeCacheSize=512M 
167 | 168 | 169 | -source 170 | ${java.version} 171 | -target 172 | ${java.version} 173 | -Xlint:all,-serial,-path,-try 174 | 175 | 176 | 177 | 178 | 179 | 180 | org.apache.maven.plugins 181 | maven-surefire-plugin 182 | 2.12.4 183 | 184 | true 185 | 186 | 187 | 188 | 189 | org.scalatest 190 | scalatest-maven-plugin 191 | 1.0 192 | 193 | ${project.build.directory}/surefire-reports 194 | . 195 | TestSuite.txt 196 | 197 | 198 | 199 | test 200 | 201 | test 202 | 203 | 204 | 205 | 206 | 207 | 208 | org.jacoco 209 | jacoco-maven-plugin 210 | 0.8.0 211 | 212 | 213 | pre-test 214 | 215 | prepare-agent 216 | 217 | 218 | 219 | report 220 | test 221 | 222 | report 223 | 224 | 225 | 226 | 227 | 228 | 229 | org.apache.maven.plugins 230 | maven-shade-plugin 231 | 232 | false 233 | 234 | 235 | org.ostermiller:* 236 | org.postgresql:* 237 | 238 | 239 | 240 | 241 | com.Ostermiller 242 | spark_greenplum_project.Ostermiller 243 | 244 | 245 | 246 | 247 | 248 | 249 | package 250 | 251 | shade 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | spark-2.4 262 | 263 | 2.4.0 264 | 265 | 266 | 267 | -------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- 1 | 17 | 39 | 40 | 41 | Scalastyle standard configuration 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | true 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW 126 | 127 | 128 | 129 | 130 | 131 | ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | ^FunSuite[A-Za-z]*$ 141 | Tests must extend org.apache.spark.SparkFunSuite instead. 142 | 143 | 144 | 145 | 146 | ^println$ 147 | 151 | 152 | 153 | 154 | @VisibleForTesting 155 | 158 | 159 | 160 | 161 | Runtime\.getRuntime\.addShutdownHook 162 | 170 | 171 | 172 | 173 | mutable\.SynchronizedBuffer 174 | 182 | 183 | 184 | 185 | Class\.forName 186 | 193 | 194 | 195 | 196 | Await\.result 197 | 204 | 205 | 206 | 207 | Await\.ready 208 | 215 | 216 | 217 | 218 | 219 | JavaConversions 220 | Instead of importing implicits in scala.collection.JavaConversions._, import 221 | scala.collection.JavaConverters._ and use .asScala / .asJava methods 222 | 223 | 224 | 225 | org\.apache\.commons\.lang\. 226 | Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead 227 | of Commons Lang 2 (package org.apache.commons.lang.*) 228 | 229 | 230 | 231 | extractOpt 232 | Use Utils.jsonOption(x).map(.extract[T]) instead of .extractOpt[T], as the latter 233 | is slower. 234 | 235 | 236 | 237 | 238 | java,scala,3rdParty,spark 239 | javax?\..* 240 | scala\..* 241 | (?!org\.apache\.spark\.).* 242 | org\.apache\.spark\..* 243 | 244 | 245 | 246 | 247 | 248 | COMMA 249 | 250 | 251 | 252 | 253 | 254 | \)\{ 255 | 258 | 259 | 260 | 261 | (?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*] 262 | Use Javadoc style indentation for multiline comments 263 | 264 | 265 | 266 | case[^\n>]*=>\s*\{ 267 | Omit braces in case clauses. 
268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 800> 321 | 322 | 323 | 324 | 325 | 30 326 | 327 | 328 | 329 | 330 | 10 331 | 332 | 333 | 334 | 335 | 50 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | -1,0,1,2,3 347 | 348 | 349 | 350 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister: -------------------------------------------------------------------------------- 1 | org.apache.spark.sql.execution.datasources.greenplum.DefaultSource15 -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/greenplum/DefaultSource.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.spark.sql.execution.datasources.greenplum 19 | 20 | import org.apache.spark.sql.{AnalysisException, DataFrame, SaveMode, SQLContext} 21 | import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap 22 | import org.apache.spark.sql.execution.datasources.jdbc._ 23 | import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister, RelationProvider} 24 | import org.apache.spark.sql.types.StructType 25 | 26 | class DefaultSource 27 | extends RelationProvider with CreatableRelationProvider with DataSourceRegister { 28 | 29 | import GreenplumUtils._ 30 | 31 | override def shortName(): String = "greenplum" 32 | 33 | override def createRelation( 34 | sqlContext: SQLContext, 35 | parameters: Map[String, String]): BaseRelation = { 36 | import JDBCOptions._ 37 | 38 | val options = GreenplumOptions(CaseInsensitiveMap(parameters)) 39 | val partitionColumn = options.partitionColumn 40 | val lowerBound = options.lowerBound 41 | val upperBound = options.upperBound 42 | val numPartitions = options.numPartitions 43 | 44 | val partitionInfo = if (partitionColumn.isEmpty) { 45 | assert(lowerBound.isEmpty && upperBound.isEmpty, "When 'partitionColumn' is not specified, " + 46 | s"'$JDBC_LOWER_BOUND' and '$JDBC_UPPER_BOUND' are expected to be empty") 47 | null 48 | } else { 49 | assert(lowerBound.nonEmpty && upperBound.nonEmpty && numPartitions.nonEmpty, 50 | s"When 'partitionColumn' is specified, '$JDBC_LOWER_BOUND', '$JDBC_UPPER_BOUND', and " + 51 | s"'$JDBC_NUM_PARTITIONS' are also required") 52 | JDBCPartitioningInfo( 53 | partitionColumn.get, lowerBound.get, upperBound.get, numPartitions.get) 54 | } 55 | val parts = JDBCRelation.columnPartition(partitionInfo) 56 | GreenplumRelation(parts, options)(sqlContext.sparkSession) 57 | } 58 | 59 | override def createRelation( 60 | sqlContext: SQLContext, 61 | mode: SaveMode, 62 | parameters: Map[String, String], 63 | df: DataFrame): BaseRelation = { 64 | val options = GreenplumOptions(CaseInsensitiveMap(parameters)) 65 | val isCaseSensitive = sqlContext.conf.caseSensitiveAnalysis 66 | 67 | val m = options.maxConnections 68 | val conn = JdbcUtils.createConnectionFactory(options)() 69 | try { 70 | if (tableExists(conn, options.table)) { 71 | val tableSchema = JdbcUtils.getSchemaOption(conn, options) 72 | checkSchema(tableSchema, df.schema, isCaseSensitive) 73 | val orderedDf = reorderDataFrameColumns(df, tableSchema) 74 | // In fact, the mode here is Overwrite constantly, we add other modes just for compatible. 75 | mode match { 76 | case SaveMode.Overwrite 77 | if options.isTruncate && 78 | JdbcUtils.isCascadingTruncateTable(options.url).contains(false) => 79 | JdbcUtils.truncateTable(conn, options) 80 | nonTransactionalCopy( 81 | if (options.transactionOn) orderedDf.coalesce(1) else orderedDf.coalesce(m), 82 | orderedDf.schema, options) 83 | case SaveMode.Overwrite => 84 | transactionalCopy(orderedDf.coalesce(m), orderedDf.schema, options) 85 | case SaveMode.Append => 86 | nonTransactionalCopy( 87 | if (options.transactionOn) { 88 | orderedDf.coalesce(1) 89 | } else { 90 | orderedDf.coalesce(m) 91 | }, 92 | orderedDf.schema, options) 93 | case SaveMode.ErrorIfExists => 94 | throw new AnalysisException(s"Table or view '${options.table}' already exists. 
$mode") 95 | case SaveMode.Ignore => // do nothing 96 | } 97 | } else { 98 | transactionalCopy(df.coalesce(m), df.schema, options) 99 | } 100 | } finally { 101 | closeConnSilent(conn) 102 | } 103 | createRelation(sqlContext, parameters) 104 | } 105 | 106 | private def checkSchema( 107 | tableSchema: Option[StructType], 108 | dfSchema: StructType, 109 | isCaseSensitive: Boolean): Unit = { 110 | if (!tableSchema.isEmpty) { 111 | val columnNameEquality = if (isCaseSensitive) { 112 | org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution 113 | } else { 114 | org.apache.spark.sql.catalyst.analysis.caseInsensitiveResolution 115 | } 116 | val tableColumnNames = tableSchema.get.fieldNames 117 | dfSchema.fields.map { col => 118 | tableColumnNames.find(f => columnNameEquality(f, col.name)).getOrElse( 119 | throw new AnalysisException(s"Column ${col.name} not found int schema $tableSchema.") 120 | ) 121 | } 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/greenplum/DefaultSource15.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.execution.datasources.greenplum 19 | 20 | import org.apache.spark.sql.sources.DataSourceRegister 21 | 22 | class DefaultSource15 extends DefaultSource with DataSourceRegister { 23 | 24 | override def shortName(): String = "greenplum" 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/greenplum/GreenplumOptions.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.spark.sql.execution.datasources.greenplum 19 | 20 | import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap 21 | import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions 22 | import org.apache.spark.util.Utils 23 | 24 | /** 25 | * Options for the Greenplum data source. 26 | */ 27 | case class GreenplumOptions(@transient params: CaseInsensitiveMap[String]) 28 | extends JDBCOptions(params.updated("driver", "org.postgresql.Driver")) { 29 | 30 | val delimiter: String = params.getOrElse("delimiter", ",") 31 | assert(delimiter.length == 1, "The delimiter should be a single character.") 32 | 33 | /** 34 | * This option is only used in these cases: 35 | * 1. Overwrite a gptable which is a CascadingTruncateTable. 36 | * 2. Append data to a gptable. 37 | */ 38 | val transactionOn: Boolean = params.getOrElse("transactionOn", "false").toBoolean 39 | 40 | /** Max number of times we are allowed to retry the dropTempTable operation. */ 41 | val dropTempTableMaxRetries: Int = 3 42 | 43 | /** Timeout for copying a partition's data to Greenplum. */ 44 | val copyTimeout = Utils.timeStringAsMs(params.getOrElse("copyTimeout", "1h")) 45 | assert(copyTimeout > 0, "The copy timeout should be positive, e.g. 10s, 10min, 1h.") 46 | 47 | /** Maximum number of tasks that may write to Greenplum concurrently. */ 48 | val maxConnections = params.getOrElse("maxConnections", "100").toInt 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/greenplum/GreenplumRelation.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.spark.sql.execution.datasources.greenplum 19 | 20 | import org.apache.spark.Partition 21 | import org.apache.spark.rdd.RDD 22 | import org.apache.spark.sql._ 23 | import org.apache.spark.sql.execution.datasources.jdbc.{JDBCRDD, JdbcUtils} 24 | import org.apache.spark.sql.jdbc.JdbcDialects 25 | import org.apache.spark.sql.sources.{BaseRelation, Filter, InsertableRelation, PrunedFilteredScan} 26 | import org.apache.spark.sql.types._ 27 | 28 | private[sql] case class GreenplumRelation( 29 | parts: Array[Partition], options: GreenplumOptions)(@transient val sparkSession: SparkSession) 30 | extends BaseRelation 31 | with PrunedFilteredScan 32 | with InsertableRelation { 33 | 34 | import GreenplumUtils._ 35 | 36 | override def sqlContext: SQLContext = sparkSession.sqlContext 37 | override val needConversion: Boolean = false 38 | override val schema: StructType = { 39 | val tableSchema = JDBCRDD.resolveTable(options) 40 | options.customSchema match { 41 | case Some(customSchema) => JdbcUtils.getCustomSchema( 42 | tableSchema, customSchema, sparkSession.sessionState.conf.resolver) 43 | case None => tableSchema 44 | } 45 | } 46 | 47 | // Check if JDBCRDD.compileFilter can accept input filters 48 | override def unhandledFilters(filters: Array[Filter]): Array[Filter] = { 49 | filters.filter(JDBCRDD.compileFilter(_, JdbcDialects.get(options.url)).isEmpty) 50 | } 51 | 52 | override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { 53 | // Rely on a type erasure hack to pass RDD[InternalRow] back as RDD[Row] 54 | JDBCRDD.scanTable( 55 | sparkSession.sparkContext, 56 | schema, 57 | requiredColumns, 58 | filters, 59 | parts, 60 | options).asInstanceOf[RDD[Row]] 61 | } 62 | 63 | override def insert(data: DataFrame, overwrite: Boolean): Unit = { 64 | val conn = JdbcUtils.createConnectionFactory(options)() 65 | val maxConns = options.maxConnections 66 | try { 67 | if (overwrite) { 68 | if (options.isTruncate && 69 | JdbcUtils.isCascadingTruncateTable(options.url).contains(false)) { 70 | JdbcUtils.truncateTable(conn, options) 71 | nonTransactionalCopy( 72 | if (options.transactionOn) data.coalesce(1) else data.coalesce(maxConns), 73 | schema, options) 74 | } else { 75 | transactionalCopy(data.coalesce(maxConns), schema, options) 76 | } 77 | } else { 78 | nonTransactionalCopy( 79 | if (options.transactionOn) { 80 | data.coalesce(1) 81 | } else { 82 | data.coalesce(maxConns) 83 | }, 84 | schema, options) 85 | } 86 | } finally { 87 | closeConnSilent(conn) 88 | } 89 | } 90 | 91 | override def toString: String = { 92 | val partitioningInfo = if (parts.nonEmpty) s" [numPartitions=${parts.length}]" else "" 93 | // credentials should not be included in the plan output, table information is sufficient. 94 | s"GreenplumRelation(${options.table})" + partitioningInfo 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/greenplum/GreenplumUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.execution.datasources.greenplum 19 | 20 | import java.io._ 21 | import java.nio.charset.StandardCharsets 22 | import java.sql.{Connection, Date, Timestamp} 23 | import java.util.UUID 24 | import java.util.concurrent.{TimeoutException, TimeUnit} 25 | 26 | import scala.concurrent.Promise 27 | import scala.concurrent.duration.Duration 28 | import scala.util.Try 29 | 30 | import org.postgresql.copy.CopyManager 31 | import org.postgresql.core.BaseConnection 32 | 33 | import org.apache.spark.SparkEnv 34 | import org.apache.spark.internal.Logging 35 | import org.apache.spark.sql.{DataFrame, Row} 36 | import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils 37 | import org.apache.spark.sql.types._ 38 | import org.apache.spark.util.{LongAccumulator, ThreadUtils, Utils} 39 | 40 | object GreenplumUtils extends Logging { 41 | 42 | def makeConverter(dataType: DataType): (Row, Int) => String = dataType match { 43 | case StringType => (r: Row, i: Int) => r.getString(i) 44 | case BooleanType => (r: Row, i: Int) => r.getBoolean(i).toString 45 | case ByteType => (r: Row, i: Int) => r.getByte(i).toString 46 | case ShortType => (r: Row, i: Int) => r.getShort(i).toString 47 | case IntegerType => (r: Row, i: Int) => r.getInt(i).toString 48 | case LongType => (r: Row, i: Int) => r.getLong(i).toString 49 | case FloatType => (r: Row, i: Int) => r.getFloat(i).toString 50 | case DoubleType => (r: Row, i: Int) => r.getDouble(i).toString 51 | case DecimalType() => (r: Row, i: Int) => r.getDecimal(i).toString 52 | 53 | case DateType => 54 | (r: Row, i: Int) => r.getAs[Date](i).toString 55 | 56 | case TimestampType => (r: Row, i: Int) => r.getAs[Timestamp](i).toString 57 | 58 | case BinaryType => (r: Row, i: Int) => 59 | new String(r.getAs[Array[Byte]](i), StandardCharsets.UTF_8) 60 | 61 | case udt: UserDefinedType[_] => makeConverter(udt.sqlType) 62 | case _ => (row: Row, ordinal: Int) => row.get(ordinal).toString 63 | } 64 | 65 | def convertRow( 66 | row: Row, 67 | length: Int, 68 | delimiter: String, 69 | valueConverters: Array[(Row, Int) => String]): Array[Byte] = { 70 | var i = 0 71 | val values = new Array[String](length) 72 | while (i < length) { 73 | if (!row.isNullAt(i)) { 74 | values(i) = convertValue(valueConverters(i).apply(row, i), delimiter.charAt(0)) 75 | } else { 76 | values(i) = "NULL" 77 | } 78 | i += 1 79 | } 80 | (values.mkString(delimiter) + "\n").getBytes("UTF-8") 81 | } 82 | 83 | def convertValue(str: String, delimiter: Char): String = { 84 | str.flatMap { 85 | case '\\' => "\\\\" 86 | case '\n' => "\\n" 87 | case '\r' => "\\r" 88 | case `delimiter` => s"\\$delimiter" 89 | case c if c == 0 => "" // If this char is an empty character, drop it. 90 | case c => s"$c" 91 | } 92 | } 93 | 94 | /** 95 | * https://www.postgresql.org/docs/9.2/sql-copy.html 96 | * 97 | * Copy data to greenplum in a single transaction. 
98 | * 99 | * @param df the [[DataFrame]] to be copied to Greenplum 100 | * @param schema the table schema in Greenplum 101 | * @param options Options for the Greenplum data source 102 | */ 103 | def transactionalCopy( 104 | df: DataFrame, 105 | schema: StructType, 106 | options: GreenplumOptions): Unit = { 107 | val randomString = UUID.randomUUID().toString.filterNot(_ == '-') 108 | val canonicalTblName = TableNameExtractor.extract(options.table) 109 | val schemaPrefix = canonicalTblName.schema.map(_ + ".").getOrElse("") 110 | val rawTblName = canonicalTblName.rawName 111 | val suffix = "sparkGpTmp" 112 | val quote = "\"" 113 | 114 | val tempTable = s"$schemaPrefix$quote${rawTblName}_${randomString}_$suffix$quote" 115 | val strSchema = JdbcUtils.schemaString(df, options.url, options.createTableColumnTypes) 116 | val createTempTbl = s"CREATE TABLE $tempTable ($strSchema) ${options.createTableOptions}" 117 | 118 | // Stage 1. Create a _sparkGpTmp table as a shadow of the target Greenplum table. If this stage 119 | // fails, the whole process aborts. 120 | val conn = JdbcUtils.createConnectionFactory(options)() 121 | try { 122 | executeStatement(conn, createTempTbl) 123 | } finally { 124 | closeConnSilent(conn) 125 | } 126 | 127 | // Stage 2. Spark executors run one copy task per partition, and each task increases the 128 | // accumulator when it copies its data successfully. 129 | val accumulator = df.sparkSession.sparkContext.longAccumulator("copySuccess") 130 | df.foreachPartition { rows => 131 | copyPartition(rows, options, schema, tempTable, Some(accumulator)) 132 | } 133 | 134 | // Stage 3. If the accumulator value is not equal to the [[DataFrame]] instance's partition 135 | // number, the Spark job fails with a [[PartitionCopyFailureException]]. 136 | // Otherwise, we run a rename-table DDL statement to rename the tmp table to the final 137 | // target table. 138 | val partNum = df.rdd.getNumPartitions 139 | val conn2 = JdbcUtils.createConnectionFactory(options)() 140 | try { 141 | if (accumulator.value == partNum) { 142 | if (tableExists(conn2, options.table)) { 143 | JdbcUtils.dropTable(conn2, options.table) 144 | } 145 | 146 | val newTableName = s"${options.table}".split("\\.").last 147 | val renameTempTbl = s"ALTER TABLE $tempTable RENAME TO $newTableName" 148 | executeStatement(conn2, renameTempTbl) 149 | } else { 150 | throw new PartitionCopyFailureException( 151 | s""" 152 | | Job aborted because some partitions failed to copy data to Greenplum: 153 | | Total partitions: $partNum, successful partitions: ${accumulator.value}. 154 | | You can retry the job. 155 | """.stripMargin) 156 | } 157 | } finally { 158 | if (tableExists(conn2, tempTable)) { 159 | retryingDropTableSilent(conn2, tempTable) 160 | } 161 | closeConnSilent(conn2) 162 | } 163 | } 164 | 165 | /** 166 | * Drop the table, retrying automatically when an exception occurs. 
167 | */ 168 | def retryingDropTableSilent(conn: Connection, table: String): Unit = { 169 | val dropTmpTableMaxRetry = 3 170 | var dropTempTableRetryCount = 0 171 | var dropSuccess = false 172 | 173 | while (!dropSuccess && dropTempTableRetryCount < dropTmpTableMaxRetry) { 174 | try { 175 | JdbcUtils.dropTable(conn, table) 176 | dropSuccess = true 177 | } catch { 178 | case e: Exception => 179 | dropTempTableRetryCount += 1 180 | logWarning(s"Drop tempTable $table failed for $dropTempTableRetryCount" + 181 | s"/${dropTmpTableMaxRetry} times, and will retry.", e) 182 | } 183 | } 184 | if (!dropSuccess) { 185 | logError(s"Drop tempTable $table failed for $dropTmpTableMaxRetry times," + 186 | s" and will not retry.") 187 | } 188 | } 189 | 190 | /** 191 | * https://www.postgresql.org/docs/9.2/sql-copy.html 192 | * 193 | * Copy data to Greenplum in the cases that need to update the original gptable: 194 | * 1. Overwrite an existing gptable which is a CascadingTruncateTable. 195 | * 2. Append data to a gptable. 196 | * 197 | * When the transactionOn option is true, we coalesce the [[DataFrame]] to one partition, 198 | * and the copy operation for each partition is atomic. 199 | * 200 | * @param df the [[DataFrame]] to be copied to Greenplum 201 | * @param schema the table schema in Greenplum 202 | * @param options Options for the Greenplum data source 203 | */ 204 | def nonTransactionalCopy( 205 | df: DataFrame, 206 | schema: StructType, 207 | options: GreenplumOptions): Unit = { 208 | df.foreachPartition { rows => 209 | copyPartition(rows, options, schema, options.table) 210 | } 211 | } 212 | 213 | /** 214 | * Copy a partition's data to a gptable. 215 | * 216 | * @param rows rows of a partition to be copied to Greenplum 217 | * @param options Options for the Greenplum data source 218 | * @param schema the table schema in Greenplum 219 | * @param tableName the table to which the data will be copied 220 | * @param accumulator accumulator for recording the number of successfully copied partitions 221 | */ 222 | def copyPartition( 223 | rows: Iterator[Row], 224 | options: GreenplumOptions, 225 | schema: StructType, 226 | tableName: String, 227 | accumulator: Option[LongAccumulator] = None): Unit = { 228 | val valueConverters: Array[(Row, Int) => String] = 229 | schema.map(s => makeConverter(s.dataType)).toArray 230 | val tmpDir = Utils.createTempDir(Utils.getLocalDir(SparkEnv.get.conf), "greenplum") 231 | val dataFile = new File(tmpDir, UUID.randomUUID().toString) 232 | logInfo(s"Start to write data to local tmp file: ${dataFile.getCanonicalPath}") 233 | val out = new BufferedOutputStream(new FileOutputStream(dataFile)) 234 | val startW = System.nanoTime() 235 | try { 236 | rows.foreach(r => out.write( 237 | convertRow(r, schema.length, options.delimiter, valueConverters))) 238 | } finally { 239 | out.close() 240 | } 241 | val endW = System.nanoTime() 242 | logInfo(s"Finished writing data to local tmp file: ${dataFile.getCanonicalPath}, " + 243 | s"time taken: ${(endW - startW) / math.pow(10, 9)}s") 244 | val in = new BufferedInputStream(new FileInputStream(dataFile)) 245 | val sql = s"COPY $tableName" + 246 | s" FROM STDIN WITH NULL AS 'NULL' DELIMITER AS E'${options.delimiter}'" 247 | 248 | val promisedCopyNums = Promise[Long] 249 | val conn = JdbcUtils.createConnectionFactory(options)() 250 | val copyManager = new CopyManager(conn.asInstanceOf[BaseConnection]) 251 | val copyThread = new Thread("copy-to-gp-thread") { 252 | override def run(): Unit = promisedCopyNums.complete(Try(copyManager.copyIn(sql, in)))
253 | } 254 | 255 | try { 256 | logInfo(s"Start copying stream to Greenplum with copy command $sql") 257 | val start = System.nanoTime() 258 | copyThread.start() 259 | try { 260 | val nums = ThreadUtils.awaitResult(promisedCopyNums.future, 261 | Duration(options.copyTimeout, TimeUnit.MILLISECONDS)) 262 | val end = System.nanoTime() 263 | logInfo(s"Copied $nums row(s) to Greenplum," + 264 | s" time taken: ${(end - start) / math.pow(10, 9)}s") 265 | } catch { 266 | case _: TimeoutException => 267 | throw new TimeoutException( 268 | s""" 269 | | The copy operation for this partition's data to Greenplum has been running for 270 | | more than the timeout: ${TimeUnit.MILLISECONDS.toSeconds(options.copyTimeout)}s. 271 | | You can configure this timeout with the copyTimeout option, such as "2h" or "100min"; 272 | | the default copyTimeout is "1h". 273 | """.stripMargin) 274 | } 275 | accumulator.foreach(_.add(1L)) 276 | } finally { 277 | copyThread.interrupt() 278 | copyThread.join() 279 | in.close() 280 | closeConnSilent(conn) 281 | } 282 | } 283 | 284 | def closeConnSilent(conn: Connection): Unit = { 285 | try { 286 | conn.close() 287 | } catch { 288 | case e: Exception => logWarning("Exception occurred when closing connection.", e) 289 | } 290 | } 291 | 292 | def executeStatement(conn: Connection, sql: String): Unit = { 293 | val statement = conn.createStatement() 294 | try { 295 | statement.executeUpdate(sql) 296 | } finally { 297 | statement.close() 298 | } 299 | } 300 | 301 | def reorderDataFrameColumns(df: DataFrame, tableSchema: Option[StructType]): DataFrame = { 302 | tableSchema.map { schema => 303 | df.selectExpr(schema.map(field => field.name): _*) 304 | }.getOrElse(df) 305 | } 306 | 307 | /** 308 | * Returns true if the table already exists in the JDBC database. 309 | */ 310 | def tableExists(conn: Connection, table: String): Boolean = { 311 | val query = s"SELECT * FROM $table WHERE 1=0" 312 | Try { 313 | val statement = conn.prepareStatement(query) 314 | try { 315 | statement.executeQuery() 316 | } finally { 317 | statement.close() 318 | } 319 | }.isSuccess 320 | } 321 | } 322 | 323 | private[greenplum] case class CanonicalTblName(schema: Option[String], rawName: String) 324 | 325 | /** 326 | * Extract the schema name and raw table name from a table name string. 327 | */ 328 | private[greenplum] object TableNameExtractor { 329 | private val nonSchemaTable = """[\"]*([0-9a-zA-Z_]+)[\"]*""".r 330 | private val schemaTable = """([\"]*[0-9a-zA-Z_]+[\"]*)\.[\"]*([0-9a-zA-Z_]+)[\"]*""".r 331 | 332 | def extract(tableName: String): CanonicalTblName = { 333 | tableName match { 334 | case nonSchemaTable(t) => CanonicalTblName(None, t) 335 | case schemaTable(schema, t) => CanonicalTblName(Some(schema), t) 336 | case _ => throw new IllegalArgumentException( 337 | s""" 338 | | The table name is illegal. You can set it with the dbtable option, such as 339 | | "schemaname"."tableName" or just "tableName" with a default schema "public". 340 | """.stripMargin 341 | ) 342 | } 343 | } 344 | } 345 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/datasources/greenplum/PartitionCopyFailureException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.execution.datasources.greenplum 19 | 20 | /** 21 | * General exception caused by some partition failed for copying dataFrame to greenplum. 22 | */ 23 | class PartitionCopyFailureException( 24 | errorMsg: String, 25 | cause: Throwable = null) 26 | extends Exception(errorMsg, cause) 27 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/greenplum/GreenplumOptionsSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.spark.sql.execution.datasources.greenplum 19 | 20 | import java.util.{Date, TimeZone} 21 | 22 | import org.apache.commons.lang3.time.FastDateFormat 23 | import org.scalatest.Matchers 24 | 25 | import org.apache.spark.SparkFunSuite 26 | import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} 27 | 28 | class GreenplumOptionsSuite extends SparkFunSuite with Matchers { 29 | private val date = new Date(0) 30 | 31 | test("empty user specified options") { 32 | val e = intercept[IllegalArgumentException](GreenplumOptions(CaseInsensitiveMap(Map()))) 33 | e.getMessage should include("Option 'url' is required") 34 | } 35 | 36 | test("map with only url") { 37 | val e = intercept[IllegalArgumentException]( 38 | GreenplumOptions(CaseInsensitiveMap(Map("url" -> "")))) 39 | e.getMessage should include("Option 'dbtable' is required") 40 | } 41 | 42 | test("driver class should always using postgresql") { 43 | val options = GreenplumOptions(CaseInsensitiveMap(Map("url" -> "", "dbtable" -> "src"))) 44 | options.driverClass should be("org.postgresql.Driver") 45 | val options2 = GreenplumOptions(CaseInsensitiveMap( 46 | Map("url" -> "", 47 | "dbtable" -> "src", 48 | "driver" -> "org.mysql.Driver"))) 49 | options2.driverClass should be("org.postgresql.Driver") 50 | } 51 | 52 | test("as properties") { 53 | val options = GreenplumOptions(CaseInsensitiveMap(Map("url" -> "", "dbtable" -> "src"))) 54 | val properties = options.asProperties 55 | properties.getProperty("url") should be("") 56 | properties.get("dbtable") should be("src") 57 | properties.get("driver") should be("org.postgresql.Driver") 58 | } 59 | 60 | test("as connection properties") { 61 | val options = GreenplumOptions(CaseInsensitiveMap(Map("url" -> "", "dbtable" -> "src"))) 62 | val properties = options.asConnectionProperties 63 | properties.getProperty("url") should be(null) 64 | properties.get("dbtable") should be(null) 65 | properties.get("driver") should be(null) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/execution/datasources/greenplum/GreenplumUtilsSuite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.spark.sql.execution.datasources.greenplum 19 | 20 | import java.io.File 21 | import java.sql.{Connection, Date, SQLException, Timestamp} 22 | import java.util.TimeZone 23 | 24 | import scala.concurrent.TimeoutException 25 | 26 | import io.airlift.testing.postgresql.TestingPostgreSqlServer 27 | import org.mockito.Matchers._ 28 | import org.mockito.Mockito._ 29 | import org.scalatest.mockito.MockitoSugar 30 | import org.apache.spark.SparkFunSuite 31 | 32 | import org.apache.spark.api.java.function.ForeachPartitionFunction 33 | import org.apache.spark.sql._ 34 | import org.apache.spark.sql.catalyst.expressions.GenericRow 35 | import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap 36 | import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils 37 | import org.apache.spark.sql.types._ 38 | import org.apache.spark.util.Utils 39 | 40 | class GreenplumUtilsSuite extends SparkFunSuite with MockitoSugar { 41 | var postgres: TestingPostgreSqlServer = _ 42 | var url: String = _ 43 | var sparkSession: SparkSession = _ 44 | var tempDir: File = _ 45 | 46 | override def beforeAll(): Unit = { 47 | tempDir = Utils.createTempDir() 48 | postgres = new TestingPostgreSqlServer("gptest", "gptest") 49 | url = postgres.getJdbcUrl 50 | sparkSession = SparkSession.builder() 51 | .config("spark.master", "local") 52 | .config("spark.app.name", "testGp") 53 | .config("spark.sql.warehouse.dir", s"${tempDir.getAbsolutePath}/warehouse") 54 | .config("spark.local.dir", s"${tempDir.getAbsolutePath}/local") 55 | .getOrCreate() 56 | } 57 | 58 | override def afterAll(): Unit = { 59 | try { 60 | if (postgres != null) { 61 | postgres.close() 62 | } 63 | if (sparkSession != null) { 64 | sparkSession.stop() 65 | } 66 | } finally { 67 | Utils.deleteRecursively(tempDir) 68 | } 69 | } 70 | 71 | test("make converter") { 72 | val options = GreenplumOptions(CaseInsensitiveMap(Map("url" -> "", "dbtable" -> "src"))) 73 | 74 | val now = System.currentTimeMillis() 75 | val row1 = Row(true, 1.toByte, 2.toShort, 3, 4.toLong, 76 | 5.toFloat, 6.toDouble, 7.toString, 8.toString.getBytes, 77 | new Date(now), 78 | new Timestamp(now), 79 | new java.math.BigDecimal(11), 80 | Array[String]("12", "12"), 81 | Map(13 -> 13, 130 -> 130), 82 | Row(14, "15")) 83 | 84 | val row2 = Row(null) 85 | 86 | val boolConverter = GreenplumUtils.makeConverter(BooleanType) 87 | assert(boolConverter.apply(row1, 0) === "true") 88 | intercept[NullPointerException](boolConverter.apply(row2, 0) === "") 89 | 90 | val byteConverter = GreenplumUtils.makeConverter(ByteType) 91 | assert(byteConverter(row1, 1) === "1") 92 | 93 | val shortConverter = GreenplumUtils.makeConverter(ShortType) 94 | assert(shortConverter(row1, 2) === "2") 95 | 96 | val intConverter = GreenplumUtils.makeConverter(IntegerType) 97 | assert(intConverter(row1, 3) === "3") 98 | 99 | val longConverter = GreenplumUtils.makeConverter(LongType) 100 | assert(longConverter(row1, 4) === "4") 101 | 102 | val floatConverter = GreenplumUtils.makeConverter(FloatType) 103 | assert(floatConverter(row1, 5) === "5.0") 104 | 105 | val doubleConverter = GreenplumUtils.makeConverter(DoubleType) 106 | assert(doubleConverter(row1, 6) === "6.0") 107 | 108 | val strConverter = GreenplumUtils.makeConverter(StringType) 109 | assert(strConverter(row1, 7) === "7") 110 | 111 | val binConverter = GreenplumUtils.makeConverter(BinaryType) 112 | assert(binConverter(row1, 8) === "8") 113 | 114 | val dateConverter = GreenplumUtils.makeConverter(DateType) 115 | assert(dateConverter(row1, 9) 
=== new Date(now).toString) 116 | 117 | val tsConverter = GreenplumUtils.makeConverter(TimestampType) 118 | assert(tsConverter(row1, 10) === new Timestamp(now).toString) 119 | 120 | val decimalConverter = GreenplumUtils.makeConverter(DecimalType(2, 0)) 121 | assert(decimalConverter(row1, 11) === new java.math.BigDecimal(11).toString) 122 | 123 | // val arrConverter = GreenplumUtils.makeConverter(ArrayType(StringType), options) 124 | // assert(arrConverter(row1, 12) === Array[String]("12", "12").mkString("[", ",", "]")) 125 | // 126 | // val mapConverter = GreenplumUtils.makeConverter(MapType(IntegerType, IntegerType), options) 127 | // assert(mapConverter(row1, 13) === 128 | // Map(13 -> 13, 130 -> 130) 129 | // .map(e => e._1 + ":" + e._2).toSeq.sorted.mkString("{", ",", "}")) 130 | // 131 | // val structConverter = 132 | // GreenplumUtils.makeConverter( 133 | // StructType(Array(StructField("a", IntegerType), StructField("b", StringType))), options) 134 | // assert(structConverter(row1, 14) === "{\"a\":14,\"b\":15}") 135 | } 136 | 137 | test("test copy to greenplum") { 138 | withConnectionAndOptions { (conn, tblname, options) => 139 | // scalastyle:off 140 | val buffer = "测试".getBytes().toBuffer 141 | buffer += 0 142 | val strWithEmpty = new String(buffer.toArray) 143 | val kvs = Map[Int, String](0 -> " ", 1 -> "\t", 2 -> "\n", 3 -> "\r", 4 -> "\\t", 5 -> "\\n", 144 | 6 -> "\\", 7 -> ",", 8 -> "te\tst", 9 -> "1`'`", 10 -> "中文测试", 11 -> strWithEmpty) 145 | // scalastyle:on 146 | val rdd = sparkSession.sparkContext.parallelize(kvs.toSeq) 147 | val df = sparkSession.createDataFrame(rdd) 148 | val defaultSource = new DefaultSource 149 | 150 | defaultSource.createRelation(sparkSession.sqlContext, SaveMode.Overwrite, options.params, df) 151 | val stmt1 = conn.createStatement() 152 | stmt1.executeQuery(s"select * from $tblname") 153 | stmt1.setFetchSize(kvs.size + 1) 154 | var count = 0 155 | val result2 = stmt1.getResultSet 156 | while (result2.next()) { 157 | val k = result2.getInt(1) 158 | val v = result2.getString(2) 159 | count += 1 160 | assert(kvs(k).filterNot(_ == 0) === v) 161 | } 162 | assert(count === kvs.size) 163 | 164 | // Append the df's data to gptbl, so the size will double. 165 | defaultSource.createRelation(sparkSession.sqlContext, SaveMode.Append, options.params, df) 166 | val stmt2 = conn.createStatement() 167 | stmt2.executeQuery(s"select * from $tblname") 168 | stmt2.setFetchSize(kvs.size * 2 + 1) 169 | val result3 = stmt2.getResultSet 170 | count = 0 171 | while (result3.next()) { 172 | count += 1 173 | } 174 | assert(count === kvs.size * 2) 175 | 176 | // Overwrite gptbl with df's data. 
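// (After SaveMode.Overwrite the table holds exactly one copy of df's rows again,
// so the count below should drop back to kvs.size.)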
177 | defaultSource.createRelation(sparkSession.sqlContext, SaveMode.Overwrite, options.params, df)
178 | val stat4 = conn.createStatement()
179 | stat4.executeQuery(s"select * from $tblname")
180 | stat4.setFetchSize(kvs.size + 1)
181 | val result4 = stat4.getResultSet
182 | count = 0
183 | while (result4.next()) {
184 | count += 1
185 | }
186 | assert(count === kvs.size)
187 |
188 | intercept[AnalysisException](defaultSource.createRelation(sparkSession.sqlContext,
189 | SaveMode.ErrorIfExists, options.params, df))
190 |
191 | defaultSource.createRelation(sparkSession.sqlContext, SaveMode.Ignore, options.params, df)
192 |
193 | }
194 | }
195 |
196 | test("test convert value and row") {
197 | withConnectionAndOptions { (_, _, options) =>
198 | val value = "test\t\rtest\n\\n\\,"
199 | assert(GreenplumUtils.convertValue(value, '\t') === "test\\\t\\rtest\\n\\\\n\\\\,")
200 |
201 | val values = Array[Any]("\n", "\t", ",", "\r", "\\", "\\n")
202 | val schema = new StructType().add("c1", StringType).add("c2", StringType)
203 | .add("c3", StringType).add("c4", StringType).add("c5", StringType)
204 | .add("c6", StringType)
205 | val valueConverters: Array[(Row, Int) => String] =
206 | schema.map(s => GreenplumUtils.makeConverter(s.dataType)).toArray
207 |
208 | val row = new GenericRow(values)
209 | val str = GreenplumUtils.convertRow(row, schema.length, options.delimiter, valueConverters)
210 | assert(str === "\\n\t\\\t\t,\t\\r\t\\\\\t\\\\n\n".getBytes("utf-8"))
211 | }
212 | }
213 |
214 | test("test copy partition") {
215 | withConnectionAndOptions { (conn, tblname, options) =>
216 |
217 | val values = Array[Any]("\n", "\t", ",", "\r", "\\", "\\n")
218 | val schema = new StructType().add("c1", StringType).add("c2", StringType)
219 | .add("c3", StringType).add("c4", StringType).add("c5", StringType)
220 | .add("c6", StringType)
221 | val rows = Array(new GenericRow(values)).toIterator
222 |
223 | val createTbl = s"CREATE TABLE $tblname(c1 text, c2 text, c3 text, c4 text, c5 text, c6 text)"
224 | GreenplumUtils.executeStatement(conn, createTbl)
225 |
226 | GreenplumUtils.copyPartition(rows, options, schema, tblname)
227 | val stat = conn.createStatement()
228 | val sql = s"SELECT * FROM $tblname"
229 | stat.executeQuery(sql)
230 | val result = stat.getResultSet
231 | result.next()
232 | for (i <- (0 until values.size)) {
233 | assert(result.getObject(i + 1) === values(i))
234 | }
235 | assert(!result.next())
236 | }
237 | }
238 |
239 | test("test transactions support") {
240 | withConnectionAndOptions { (conn, tblname, options) =>
241 | // scalastyle:off
242 | val kvs = Map[Int, String](0 -> " ", 1 -> "\t", 2 -> "\n", 3 -> "\r", 4 -> "\\t",
243 | 5 -> "\\n", 6 -> "\\", 7 -> ",", 8 -> "te\tst", 9 -> "1`'`", 10 -> "中文测试")
244 | // scalastyle:on
245 | // This suffix should be consistent with the suffix used in transactionalCopy
246 | val tempSuffix = "sparkGpTmp"
247 | val df = mock[DataFrame]
248 | val rdd = sparkSession.sparkContext.parallelize(kvs.toSeq)
249 | val realdf = sparkSession.createDataFrame(rdd)
250 | val schema = realdf.schema
251 | when(df.foreachPartition(any[ForeachPartitionFunction[Row]]()))
252 | .thenThrow(classOf[SQLException])
253 | when(df.sparkSession).thenReturn(sparkSession)
254 | when(df.schema).thenReturn(schema)
255 | when(df.rdd).thenReturn(realdf.rdd)
256 |
257 | // This should trigger an exception; the gptable is not created and the temp table is removed
258 | intercept[PartitionCopyFailureException](
259 | GreenplumUtils.transactionalCopy(df, schema, options))
260 |
261 | val showTables = "SELECT table_name FROM information_schema.tables"
262 | val stat = conn.createStatement()
263 | val result = stat.executeQuery(showTables)
264 | while (result.next()) {
265 | val tbl = result.getString(1)
266 | assert(tbl != tblname && !tbl.endsWith(tempSuffix))
267 | }
268 | }
269 | }
270 |
271 | test("test copyPartition with timeout exception") {
272 | val tblname = "tempTable"
273 | // Set the copyTimeout to 1ms, which must trigger a TimeoutException.
274 | val paras = CaseInsensitiveMap(Map("url" -> s"$url", "delimiter" -> "\t",
275 | "dbtable" -> "gptest", "copyTimeout" -> "1ms"))
276 | val options = GreenplumOptions(paras)
277 | val conn = JdbcUtils.createConnectionFactory(options)()
278 |
279 | try {
280 | val values = Array[Any]("\n", "\t", ",", "\r", "\\", "\\n")
281 | val schema = new StructType().add("c1", StringType).add("c2", StringType)
282 | .add("c3", StringType).add("c4", StringType).add("c5", StringType)
283 | .add("c6", StringType)
284 | val rows = Array(new GenericRow(values)).toIterator
285 | val createTbl = s"CREATE TABLE $tblname(c1 text, c2 text, c3 text, c4 text, c5 text, c6 text)"
286 | GreenplumUtils.executeStatement(conn, createTbl)
287 | intercept[TimeoutException](GreenplumUtils.copyPartition(rows, options, schema, tblname))
288 | } finally {
289 | GreenplumUtils.closeConnSilent(conn)
290 | }
291 | }
292 |
293 | test("test reorder dataframe's columns when the corresponding gp table exists") {
294 | withConnectionAndOptions { (conn, tblname, options) =>
295 | // scalastyle:off
296 | val kvs = Map[Int, String](0 -> " ", 1 -> "\t", 2 -> "\n", 3 -> "\r", 4 -> "\\t",
297 | 5 -> "\\n", 6 -> "\\", 7 -> ",", 8 -> "te\tst", 9 -> "1`'`", 10 -> "中文测试")
298 | // scalastyle:on
299 | val rdd = sparkSession.sparkContext.parallelize(kvs.toSeq)
300 | val df = sparkSession.createDataFrame(rdd)
301 |
302 | // Create a gptable whose column order differs from the dataFrame's
303 | val createTbl = s"CREATE TABLE $tblname (_2 text, _1 int)"
304 | GreenplumUtils.executeStatement(conn, createTbl)
305 |
306 | val defaultSource = new DefaultSource
307 | defaultSource.createRelation(sparkSession.sqlContext, SaveMode.Append, options.params, df)
308 |
309 | val stmt = conn.createStatement()
310 | stmt.executeQuery(s"select * from $tblname")
311 | stmt.setFetchSize(kvs.size + 1)
312 | val result4 = stmt.getResultSet
313 | var count = 0
314 | while (result4.next()) {
315 | count += 1
316 | }
317 | assert(count === kvs.size)
318 | }
319 | }
320 |
321 | test("test convert the table name to canonical table name") {
322 | val quote = "\""
323 | val schema = "schema"
324 | val table = "table"
325 |
326 | val str1 = s"$table"
327 | val str2 = s"${quote}$table${quote}"
328 | val str3 = s"$schema.${quote}${quote}$str1${quote}"
329 | val str4 = s"${quote}test${quote}test"
330 | assert(TableNameExtractor.extract(str1) === CanonicalTblName(None, table))
331 | assert(TableNameExtractor.extract(str2) === CanonicalTblName(None, table))
332 | assert(TableNameExtractor.extract(str3) === CanonicalTblName(Some(schema), table))
333 | intercept[IllegalArgumentException](TableNameExtractor.extract(str4))
334 | }
335 |
336 | test("test schema and table names with double quotes") {
337 | val quote = "\""
338 | val schemaTableNames = Map(s"${quote}163${quote}" -> s"${quote}tempTable${quote}",
339 | s"schemaName" -> s"${quote}tempTable${quote}", s"${quote}163${quote}" -> s"tempTable")
340 |
341 | schemaTableNames.foreach { schemaTableName =>
342 | val schema = schemaTableName._1
343 | val tblname =
schemaTableName._2 344 | val paras = CaseInsensitiveMap(Map("url" -> s"$url", "delimiter" -> "\t", 345 | "dbtable" -> s"$schema.$tblname")) 346 | val options = GreenplumOptions(paras) 347 | 348 | // scalastyle:off 349 | val kvs = Map[Int, String](0 -> " ", 1 -> "\t", 2 -> "\n", 3 -> "\r", 4 -> "\\t", 350 | 5 -> "\\n", 6 -> "\\", 7 -> ",", 8 -> "te\tst", 9 -> "1`'`", 10 -> "中文测试") 351 | // scalastyle:on 352 | val rdd = sparkSession.sparkContext.parallelize(kvs.toSeq) 353 | val df = sparkSession.createDataFrame(rdd) 354 | 355 | val conn = JdbcUtils.createConnectionFactory(options)() 356 | 357 | try { 358 | val createSchema = s"CREATE SCHEMA IF NOT EXISTS $schema " 359 | GreenplumUtils.executeStatement(conn, createSchema) 360 | val defaultSource = new DefaultSource 361 | defaultSource.createRelation(sparkSession.sqlContext, SaveMode.Append, options.params, df) 362 | 363 | assert(JdbcUtils.tableExists(conn, options)) 364 | 365 | val relation = defaultSource.createRelation(sparkSession.sqlContext, paras) 366 | relation.asInstanceOf[GreenplumRelation].insert(df, true) 367 | assert(JdbcUtils.tableExists(conn, options)) 368 | relation.asInstanceOf[GreenplumRelation].insert(df, false) 369 | assert(JdbcUtils.tableExists(conn, options)) 370 | val paras2 = paras ++: Map("transactionOn" -> "true", "truncate" -> "true") 371 | val relation2 = defaultSource.createRelation(sparkSession.sqlContext, paras2) 372 | relation2.asInstanceOf[GreenplumRelation].insert(df, true) 373 | relation2.asInstanceOf[GreenplumRelation].insert(df, false) 374 | defaultSource.createRelation(sparkSession.sqlContext, SaveMode.Append, paras2, df) 375 | defaultSource.createRelation(sparkSession.sqlContext, SaveMode.Overwrite, paras2, df) 376 | assert(JdbcUtils.tableExists(conn, options)) 377 | } finally { 378 | GreenplumUtils.closeConnSilent(conn) 379 | } 380 | } 381 | } 382 | 383 | def withConnectionAndOptions(f: (Connection, String, GreenplumOptions) => Unit): Unit = { 384 | val schema = "gptest" 385 | val paras = 386 | CaseInsensitiveMap(Map("url" -> s"$url", "delimiter" -> "\t", "dbtable" -> s"$schema.test", 387 | "transactionOn" -> "true")) 388 | val options = GreenplumOptions(paras) 389 | val conn = JdbcUtils.createConnectionFactory(options)() 390 | try { 391 | val createSchema = s"CREATE SCHEMA IF NOT EXISTS $schema" 392 | GreenplumUtils.executeStatement(conn, createSchema) 393 | f(conn, options.table, options) 394 | } finally { 395 | val dropTbl = s"DROP TABLE IF EXISTS ${options.table}" 396 | val dropSchema = s"DROP SCHEMA IF EXISTS $schema" 397 | GreenplumUtils.executeStatement(conn, dropTbl) 398 | GreenplumUtils.executeStatement(conn, dropSchema) 399 | GreenplumUtils.closeConnSilent(conn) 400 | } 401 | } 402 | } 403 | 404 | --------------------------------------------------------------------------------
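A minimal, hypothetical usage sketch (not part of the repository) showing how a DataFrame might be written through this data source. The registered short name "greenplum", the JDBC URL, credentials, and table name are assumptions for illustration only; the fully-qualified package name shown in the comment resolves in any case because the package contains a DefaultSource class. The options used ("url", "dbtable", "delimiter", "transactionOn", "copyTimeout") are the ones exercised by the test suites above.

// GreenplumWriteExample.scala -- hedged usage sketch, not shipped with the project.
import org.apache.spark.sql.{SaveMode, SparkSession}

object GreenplumWriteExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("greenplum-write-example")
      .getOrCreate()
    import spark.implicits._

    // A tiny DataFrame to copy into Greenplum.
    val df = Seq((1, "a"), (2, "b")).toDF("id", "name")

    df.write
      // Assumed short name; .format("org.apache.spark.sql.execution.datasources.greenplum")
      // also works because that package provides a DefaultSource.
      .format("greenplum")
      // Hypothetical connection string; user/password are placeholders.
      .option("url", "jdbc:postgresql://gp-master:5432/testdb?user=gpadmin&password=changeme")
      .option("dbtable", "public.spark_gp_example") // "schema"."table" or just "table"
      .option("delimiter", "\t")                    // column delimiter used by COPY
      .option("transactionOn", "true")              // coalesce to one partition; atomic copy
      .option("copyTimeout", "1h")                  // per-partition COPY timeout
      .mode(SaveMode.Append)
      .save()

    spark.stop()
  }
}

As documented in GreenplumUtils, setting transactionOn trades parallelism for atomicity: the DataFrame is coalesced to a single partition so the whole COPY either succeeds or fails as one unit.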