├── .gitignore
├── CHANGES.txt
├── README.md
├── build.gradle
├── cascading-jdbc-core
├── build.gradle
└── src
│ ├── main
│ └── java
│ │ └── cascading
│ │ └── jdbc
│ │ ├── InternalTypeMapping.java
│ │ ├── JDBCFactory.java
│ │ ├── JDBCScheme.java
│ │ ├── JDBCTap.java
│ │ ├── JDBCUtil.java
│ │ ├── TableDesc.java
│ │ ├── TupleRecord.java
│ │ └── db
│ │ ├── BatchProcessingException.java
│ │ ├── DBConfiguration.java
│ │ ├── DBInputFormat.java
│ │ ├── DBOutputFormat.java
│ │ └── DBWritable.java
│ └── test
│ ├── java
│ └── cascading
│ │ └── jdbc
│ │ ├── GenericJdbcCheck.java
│ │ ├── InternalTypeMappingTest.java
│ │ ├── JDBCFactoryTest.java
│ │ ├── JDBCSchemeTest.java
│ │ ├── JDBCTestingBase.java
│ │ ├── TableDescTest.java
│ │ └── TupleRecordTest.java
│ └── resources
│ ├── data
│ └── small.txt
│ └── log4j.properties
├── cascading-jdbc-derby
├── build.gradle
└── src
│ ├── main
│ ├── java
│ │ └── cascading
│ │ │ └── jdbc
│ │ │ ├── DerbyFactory.java
│ │ │ └── db
│ │ │ └── DerbyDBInputFormat.java
│ └── resources
│ │ └── cascading
│ │ └── bind
│ │ └── provider.properties
│ └── test
│ └── java
│ └── cascading
│ └── jdbc
│ └── DerbyTest.java
├── cascading-jdbc-h2
├── build.gradle
└── src
│ ├── main
│ └── resources
│ │ └── cascading
│ │ └── bind
│ │ └── provider.properties
│ └── test
│ └── java
│ └── cascading
│ └── jdbc
│ └── H2Test.java
├── cascading-jdbc-mysql
├── build.gradle
└── src
│ ├── main
│ ├── java
│ │ └── cascading
│ │ │ └── jdbc
│ │ │ ├── MySqlFactory.java
│ │ │ ├── MySqlScheme.java
│ │ │ └── db
│ │ │ ├── MySqlDBConfiguration.java
│ │ │ ├── MySqlDBInputFormat.java
│ │ │ └── MySqlDBOutputFormat.java
│ └── resources
│ │ └── cascading
│ │ └── bind
│ │ └── provider.properties
│ └── test
│ └── java
│ └── cascading
│ └── jdbc
│ └── MysqlTest.java
├── cascading-jdbc-oracle
├── build.gradle
└── src
│ ├── main
│ ├── java
│ │ └── cascading
│ │ │ └── jdbc
│ │ │ ├── OracleJDBCFactory.java
│ │ │ └── db
│ │ │ └── OracleDBInputFormat.java
│ └── resources
│ │ └── cascading
│ │ └── bind
│ │ └── provider.properties
│ └── test
│ └── java
│ └── cascading
│ └── jdbc
│ ├── OracleJDBCFactoryTest.java
│ └── OracleTest.java
├── cascading-jdbc-postgresql
├── build.gradle
└── src
│ ├── main
│ └── resources
│ │ └── cascading
│ │ └── bind
│ │ └── provider.properties
│ └── test
│ └── java
│ └── cascading
│ └── jdbc
│ └── PostgresTest.java
├── cascading-jdbc-redshift
├── build.gradle
└── src
│ ├── main
│ ├── java
│ │ └── cascading
│ │ │ └── jdbc
│ │ │ ├── AWSCredentials.java
│ │ │ ├── InvalidCodepointForRedshiftException.java
│ │ │ ├── RedshiftFactory.java
│ │ │ ├── RedshiftSafeDelimitedParser.java
│ │ │ ├── RedshiftScheme.java
│ │ │ ├── RedshiftTableDesc.java
│ │ │ └── RedshiftTap.java
│ └── resources
│ │ └── cascading
│ │ └── bind
│ │ └── provider.properties
│ └── test
│ └── java
│ └── cascading
│ └── jdbc
│ ├── RedshiftSafeDelimitedParserTest.java
│ └── RedshiftTest.java
├── cascading-jdbc-teradata
├── build.gradle
└── src
│ ├── main
│ ├── java
│ │ └── cascading
│ │ │ └── jdbc
│ │ │ ├── TeradataJDBCFactory.java
│ │ │ ├── TeradataTableDesc.java
│ │ │ └── db
│ │ │ └── TeradataDBInputFormat.java
│ └── resources
│ │ └── cascading
│ │ └── bind
│ │ └── provider.properties
│ └── test
│ └── java
│ └── cascading
│ └── jdbc
│ ├── TeradataJDBCFactoryTest.java
│ └── TeradataTest.java
├── etc
├── properties.gradle
├── s3Upload.gradle
├── synchronizer.properties
└── version.gradle
├── settings.gradle
└── version.properties
/.gitignore:
--------------------------------------------------------------------------------
1 | *.iml
2 | *.ipr
3 | *.iws
4 | /pom.xml
5 | *jar
6 | /lib
7 | /classes
8 | /native
9 | /.lein-failures
10 | /checkouts
11 | /.lein-deps-sum
12 | .gradle
13 | build
14 | .classpath
15 | .project
16 | .settings
17 | .idea
18 | /bin
19 |
--------------------------------------------------------------------------------
/CHANGES.txt:
--------------------------------------------------------------------------------
1 | Cascading-jdbc Change Log
2 |
3 | 3.0.0
4 | - updated to Cascading 3.0
5 | - support for Apache Tez
6 | - moved tests onto cascading-hadoop2-tez platform
7 | - major code restructuring to simplify database interactions
8 | - removed c.j.JDBCTapCollector and leave creation of RecordReader to the computational framework
9 | - removed deprecated code
10 |
11 | 2.7.0
12 | - update to Cascading 2.7
13 | - fix issue in Derby InputSplit handling
14 | - deprecated c.j.Utils, use c.u.Util instead
15 |
16 | 2.6.0
17 | - add cascading-jdbc-<name>:<version> entry to the Cascading Frameworks property
18 | - enabled parameter sanitization for username and password
19 | - added support for Teradata (cascading-jdbc-teradata)
20 | - update to cascading 2.6
21 |
22 | 2.5.5
23 | - enable mysql streaming
24 |
25 | 2.5.4
26 | - update to cascading 2.5.5
27 | - fix a bug related to type handling and table creation
28 |
29 | 2.5.3
30 | - RedshiftScheme constructor is now accepting additional conditions
31 | - JDBCTap uses DatabaseMetaData to check if a table exists
32 |
33 | 2.5.2
34 | - upgrade redshift jdbc driver to 8.4-702.jdbc4
35 | - reduce size of test jars
36 | - replaceOnInsert support for MySQL
37 | - more robust query discovery
38 |
39 | 2.5.1
40 | - update to cascading 2.5.2
41 | - fix issue in batch processing
42 | - enable hadoop2-mr1 platform for the lingual providers
43 |
44 | 2.5.0
45 | - Support for Cascading 2.5.x
46 | - More robust batch processing
47 | - More robust check if a table exists
48 | - full support for SinkMode.UPDATE
49 |
50 | 2.2.0
51 | - initial release of Cascading 2.2 compatible version
52 |
--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | import java.text.SimpleDateFormat
22 |
23 | buildscript {
24 | repositories {
25 | mavenLocal()
26 | mavenCentral()
27 | jcenter()
28 | }
29 | dependencies {
30 | classpath 'com.netflix.nebula:gradle-extra-configurations-plugin:2.2.1'
31 | classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.1'
32 | }
33 | }
34 |
35 | apply from: 'etc/version.gradle'
36 |
37 | version = releaseVersion
38 |
39 | // the version of hadoop used for compiling and testing
40 | ext.hadoopVersion = "2.6.0"
41 | ext.tezVersion = "0.6.1"
42 |
43 | ext.cascadingVersion = '3.0.0'
44 | ext.lingualVersion = '2.0.0-wip-+'
45 |
46 | ext.timestamp = new SimpleDateFormat( "yyyyMMdd" ).format( new Date() )
47 |
48 | allprojects {
49 |
50 | group = 'cascading'
51 | version = releaseVersion
52 |
53 | repositories {
54 | mavenLocal()
55 | mavenCentral()
56 | maven{ url 'http://conjars.org/repo/' }
57 | maven{ url 'http://repo.pentaho.org/artifactory/repo/' }
58 | }
59 |
60 | apply plugin: 'java'
61 | apply plugin: 'idea'
62 | apply plugin: 'maven'
63 | apply plugin: 'eclipse'
64 | apply plugin: 'provided-base'
65 | apply plugin: 'com.github.johnrengelman.shadow'
66 |
67 | }
68 |
69 | subprojects{
70 |
71 | dependencies {
72 |
73 | provided group: 'cascading', name: 'cascading-hadoop2-tez', version: cascadingVersion, changing: true
74 | provided group: 'org.slf4j', name: 'slf4j-api', version: '1.7.5'
75 | provided group: 'org.slf4j', name: 'slf4j-log4j12', version: '1.7.5'
76 | provided( group: 'org.apache.hadoop', name: 'hadoop-common', version: hadoopVersion )
77 | provided( group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: hadoopVersion )
78 | testCompile group: 'junit', name: 'junit', version: '4.11'
79 | testCompile group: 'org.mockito', name: 'mockito-all', version: '1.9.5'
80 | testCompile group: 'cascading', name: 'cascading-core', version: cascadingVersion, classifier: 'tests'
81 | testRuntime group: 'log4j', name: 'log4j', version: '1.2.17'
82 |
83 | // only used in tests, not in the production code.
84 | testCompile(group: 'cascading', name: 'lingual-core', version: lingualVersion ){
85 | exclude group: 'xerces'
86 | }
87 |
88 | testCompile( group: 'org.apache.tez', name: 'tez-tests', version: tezVersion, classifier: 'tests' )
89 | testCompile( group: 'org.apache.tez', name: 'tez-api', version: tezVersion )
90 | testCompile( group: 'org.apache.tez', name: 'tez-dag', version: tezVersion )
91 | testCompile( group: 'org.apache.hadoop', name: 'hadoop-minicluster', version: hadoopVersion )
92 | }
93 |
94 | test {
95 | if ( System.getProperty('DEBUG', 'false') == 'true' ) {
96 | jvmArgs '-Xdebug', '-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005'
97 | }
98 | }
99 |
100 | task sourcesJar( type: Jar, dependsOn: classes ) {
101 | from sourceSets.main.allSource
102 | classifier = 'sources'
103 | }
104 |
105 | task javadocJar( type: Jar, dependsOn: javadoc ) {
106 | classifier = 'javadoc'
107 | from javadoc.destinationDir
108 | }
109 |
110 | task testsJar( type: Jar, dependsOn: testClasses ) {
111 | from sourceSets.test.output
112 | classifier = 'tests'
113 | }
114 |
115 | task testSourcesJar( type: Jar, dependsOn: classes ) {
116 | from sourceSets.test.allSource
117 | classifier = 'test-sources'
118 | }
119 |
120 | configurations {
121 | testArtifacts {
122 | extendsFrom testRuntime
123 | }
124 | }
125 |
126 | artifacts {
127 | archives jar
128 | archives shadowJar
129 | archives sourcesJar
130 | archives javadocJar
131 | archives testsJar
132 | archives testSourcesJar
133 | testArtifacts testsJar
134 | testArtifacts testSourcesJar
135 | }
136 |
137 | uploadArchives {
138 |
139 | def deployer = repositories.mavenDeployer {
140 | configuration = configurations.archives
141 |
142 | repository( url: repoUrl ) {
143 | authentication( userName: repoUserName, password: repoPassword )
144 | }
145 |
146 | pom.project {
147 | description 'Cascading JDBC is a collection of adapters for JDBC.'
148 | inceptionYear '2013'
149 | url 'http://cascading.org/'
150 | scm {
151 | url 'https://github.com/Cascading/cascading-jdbc.git'
152 | }
153 | licenses {
154 | license {
155 | name 'The Apache Software License, Version 2.0'
156 | url 'http://www.apache.org/licenses/LICENSE-2.0.txt'
157 | distribution 'repo'
158 | }
159 | }
160 | }
161 | }
162 | [install.repositories.mavenInstaller, deployer]*.pom*.whenConfigured { pom ->
163 | def scopeOrder = [test: 1, runtime: 2, provided: 3, compile: 4].withDefault { 100 }
164 | pom.dependencies = pom.dependencies.sort { scopeOrder[it.scope] }
165 | }
166 | }
167 |
168 | jar {
169 | manifest {
170 | attributes( "Build-Date": "${timestamp}" )
171 | }
172 | }
173 |
174 | shadowJar {
175 | classifier = "provider"
176 | configurations = []
177 | from {
178 | project.configurations.runtime.minus( [ project.configurations.provided ] )
179 | }
180 | mergeServiceFiles()
181 |
182 | exclude 'LICENSE'
183 | exclude 'META-INF/*.DSA'
184 | exclude 'META-INF/LICENSE*'
185 | }
186 |
187 | task createFrameworkProperties( dependsOn: processResources ) << {
188 | if ( project.name != "cascading-jdbc-core" ) {
189 | file( "${sourceSets.main.output.resourcesDir}/cascading" ).mkdirs()
190 | file( "${sourceSets.main.output.resourcesDir}/cascading/framework.properties").write("name=${project.name}:${version}\n" )
191 | }
192 | }
193 |
194 | jar.dependsOn( createFrameworkProperties )
195 | shadowJar.dependsOn( createFrameworkProperties )
196 |
197 | }
198 |
199 | idea {
200 | module {
201 | downloadJavadoc = true
202 | downloadSources = true
203 | }
204 | }
205 |
206 | eclipse {
207 | classpath {
208 | defaultOutputDir = file( 'build' )
209 | downloadSources = true
210 | downloadJavadoc = true
211 | }
212 | }
213 |
214 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/build.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | test{
21 | scanForTestClasses = false
22 | exclude( "**/*Check.class" )
23 | exclude( "**/*Base.class" )
24 | }
25 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/main/java/cascading/jdbc/InternalTypeMapping.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package cascading.jdbc;
21 |
22 | import java.lang.reflect.Type;
23 | import java.sql.Date;
24 | import java.sql.Time;
25 | import java.sql.Timestamp;
26 | import java.util.HashMap;
27 | import java.util.Map;
28 |
29 | import cascading.tuple.Fields;
30 | import cascading.tuple.type.CoercibleType;
31 |
32 |
33 | /**
34 | * Class that maps Java classes to SQL type definitions. When cascading-jdbc is
35 | * used from lingual, we might be presented with JVM internal classes and
36 | * {@link CoercibleType} instances of {@link Fields}, but we need to know the
37 | * more specific JDBC types, in order to write the tuple to the databases. This
38 | * class is doing an "educated guess" on the types.
39 | *
40 | * The rules are like this:
41 | *
42 | * int.class -> "int not null"
43 | * Integer.class -> "int"
44 | * String.class -> "varchar(256)"
45 | * long.class -> "int not null"
46 | * Long.class -> "int"
47 | * Time.class -> "time"
48 | * Date.class -> "date"
49 | * Timestamp.class -> "timestamp".
50 | *
51 | *
52 | *
53 | * Coercibles are translated like this and then given to the translation above:
54 | *
55 | *
56 | * cascading.lingual.type.SQLDateCoercibleType -> java.sql.Date.class
57 | * cascading.lingual.type.SQLDateTimeCoercibleType -> java.sql.Date.class
58 | * cascading.lingual.type.SQLTimeCoercibleType -> java.sql.Time.class
59 | * cascading.lingual.type.SQLTimestampCoercibleType -> java.sql.Timestamp.class
60 | *
61 | *
62 | * All other {@link CoercibleType} instances are converted to String.class, since anything should
63 | * be coercible to a String.
64 | *
65 | */
66 | public class InternalTypeMapping
67 | {
68 | private static final Map TYPES = new HashMap();
69 |
70 | private static final Map NATIVE_TYPES = new HashMap();
71 |
72 | static
73 | {
74 | TYPES.put( Integer.class, "int" );
75 | TYPES.put( int.class, "int not null" );
76 | TYPES.put( String.class, "varchar(256)" );
77 | TYPES.put( Long.class, "int" );
78 | TYPES.put( long.class, "int not null" );
79 | TYPES.put( Time.class, "time" );
80 | TYPES.put( Date.class, "date" );
81 | TYPES.put( Timestamp.class, "timestamp" );
82 |
83 | /*
84 | * we have no compile time dependency on lingual and we should never have
85 | * that, so we work around the types being unknown right now, by using class names.
86 | */
87 | NATIVE_TYPES.put( "cascading.lingual.type.SQLDateCoercibleType", java.sql.Date.class );
88 | NATIVE_TYPES.put( "cascading.lingual.type.SQLDateTimeCoercibleType", java.sql.Date.class );
89 | NATIVE_TYPES.put( "cascading.lingual.type.SQLTimeCoercibleType", java.sql.Time.class );
90 | NATIVE_TYPES.put( "cascading.lingual.type.SQLTimestampCoercibleType", java.sql.Timestamp.class );
91 |
92 | }
93 |
94 | /**
95 | * Method to determine the correct type, that a field should be
96 | * coerced to, before writing it to the database. The method uses an internal
97 | * mapping. If no class can be found in the mapping, it will return
98 | * String.class
;
99 | *
100 | * @param type The type of a {@link Fields} instance
101 | * @return a JVM internal type.
102 | */
103 | public static Type findInternalType( Type type )
104 | {
105 | if( ! ( type instanceof CoercibleType ) )
106 | return type;
107 |
108 | CoercibleType> coercible = (CoercibleType>) type;
109 | Type nativeType = NATIVE_TYPES.get( coercible.getClass().getName() );
110 | if( nativeType == null )
111 | nativeType = String.class;
112 | return nativeType;
113 |
114 | }
115 |
116 | /**
117 | * Returns a mapping of a java class to a SQL type as a {@link String}.
118 | *
119 | * @param type The {@link Type} to find the mapping for.
120 | *
121 | * @throws IllegalArgumentException If no mapping can be found.
122 | * */
123 | public static String sqltypeForClass( Type type )
124 | {
125 | String sqlType = TYPES.get( type );
126 | if( sqlType == null )
127 | {
128 | Type nativeType = findInternalType( type );
129 | sqlType = TYPES.get( nativeType );
130 | if( sqlType == null )
131 | throw new IllegalArgumentException( String.format( "cannot map type %s to a sql type", type ) );
132 | }
133 | return sqlType;
134 | }
135 | }
136 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/main/java/cascading/jdbc/JDBCFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package cascading.jdbc;
21 |
22 | import java.util.Properties;
23 |
24 | import org.apache.hadoop.mapred.InputFormat;
25 | import org.slf4j.Logger;
26 | import org.slf4j.LoggerFactory;
27 |
28 | import cascading.jdbc.db.DBInputFormat;
29 | import cascading.jdbc.db.DBOutputFormat;
30 | import cascading.scheme.Scheme;
31 | import cascading.tap.SinkMode;
32 | import cascading.tap.Tap;
33 | import cascading.tuple.Fields;
34 |
35 | /**
36 | * {@link JDBCFactory} is a factory class that can be used by the lingual
37 | * provider mechanism to create {@link JDBCScheme}s and {@link JDBCTap}s.
38 | *
39 | * */
40 | public class JDBCFactory
41 | {
42 | private static final Logger LOG = LoggerFactory.getLogger( JDBCFactory.class );
43 |
44 | public static final String DEFAULT_SEPARATOR = ":";
45 |
46 | public static final String PROTOCOL_JDBC_USER = "jdbcuser";
47 | public static final String PROTOCOL_JDBC_PASSWORD = "jdbcpassword";
48 | public static final String PROTOCOL_JDBC_DRIVER = "jdbcdriver";
49 |
50 | public static final String PROTOCOL_FIELD_SEPARATOR = "tabledesc.separator";
51 | public static final String PROTOCOL_TABLE_NAME = "tabledesc.tablename";
52 | public static final String PROTOCOL_COLUMN_NAMES = "tabledesc.columnnames";
53 | public static final String PROTOCOL_COLUMN_DEFS = "tabledesc.columndefs";
54 | public static final String PROTOCOL_PRIMARY_KEYS = "tabledesc.primarykeys";
55 | public static final String PROTOCOL_SINK_MODE = "sinkmode";
56 |
57 | public static final String FORMAT_SEPARATOR = "separator";
58 | public static final String FORMAT_COLUMNS = "columnnames";
59 | public static final String FORMAT_ORDER_BY = "orderBy";
60 | public static final String FORMAT_CONDITIONS = "conditions";
61 | public static final String FORMAT_LIMIT = "limit";
62 | public static final String FORMAT_UPDATE_BY = "updateBy";
63 | public static final String FORMAT_TABLE_ALIAS = "tableAlias";
64 |
65 | public static final String FORMAT_SELECT_QUERY = "selectQuery";
66 | public static final String FORMAT_COUNT_QUERY = "countQuery";
67 |
68 | /**
69 | * Creates a new Tap for the given arguments.
70 | *
71 | * @param protocol name of the protocol, only accepts "jdbc".
72 | * @param scheme a {@link JDBCScheme} instance.
73 | * @param identifier The identifier of the tap, which is assumed to be the
74 | * jdbc URL.
75 | * @param mode a {@link SinkMode}. All are supported.
76 | * @param properties The Properties object containing the table description,
77 | * optionally a jdbc user and a jdbc password.
78 | * @return a new {@link JDBCTap} instance.
79 | */
80 | @SuppressWarnings("rawtypes")
81 | public Tap createTap( String protocol, Scheme scheme, String identifier, SinkMode mode, Properties properties )
82 | {
83 | LOG.info( "creating jdbc protocol with properties {} in mode {}", properties, mode );
84 |
85 | String driver = properties.getProperty( PROTOCOL_JDBC_DRIVER );
86 |
87 | String jdbcUserProperty = properties.getProperty( PROTOCOL_JDBC_USER );
88 | String jdbcPasswordProperty = properties.getProperty( PROTOCOL_JDBC_PASSWORD );
89 |
90 | String jdbcUser = null;
91 | if( jdbcUserProperty != null && !jdbcUserProperty.isEmpty() )
92 | jdbcUser = jdbcUserProperty;
93 |
94 | String jdbcPassword = null;
95 | if( jdbcPasswordProperty != null && !jdbcPasswordProperty.isEmpty() )
96 | jdbcPassword = jdbcPasswordProperty;
97 |
98 | final TableDesc tableDesc = createTableDescFromProperties( properties );
99 |
100 | JDBCScheme jdbcScheme = (JDBCScheme) scheme;
101 |
102 | /*
103 | * it is possible, that the schema information given via properties is
104 | * incomplete and therefore, we derive it from the given fields. We can only
105 | * do that, if we actually get meaningful fields. There is a second place,
106 | * where this happens, which is the presentSinkFields method of the
107 | * JDBCScheme.
108 | */
109 | Fields sinkFields = jdbcScheme.getSinkFields();
110 | if( !tableDesc.hasRequiredTableInformation() && sinkFields != Fields.UNKNOWN && sinkFields != Fields.ALL && sinkFields != null
111 | && sinkFields.getTypes() != null )
112 | {
113 | LOG.debug( "tabledesc information incomplete, falling back to sink-fields {}", jdbcScheme.getSinkFields() );
114 | tableDesc.completeFromFields( jdbcScheme.getSinkFields() );
115 | ( (JDBCScheme) scheme ).setColumns( tableDesc.getColumnNames() );
116 | }
117 |
118 | // users can overwrite the sink mode.
119 | String sinkModeProperty = properties.getProperty( PROTOCOL_SINK_MODE );
120 | SinkMode userMode = mode;
121 | if( sinkModeProperty != null && !sinkModeProperty.isEmpty() )
122 | userMode = SinkMode.valueOf( sinkModeProperty );
123 |
124 | return new JDBCTap( identifier, jdbcUser, jdbcPassword, driver, tableDesc, jdbcScheme, userMode );
125 |
126 | }
127 |
128 | /**
129 | * Creates a new {@link JDBCScheme} instance for the given format, fields and
130 | * properties.
131 | *
132 | * @param format The format of the scheme. This is JDBC driver dependent.
133 | * @param fields The fields to interact with.
134 | * @param properties The {@link Properties} object containing the necessary
135 | * information to construct a {@link JDBCScheme}.
136 | * @return a new {@link JDBCScheme} instance.
137 | */
138 | public Scheme createScheme( String format, Fields fields, Properties properties )
139 | {
140 | LOG.info( "creating {} format with properties {} and fields {}", format, properties, fields );
141 |
142 | String selectQuery = properties.getProperty( FORMAT_SELECT_QUERY );
143 | String countQuery = properties.getProperty( FORMAT_COUNT_QUERY );
144 | String separator = properties.getProperty( FORMAT_SEPARATOR, DEFAULT_SEPARATOR );
145 | long limit = -1;
146 |
147 | String limitProperty = properties.getProperty( FORMAT_LIMIT );
148 | if( limitProperty != null && !limitProperty.isEmpty() )
149 | limit = Long.parseLong( limitProperty );
150 |
151 | String[] columnNames = getColumnNames(fields, properties, separator);
152 |
153 | boolean tableAlias = getTableAlias(properties);
154 |
155 | if( selectQuery != null )
156 | {
157 | if( countQuery == null )
158 | throw new IllegalArgumentException( "no count query for select query given" );
159 |
160 | return createScheme( fields, selectQuery, countQuery, limit, columnNames, tableAlias );
161 | }
162 |
163 | String conditions = properties.getProperty( FORMAT_CONDITIONS );
164 |
165 | String updateByProperty = properties.getProperty( FORMAT_UPDATE_BY );
166 | String[] updateBy = null;
167 | if( updateByProperty != null && !updateByProperty.isEmpty() )
168 | updateBy = updateByProperty.split( separator );
169 |
170 | Fields updateByFields = null;
171 | if( updateByProperty != null && !updateByProperty.isEmpty() )
172 | updateByFields = new Fields( updateBy );
173 |
174 | String[] orderBy = null;
175 | String orderByProperty = properties.getProperty( FORMAT_ORDER_BY );
176 | if( orderByProperty != null && !orderByProperty.isEmpty() )
177 | orderBy = orderByProperty.split( separator );
178 |
179 | return createUpdatableScheme( fields, limit, columnNames, tableAlias, conditions, updateBy, updateByFields, orderBy );
180 |
181 | }
182 |
183 | protected Scheme createUpdatableScheme( Fields fields, long limit, String[] columnNames, Boolean tableAlias, String conditions,
184 | String[] updateBy, Fields updateByFields, String[] orderBy, Properties properties )
185 | {
186 | return new JDBCScheme( getInputFormatClass(), getOutputFormClass(), fields, columnNames, orderBy, conditions, limit, updateByFields,
187 | updateBy, tableAlias );
188 | }
189 |
190 | protected Scheme createUpdatableScheme( Fields fields, long limit, String[] columnNames, Boolean tableAlias, String conditions,
191 | String[] updateBy, Fields updateByFields, String[] orderBy )
192 | {
193 | return createUpdatableScheme( fields, limit, columnNames, tableAlias, conditions, updateBy, updateByFields, orderBy, new Properties() );
194 | }
195 |
196 | protected Scheme createScheme( Fields fields, String selectQuery, String countQuery, long limit, String[] columnNames, boolean tableAlias )
197 | {
198 | return new JDBCScheme( getInputFormatClass(), fields, columnNames, selectQuery, countQuery, limit, tableAlias );
199 | }
200 |
201 | /**
202 | * Private helper method to extract values representing a {@link TableDesc}
203 | * instance from the properties passed to the createTap method.
204 | *
205 | * @param properties A properties instance.
206 | * @return A {@link TableDesc} instance.
207 | *
208 | */
209 | protected TableDesc createTableDescFromProperties( Properties properties )
210 | {
211 | String tableName = properties.getProperty( PROTOCOL_TABLE_NAME );
212 |
213 | if( tableName == null || tableName.isEmpty() )
214 | throw new IllegalArgumentException( "no tablename given" );
215 |
216 | String separator = properties.getProperty( PROTOCOL_FIELD_SEPARATOR, DEFAULT_SEPARATOR );
217 |
218 | String[] columnNames = null;
219 | String columnNamesProperty = properties.getProperty( PROTOCOL_COLUMN_NAMES );
220 | if( columnNamesProperty != null && !columnNamesProperty.isEmpty() )
221 | columnNames = columnNamesProperty.split( separator );
222 |
223 | String[] columnDefs = null;
224 | String columnDefsProperty = properties.getProperty( PROTOCOL_COLUMN_DEFS );
225 | if( columnDefsProperty != null && !columnDefsProperty.isEmpty() )
226 | columnDefs = columnDefsProperty.split( separator );
227 |
228 | String primaryKeysProperty = properties.getProperty( PROTOCOL_PRIMARY_KEYS );
229 |
230 | String[] primaryKeys = null;
231 |
232 | if( primaryKeysProperty != null && !primaryKeysProperty.isEmpty() )
233 | primaryKeys = primaryKeysProperty.split( separator );
234 |
235 | TableDesc desc = new TableDesc( tableName, columnNames, columnDefs, primaryKeys );
236 | return desc;
237 | }
238 |
239 | /**
240 | * Returns {@link DBInputFormat} class. This can be overwritten in subclasses, if they
241 | * have a custom {@link DBInputFormat}.
242 | *
243 | * @return the {@link InputFormat} to use.
244 | * */
245 | protected Class extends DBInputFormat> getInputFormatClass()
246 | {
247 | return DBInputFormat.class;
248 | }
249 |
250 | /**
251 | * Returns {@link DBOutputFormat} class. This can be overwritten in subclasses, if they
252 | * have a custom {@link DBInputFormat}.
253 | *
254 | * @return the {@link InputFormat} to use.
255 | * */
256 | protected Class extends DBOutputFormat> getOutputFormClass()
257 | {
258 | return DBOutputFormat.class;
259 | }
260 |
261 | protected String[] getColumnNames( Fields fields, Properties properties, String separator )
262 | {
263 | String[] columNames = null;
264 | String columnNamesProperty = properties.getProperty( FORMAT_COLUMNS );
265 | if( columnNamesProperty != null && !columnNamesProperty.isEmpty() )
266 | columNames = columnNamesProperty.split( separator );
267 | else if( fields != null )
268 | {
269 | columNames = new String[ fields.size() ];
270 | for( int i = 0; i < fields.size(); i++ )
271 | {
272 | Comparable> cmp = fields.get( i );
273 | columNames[ i ] = cmp.toString();
274 | }
275 | }
276 | return columNames;
277 | }
278 |
279 | protected boolean getTableAlias( Properties properties )
280 | {
281 | boolean tableAlias = false;
282 | String tableAliasProperty = properties.getProperty( FORMAT_TABLE_ALIAS );
283 | if( tableAliasProperty != null )
284 | Boolean.parseBoolean( tableAliasProperty );
285 |
286 | return tableAlias;
287 | }
288 | }
289 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/main/java/cascading/jdbc/JDBCUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import java.io.IOException;
24 | import java.sql.Connection;
25 | import java.sql.DatabaseMetaData;
26 | import java.sql.ResultSet;
27 | import java.sql.SQLException;
28 | import java.sql.Statement;
29 | import java.util.ArrayList;
30 | import java.util.Collections;
31 | import java.util.List;
32 |
33 | import org.slf4j.Logger;
34 | import org.slf4j.LoggerFactory;
35 |
36 | /**
37 | * Utility class for interacting with databases via JDBC.
38 | */
39 | public class JDBCUtil
40 | {
41 | /**Logger*/
42 | private static final Logger LOG = LoggerFactory.getLogger( JDBCUtil.class );
43 |
44 | /**
45 | * Method to check if a table exists in the database of the given Connection object
46 | * */
47 | public static boolean tableExists( Connection connection, TableDesc tableDesc ) throws IOException
48 | {
49 | ResultSet tables = null;
50 | try
51 | {
52 | DatabaseMetaData dbm = connection.getMetaData();
53 | tables = dbm.getTables( null, null, tableDesc.getTableName(), null );
54 | if( tables.next() )
55 | return true;
56 | tables.close();
57 | // try again with upper case for oracle compatibility:
58 | // see http://stackoverflow.com/questions/2942788/check-if-table-exists
59 | tables = dbm.getTables( null, null, tableDesc.getTableName().toUpperCase(), null );
60 | if( tables.next() )
61 | return true;
62 | }
63 | catch( SQLException exception )
64 | {
65 | throw new IOException( exception );
66 | }
67 | finally
68 | {
69 | if( tables != null )
70 | try
71 | {
72 | tables.close();
73 | }
74 | catch( SQLException exception )
75 | {
76 | throw new IOException( exception );
77 | }
78 | }
79 | return false;
80 | }
81 |
82 | /**
83 | * Creates a table from the given table descriptor if it does not exist.
84 | * */
85 | public static void createTableIfNotExists( Connection connection, TableDesc tableDesc ) throws IOException
86 | {
87 | if( tableExists( connection, tableDesc ) )
88 | return;
89 |
90 | executeUpdate( connection, tableDesc.getCreateTableStatement() );
91 | }
92 |
93 | /**
94 | * Executes the given sql query on the given Connection.
95 | * */
96 | public static int executeUpdate( Connection connection, String updateString ) throws IOException
97 | {
98 | Statement statement = null;
99 | int result;
100 | try
101 | {
102 | LOG.info( "executing update: {}", updateString );
103 |
104 | statement = connection.createStatement();
105 | result = statement.executeUpdate( updateString );
106 |
107 | connection.commit();
108 | statement.close();
109 | }
110 | catch( SQLException exception )
111 | {
112 | throw new IOException( "SQL error code: " + exception.getErrorCode() + " executing update statement: " + updateString, exception );
113 | }
114 |
115 | finally
116 | {
117 | try
118 | {
119 | if( statement != null )
120 | statement.close();
121 | }
122 | catch( SQLException exception )
123 | {
124 | throw new IOException( exception );
125 | }
126 | }
127 | return result;
128 | }
129 |
130 | /**
131 | * Drops the table described by the table descriptor if it exists.
132 | * */
133 | public static void dropTable( Connection connection, TableDesc tableDesc ) throws IOException
134 | {
135 | if( tableExists( connection, tableDesc ) )
136 | executeUpdate( connection, tableDesc.getTableDropStatement() );
137 | }
138 |
139 | /**
140 | * Closes the given database connection.
141 | * */
142 | public static void closeConnection( Connection connection ) throws IOException
143 | {
144 | if ( connection != null )
145 | {
146 | try
147 | {
148 | if( connection.isClosed() )
149 | return;
150 | connection.commit();
151 | connection.close();
152 | }
153 | catch( SQLException exception )
154 | {
155 | throw new IOException( exception );
156 | }
157 | }
158 | }
159 |
160 | /**
161 | * Method executeQuery allows for ad-hoc queries to be sent to the remote
162 | * RDBMS. A value of -1 for returnResults will return a List of all results
163 | * from the query, a value of 0 will return an empty List.
164 | *
165 | * @param queryString of type String
166 | * @param returnResults of type int
167 | * @return List
168 | */ public static List
64 | */
public interface DBWritable {

  /**
   * Writes this object's fields into the given {@link PreparedStatement},
   * binding one statement parameter per field.
   *
   * @param statement the statement receiving the field values.
   * @throws SQLException when setting a parameter fails.
   */
  void write(PreparedStatement statement) throws SQLException;

  /**
   * Populates this object's fields from the current row of the given
   * {@link ResultSet}.
   *
   * @param resultSet the result set positioned at the row to read.
   * @throws SQLException when reading a column fails.
   */
  void readFields(ResultSet resultSet) throws SQLException;

}
84 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/test/java/cascading/jdbc/GenericJdbcCheck.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package cascading.jdbc;
21 |
22 | import static org.junit.Assert.fail;
23 |
24 | import org.junit.Before;
25 |
26 | public class GenericJdbcCheck extends JDBCTestingBase
27 | {
28 |
29 | public final static String JDBC_URL_PROPERTY_NAME = "cascading.jdbcurl";
30 |
31 | public final static String JDBC_DRIVER_PROPERTY_NAME = "cascading.jdbcdriver";
32 |
33 | @Before
34 | public void setUp()
35 | {
36 | if ( System.getProperty( JDBC_DRIVER_PROPERTY_NAME ) == null || System.getProperty( JDBC_URL_PROPERTY_NAME ) == null )
37 | fail( String.format( "please set the '%s' and '%s' system properties", JDBC_DRIVER_PROPERTY_NAME, JDBC_URL_PROPERTY_NAME ) );
38 |
39 | setJdbcurl( System.getProperty( JDBC_URL_PROPERTY_NAME ) );
40 | setDriverName( System.getProperty( JDBC_DRIVER_PROPERTY_NAME ) );
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/test/java/cascading/jdbc/InternalTypeMappingTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package cascading.jdbc;
21 |
22 | import static org.junit.Assert.*;
23 |
24 | import java.sql.Date;
25 | import java.sql.Time;
26 | import java.sql.Timestamp;
27 |
28 | import org.junit.Test;
29 |
30 | import cascading.lingual.type.SQLDateCoercibleType;
31 | import cascading.lingual.type.SQLTimeCoercibleType;
32 | import cascading.lingual.type.SQLTimestampCoercibleType;
33 |
/**
 * Unit tests for the mapping of java and lingual types to SQL column
 * definitions performed by {@code InternalTypeMapping.sqltypeForClass}.
 */
public class InternalTypeMappingTest
{

// verifies the mapping of primitive, boxed and java.sql types
@Test
public void testMappings()
{
// primitives map to non-nullable int columns
assertEquals( "int not null", InternalTypeMapping.sqltypeForClass( int.class ) );
assertEquals( "int not null", InternalTypeMapping.sqltypeForClass( long.class ) );
// boxed types map to nullable int columns
assertEquals( "int", InternalTypeMapping.sqltypeForClass( Integer.class ) );
assertEquals( "int", InternalTypeMapping.sqltypeForClass( Long.class ) );
assertEquals( "varchar(256)", InternalTypeMapping.sqltypeForClass( String.class ) );
// java.sql date/time types map to their SQL equivalents
assertEquals( "timestamp", InternalTypeMapping.sqltypeForClass( Timestamp.class ) );
assertEquals( "time", InternalTypeMapping.sqltypeForClass( Time.class ) );
assertEquals( "date", InternalTypeMapping.sqltypeForClass( Date.class ) );
}

// unsupported classes are rejected with an IllegalArgumentException
@Test(expected = IllegalArgumentException.class)
public void testUnknownClass()
{
InternalTypeMapping.sqltypeForClass( boolean.class );
}

// lingual coercible type instances map to their corresponding SQL types
@Test
public void testTypeWithCoercibles()
{
assertEquals( "date", InternalTypeMapping.sqltypeForClass( new SQLDateCoercibleType() ) );
assertEquals( "time", InternalTypeMapping.sqltypeForClass( new SQLTimeCoercibleType() ) );
assertEquals( "timestamp", InternalTypeMapping.sqltypeForClass( new SQLTimestampCoercibleType() ) );
}
}
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/test/java/cascading/jdbc/JDBCFactoryTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import static org.junit.Assert.assertArrayEquals;
24 | import static org.junit.Assert.assertEquals;
25 | import static org.junit.Assert.assertNotNull;
26 | import static org.mockito.Mockito.mock;
27 | import static org.mockito.Mockito.when;
28 |
29 | import java.util.Properties;
30 |
31 | import org.junit.Test;
32 |
33 | import cascading.jdbc.JDBCFactory;
34 | import cascading.jdbc.JDBCScheme;
35 | import cascading.jdbc.JDBCTap;
36 | import cascading.jdbc.TableDesc;
37 | import cascading.jdbc.db.DBInputFormat;
38 | import cascading.scheme.Scheme;
39 | import cascading.tap.SinkMode;
40 | import cascading.tuple.Fields;
41 |
42 | /**
43 | * Tests for {@link JDBCFactory}.
44 | *
45 | * */
46 | public class JDBCFactoryTest
47 | {
48 |
49 | @Test(expected = IllegalArgumentException.class)
50 | public void testCreateTapNoTableName()
51 | {
52 | String protocol = "jdbc";
53 | String identifier = "jdbc:some:stuf//database";
54 | JDBCScheme mockScheme = mock( JDBCScheme.class );
55 |
56 | JDBCFactory factory = new JDBCFactory();
57 |
58 | Properties props = new Properties();
59 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" );
60 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" );
61 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "username" );
62 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "password" );
63 |
64 | factory.createTap( protocol, mockScheme, identifier, SinkMode.REPLACE, props );
65 | }
66 |
67 | @Test(expected = IllegalArgumentException.class)
68 | public void testCreateTapEmptyTableName()
69 | {
70 | String protocol = "jdbc";
71 | String identifier = "jdbc:some:stuf//database";
72 | JDBCScheme mockScheme = mock( JDBCScheme.class );
73 |
74 | JDBCFactory factory = new JDBCFactory();
75 |
76 | Properties props = new Properties();
77 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" );
78 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" );
79 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "username" );
80 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "password" );
81 | props.setProperty( JDBCFactory.PROTOCOL_TABLE_NAME, "" );
82 |
83 | factory.createTap( protocol, mockScheme, identifier, SinkMode.REPLACE, props );
84 | }
85 |
86 |
87 | @Test
88 | public void testCreateTapFullyWorking()
89 | {
90 | String protocol = "jdbc";
91 | String identifier = "jdbc:some:stuf//database";
92 | JDBCScheme mockScheme = mock( JDBCScheme.class );
93 |
94 | JDBCFactory factory = new JDBCFactory();
95 |
96 | Properties props = new Properties();
97 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" );
98 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" );
99 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "username" );
100 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "password" );
101 |
102 | props.setProperty( JDBCFactory.PROTOCOL_TABLE_NAME, "myTable" );
103 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_NAMES, "id:name:lastname" );
104 |
105 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_DEFS, "int:varchar(42):varchar(23)" );
106 | props.setProperty( JDBCFactory.PROTOCOL_PRIMARY_KEYS, "id" );
107 |
108 | JDBCTap tap = (JDBCTap) factory.createTap( protocol, mockScheme, identifier, SinkMode.REPLACE, props );
109 | assertEquals( mockScheme, tap.getScheme() );
110 | assertEquals( "myTable", tap.getTableName() );
111 | assertEquals( SinkMode.REPLACE, tap.getSinkMode() );
112 | TableDesc tdesc = tap.tableDesc;
113 |
114 | assertEquals( "myTable", tdesc.getTableName() );
115 | assertArrayEquals( new String[] { "id", "name", "lastname" }, tdesc.getColumnNames() );
116 | assertArrayEquals( new String[] { "int", "varchar(42)", "varchar(23)" }, tdesc.getColumnDefs() );
117 | assertArrayEquals( new String[] { "id" }, tdesc.getPrimaryKeys() );
118 |
119 | }
120 |
121 | @Test
122 | public void testCreateTapSinkModeOverwrite()
123 | {
124 | String protocol = "jdbc";
125 | String identifier = "jdbc:some:stuf//database";
126 | JDBCScheme mockScheme = mock( JDBCScheme.class );
127 |
128 | JDBCFactory factory = new JDBCFactory();
129 |
130 | Properties props = new Properties();
131 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" );
132 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" );
133 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "username" );
134 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "password" );
135 |
136 | props.setProperty( JDBCFactory.PROTOCOL_TABLE_NAME, "myTable" );
137 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_NAMES, "id:name:lastname" );
138 |
139 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_DEFS, "int:varchar(42):varchar(23)" );
140 | props.setProperty( JDBCFactory.PROTOCOL_PRIMARY_KEYS, "id" );
141 | props.setProperty( JDBCFactory.PROTOCOL_SINK_MODE, "KEEP" );
142 |
143 |
144 | JDBCTap tap = (JDBCTap) factory.createTap( protocol, mockScheme, identifier, SinkMode.REPLACE, props );
145 | assertEquals( mockScheme, tap.getScheme() );
146 | assertEquals( "myTable", tap.getTableName() );
147 | assertEquals( SinkMode.KEEP, tap.getSinkMode() );
148 | TableDesc tdesc = tap.tableDesc;
149 |
150 | assertEquals( "myTable", tdesc.getTableName() );
151 | assertArrayEquals( new String[] { "id", "name", "lastname" }, tdesc.getColumnNames() );
152 | assertArrayEquals( new String[] { "int", "varchar(42)", "varchar(23)" }, tdesc.getColumnDefs() );
153 | assertArrayEquals( new String[] { "id" }, tdesc.getPrimaryKeys() );
154 |
155 | }
156 |
157 | @Test
158 | public void testCreateTapFullyWorkingWithEmptyUserAndPass()
159 | {
160 | String protocol = "jdbc";
161 | String identifier = "jdbc:some:stuf//database";
162 | JDBCScheme mockScheme = mock( JDBCScheme.class );
163 |
164 | JDBCFactory factory = new JDBCFactory();
165 |
166 | Properties props = new Properties();
167 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" );
168 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" );
169 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "" );
170 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "" );
171 |
172 | props.setProperty( JDBCFactory.PROTOCOL_TABLE_NAME, "myTable" );
173 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_NAMES, "id:name:lastname" );
174 |
175 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_DEFS, "int:varchar(42):varchar(23)" );
176 | props.setProperty( JDBCFactory.PROTOCOL_PRIMARY_KEYS, "id" );
177 |
178 | JDBCTap tap = (JDBCTap) factory.createTap( protocol, mockScheme, identifier, SinkMode.UPDATE, props );
179 | assertEquals( mockScheme, tap.getScheme() );
180 | assertEquals( "myTable", tap.getTableName() );
181 | assertEquals( SinkMode.UPDATE, tap.getSinkMode() );
182 | TableDesc tdesc = tap.tableDesc;
183 |
184 | assertEquals( "myTable", tdesc.getTableName() );
185 | assertArrayEquals( new String[] { "id", "name", "lastname" }, tdesc.getColumnNames() );
186 | assertArrayEquals( new String[] { "int", "varchar(42)", "varchar(23)" }, tdesc.getColumnDefs() );
187 | assertArrayEquals( new String[] { "id" }, tdesc.getPrimaryKeys() );
188 |
189 | }
190 |
191 | @Test
192 | public void testCreateTapWithMissingTableInformation()
193 | {
194 | String protocol = "jdbc";
195 | String identifier = "jdbc:some:stuf//database";
196 | JDBCScheme mockScheme = mock( JDBCScheme.class );
197 |
198 | JDBCFactory factory = new JDBCFactory();
199 |
200 | Properties props = new Properties();
201 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" );
202 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" );
203 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "" );
204 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "" );
205 |
206 | props.setProperty( JDBCFactory.PROTOCOL_TABLE_NAME, "myTable" );
207 |
208 | String[] columnNames = new String [] {"id", "name", "lastname"};
209 | @SuppressWarnings("rawtypes")
210 | Class[] fieldTypes = new Class>[] {int.class, String.class, String.class};
211 |
212 | Fields fields = new Fields(columnNames, fieldTypes);
213 | when(mockScheme.getSinkFields()).thenReturn( fields );
214 |
215 | JDBCTap tap = (JDBCTap) factory.createTap( protocol, mockScheme, identifier, SinkMode.UPDATE, props );
216 |
217 | TableDesc tdesc = tap.tableDesc;
218 | assertEquals( "myTable", tdesc.getTableName() );
219 | assertArrayEquals( new String[] { "id", "name", "lastname" }, tdesc.getColumnNames() );
220 | assertArrayEquals( new String[] { "int not null", "varchar(256)", "varchar(256)" }, tdesc.getColumnDefs() );
221 |
222 | }
223 |
224 | @Test
225 | public void testCreateScheme()
226 | {
227 | JDBCFactory factory = new JDBCFactory();
228 | Fields fields = new Fields( "ONE", "TWO", "THREE" );
229 |
230 | Properties schemeProperties = new Properties();
231 | schemeProperties.setProperty( JDBCFactory.FORMAT_COLUMNS, "one:two:three" );
232 |
233 | Scheme, ?, ?, ?, ?> scheme = factory.createScheme( "someFormat", fields, schemeProperties );
234 | assertNotNull( scheme );
235 |
236 | JDBCScheme jdbcScheme = (JDBCScheme) scheme;
237 |
238 | assertArrayEquals( jdbcScheme.getColumns(), new String[] { "one", "two", "three" } );
239 |
240 | }
241 |
242 |
243 | @Test
244 | public void testCreateSchemeColumnsFromFields()
245 | {
246 | JDBCFactory factory = new JDBCFactory();
247 | Fields fields = new Fields( "one", "two", "three" );
248 |
249 | Properties schemeProperties = new Properties();
250 |
251 | Scheme, ?, ?, ?, ?> scheme = factory.createScheme( "someFormat", fields, schemeProperties );
252 | assertNotNull( scheme );
253 |
254 | JDBCScheme jdbcScheme = (JDBCScheme) scheme;
255 |
256 | assertArrayEquals( jdbcScheme.getColumns(), new String[] { "one", "two", "three" } );
257 |
258 | }
259 |
260 | @Test(expected = IllegalArgumentException.class)
261 | public void testCreateSchemeWithSelectNoCount()
262 | {
263 | JDBCFactory factory = new JDBCFactory();
264 | Fields fields = new Fields( "one", "two", "three" );
265 |
266 | Properties schemeProperties = new Properties();
267 | schemeProperties.setProperty( JDBCFactory.FORMAT_COLUMNS, "one:two:three" );
268 | schemeProperties.setProperty( JDBCFactory.FORMAT_SELECT_QUERY, "select one, two, three from table" );
269 |
270 | factory.createScheme( "someFormat", fields, schemeProperties );
271 | }
272 |
273 | @Test
274 | public void testCreateSchemeWithSelectAndCount()
275 | {
276 | JDBCFactory factory = new JDBCFactory();
277 | Fields fields = new Fields( "one", "two", "three" );
278 |
279 | Properties schemeProperties = new Properties();
280 | schemeProperties.setProperty( JDBCFactory.FORMAT_COLUMNS, "one:two:three" );
281 | schemeProperties.setProperty( JDBCFactory.FORMAT_SELECT_QUERY, "select one, two, three from table" );
282 | schemeProperties.setProperty( JDBCFactory.FORMAT_COUNT_QUERY, "select count(*) from table" );
283 |
284 | Scheme, ?, ?, ?, ?> scheme = factory.createScheme( "someFormat", fields, schemeProperties );
285 | assertNotNull( scheme );
286 | }
287 |
288 | @Test
289 | public void testGetInputFormatClass()
290 | {
291 | assertEquals(DBInputFormat.class, new JDBCFactory().getInputFormatClass());
292 | }
293 |
294 | }
295 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/test/java/cascading/jdbc/JDBCSchemeTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import static org.junit.Assert.*;
24 | import static org.mockito.Mockito.*;
25 |
26 | import org.apache.hadoop.mapred.JobConf;
27 | import org.junit.Test;
28 |
29 | import cascading.flow.FlowProcess;
30 | import cascading.jdbc.db.DBInputFormat;
31 | import cascading.jdbc.db.DBOutputFormat;
32 | import cascading.tuple.Fields;
33 |
34 | public class JDBCSchemeTest
35 | {
36 |
37 | @SuppressWarnings("unchecked")
38 | @Test
39 | public void testPresentSinkFields()
40 | {
41 | String[] columnNames = new String[]{ "id", "firstname", "lastname" };
42 | JDBCScheme scheme = new JDBCScheme( DBInputFormat.class, DBOutputFormat.class, Fields.UNKNOWN, columnNames, null, null, -1, null, null,
43 | null );
44 |
45 | @SuppressWarnings("rawtypes")
46 | Class[] fieldTypes = new Class>[]{ int.class, String.class, String.class };
47 | Fields fields = new Fields( columnNames, fieldTypes );
48 | FlowProcess fp = mock( FlowProcess.class );
49 |
50 | JDBCTap tap = mock( JDBCTap.class );
51 |
52 | TableDesc desc = new TableDesc( "test_table" );
53 | when( tap.getTableDesc() ).thenReturn( desc );
54 |
55 | assertFalse( desc.hasRequiredTableInformation() );
56 |
57 | scheme.presentSinkFields( fp, tap, fields );
58 |
59 | assertTrue( desc.hasRequiredTableInformation() );
60 | assertEquals( fields, scheme.getSinkFields() );
61 |
62 | assertArrayEquals( columnNames, desc.getColumnNames() );
63 |
64 | assertArrayEquals( new String[]{ "int not null", "varchar(256)", "varchar(256)" }, desc.getColumnDefs() );
65 |
66 | }
67 |
68 | @SuppressWarnings("unchecked")
69 | @Test
70 | public void testPresentSinkFieldsWithNullColumns()
71 | {
72 | String[] columnNames = new String[]{ "id", "firstname", "lastname" };
73 | JDBCScheme scheme = new JDBCScheme( DBInputFormat.class, DBOutputFormat.class, Fields.UNKNOWN, null, null, null, -1, null, null, null );
74 |
75 | @SuppressWarnings("rawtypes")
76 | Class[] fieldTypes = new Class>[]{ int.class, String.class, String.class };
77 | Fields fields = new Fields( columnNames, fieldTypes );
78 | FlowProcess fp = mock( FlowProcess.class );
79 |
80 | JDBCTap tap = mock( JDBCTap.class );
81 |
82 | TableDesc desc = new TableDesc( "test_table" );
83 | when( tap.getTableDesc() ).thenReturn( desc );
84 |
85 | scheme.presentSinkFields( fp, tap, fields );
86 |
87 | assertTrue( desc.hasRequiredTableInformation() );
88 |
89 | assertArrayEquals( columnNames, scheme.getColumns() );
90 |
91 | }
92 |
93 | @SuppressWarnings("unchecked")
94 | @Test(expected = IllegalArgumentException.class)
95 | public void testPresentSinkFieldsWithFieldsMismatch()
96 | {
97 | String[] columnNames = new String[]{ "id", "firstname", "lastname" };
98 | JDBCScheme scheme = new JDBCScheme( DBInputFormat.class, DBOutputFormat.class, Fields.UNKNOWN, columnNames, null, null, -1, null, null,
99 | null );
100 |
101 | @SuppressWarnings("rawtypes")
102 | Class[] fieldTypes = new Class>[]{ int.class, String.class };
103 | Fields fields = new Fields( new String[]{ "id", "firstname" }, fieldTypes );
104 | FlowProcess fp = mock( FlowProcess.class );
105 |
106 | JDBCTap tap = mock( JDBCTap.class );
107 |
108 | TableDesc desc = new TableDesc( "test_table" );
109 | when( tap.getTableDesc() ).thenReturn( desc );
110 |
111 | scheme.presentSinkFields( fp, tap, fields );
112 |
113 | }
114 |
115 | }
116 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/test/java/cascading/jdbc/TableDescTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import cascading.lingual.type.SQLDateCoercibleType;
24 | import cascading.tuple.Fields;
25 | import org.junit.Test;
26 |
27 | import static org.junit.Assert.*;
28 |
29 | public class TableDescTest
30 | {
31 |
32 | @Test
33 | public void testHasRequiredTableInformation()
34 | {
35 | TableDesc desc = new TableDesc( "name" );
36 | assertFalse( desc.hasRequiredTableInformation() );
37 |
38 | desc = new TableDesc( "name", null, null, null );
39 | assertFalse( desc.hasRequiredTableInformation() );
40 |
41 | desc = new TableDesc( "name", new String[]{ "id" }, null, null );
42 | assertFalse( desc.hasRequiredTableInformation() );
43 |
44 | desc = new TableDesc( "name", new String[]{ "id" }, new String[]{ "int" }, new String[] {"foo"} );
45 | assertTrue( desc.hasRequiredTableInformation() );
46 |
47 | }
48 |
49 | @Test
50 | public void testCompleteFromFields()
51 | {
52 | TableDesc desc = new TableDesc( "name" );
53 | assertFalse( desc.hasRequiredTableInformation() );
54 |
55 | Fields fields = new Fields( "id", int.class );
56 | desc.completeFromFields( fields );
57 |
58 | assertTrue( desc.hasRequiredTableInformation() );
59 |
60 | assertArrayEquals( new String[]{ "id" }, desc.getColumnNames() );
61 |
62 | assertArrayEquals( new String[]{ "int not null" }, desc.getColumnDefs() );
63 | }
64 |
65 | @Test
66 | public void testCompleteFromFieldsWithCoercibleType()
67 | {
68 | TableDesc desc = new TableDesc( "name" );
69 | assertFalse( desc.hasRequiredTableInformation() );
70 |
71 | Fields fields = new Fields( "creation_date", new SQLDateCoercibleType() );
72 | desc.completeFromFields( fields );
73 |
74 | assertTrue( desc.hasRequiredTableInformation() );
75 |
76 | assertArrayEquals( new String[]{ "creation_date" }, desc.getColumnNames() );
77 |
78 | assertArrayEquals( new String[]{ "date" }, desc.getColumnDefs() );
79 |
80 | }
81 |
82 | @Test(expected = IllegalArgumentException.class)
83 | public void testCompleteFromFieldsMissingType()
84 | {
85 | TableDesc desc = new TableDesc( "name" );
86 | assertFalse( desc.hasRequiredTableInformation() );
87 |
88 | Fields fields = new Fields( "id" );
89 | desc.completeFromFields( fields );
90 | }
91 |
92 | @Test(expected = IllegalStateException.class)
93 | public void testCompleteFromFieldsWithUnknownFields()
94 | {
95 | TableDesc desc = new TableDesc( "name" );
96 | assertFalse( desc.hasRequiredTableInformation() );
97 |
98 | Fields fields = Fields.UNKNOWN;
99 | desc.completeFromFields( fields );
100 | }
101 |
102 | }
103 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/test/java/cascading/jdbc/TupleRecordTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import static org.junit.Assert.*;
24 | import static org.mockito.Mockito.*;
25 |
26 | import java.sql.PreparedStatement;
27 | import java.sql.ResultSet;
28 | import java.sql.ResultSetMetaData;
29 | import java.sql.SQLException;
30 |
31 | import org.junit.Test;
32 |
33 | import cascading.tuple.Tuple;
34 |
35 | public class TupleRecordTest
36 | {
37 |
38 | @Test
39 | public void testTupleRecord()
40 | {
41 | Tuple tup = new Tuple();
42 | TupleRecord tupleRecord = new TupleRecord();
43 |
44 | tupleRecord.setTuple( tup );
45 | assertSame( tup, tupleRecord.getTuple() );
46 |
47 | }
48 |
49 | @Test
50 | public void testWrite() throws SQLException
51 | {
52 | Tuple t = new Tuple( "one", "two", "three" );
53 | PreparedStatement stmt = mock( PreparedStatement.class );
54 | TupleRecord tupleRecord = new TupleRecord( t );
55 | tupleRecord.write( stmt );
56 | verify( stmt ).setObject( 1, "one" );
57 | verify( stmt ).setObject( 2, "two" );
58 | verify( stmt ).setObject( 3, "three" );
59 | verifyNoMoreInteractions( stmt );
60 | }
61 |
62 | @Test
63 | public void testRead() throws SQLException
64 | {
65 | Tuple expectedTuple = new Tuple( "foo", "bar", "baz" );
66 |
67 | ResultSet resultSet = mock( ResultSet.class );
68 | ResultSetMetaData rsm = mock( ResultSetMetaData.class );
69 | when( rsm.getColumnCount() ).thenReturn( 3 );
70 | when( resultSet.getMetaData() ).thenReturn( rsm );
71 | when( resultSet.getObject( 1 ) ).thenReturn( "foo" );
72 | when( resultSet.getObject( 2 ) ).thenReturn( "bar" );
73 | when( resultSet.getObject( 3 ) ).thenReturn( "baz" );
74 |
75 | TupleRecord tupleRecord = new TupleRecord();
76 |
77 | tupleRecord.readFields( resultSet );
78 |
79 | Tuple result = tupleRecord.getTuple();
80 |
81 | assertEquals( expectedTuple, result );
82 |
83 | }
84 |
85 | }
86 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/test/resources/data/small.txt:
--------------------------------------------------------------------------------
1 | 1 a A
2 | 1 b B
3 | 1 c C
4 | 2 b B
5 | 2 c C
6 | 2 d D
7 | 3 c C
8 | 4 b B
9 | 4 c C
10 | 4 d D
11 | 5 a A
12 | 5 b B
13 | 5 e E
14 |
--------------------------------------------------------------------------------
/cascading-jdbc-core/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | #
4 | # Project and contact information: http://www.cascading.org/
5 | #
6 | # This file is part of the Cascading project.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at
11 | #
12 | # http://www.apache.org/licenses/LICENSE-2.0
13 | #
14 | # Unless required by applicable law or agreed to in writing, software
15 | # distributed under the License is distributed on an "AS IS" BASIS,
16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and
18 | # limitations under the License.
19 | #
20 |
21 | # log4j configuration used during build and unit tests
22 |
23 | log4j.rootLogger=info,stdout
log4j.threshold=ALL
25 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
26 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
27 | log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
28 |
29 | log4j.logger.cascading=INFO
--------------------------------------------------------------------------------
/cascading-jdbc-derby/build.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
// the core module's test sources are reused below, so it must be evaluated first
evaluationDependsOn( ":cascading-jdbc-core" )

// single place to bump the Apache Derby release shared by all three artifacts below
ext.derbyVersion = "10.11.1.1"

dependencies{
  compile project( ':cascading-jdbc-core' )

  // derby embedded engine, network client driver and network server, kept at the same version
  compile( group: 'org.apache.derby', name: 'derby', version: derbyVersion )
  compile( group: 'org.apache.derby', name: 'derbyclient', version: derbyVersion )
  compile( group: 'org.apache.derby', name: 'derbynet', version: derbyVersion )

  // reuse the shared JDBCTestingBase and helpers from the core module's tests
  testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath
}
34 |
// prints the compile-scope dependencies (minus 'provided' ones), i.e. what ends
// up inside the provider jar built by myJar below.
// uses doLast {} instead of the deprecated leftShift (<<) task operator.
task configInfo {
  doLast {
    ( configurations.compile.minus( [ configurations.provided ] ) ).each { entry ->
      println entry
    }
  }
}
40 |
41 |
// builds a self-contained "provider" jar holding the compile classpath (minus
// 'provided' dependencies), so it can be dropped into a cascading application as-is
task myJar( type: Jar ) {
  classifier = 'provider'
  dependsOn configurations.runtime
  from {
    // unpack jar dependencies into the output; plain directories are copied directly
    ( configurations.compile.minus([configurations.provided])).collect{
      it.isDirectory() ? it : zipTree(it)
    }
  } {
    // strip signature files, which would be invalid in a repackaged jar
    exclude "META-INF/*.SF"
    exclude "META-INF/*.DSA"
    exclude "META-INF/*.RSA"
  }

}
56 |
--------------------------------------------------------------------------------
/cascading-jdbc-derby/src/main/java/cascading/jdbc/DerbyFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import cascading.jdbc.db.DBInputFormat;
24 | import cascading.jdbc.db.DerbyDBInputFormat;
25 |
26 | /**
27 | *
28 | */
29 | public class DerbyFactory extends JDBCFactory
30 | {
31 | @Override
32 | protected Class extends DBInputFormat> getInputFormatClass()
33 | {
34 | return DerbyDBInputFormat.class;
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/cascading-jdbc-derby/src/main/java/cascading/jdbc/db/DerbyDBInputFormat.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc.db;
22 |
23 | import java.io.IOException;
24 | import java.sql.SQLException;
25 |
26 | import org.apache.hadoop.io.LongWritable;
27 | import org.apache.hadoop.mapred.JobConf;
28 | import org.apache.hadoop.mapred.RecordReader;
29 |
/**
 * {@link DBInputFormat} specialization for Apache Derby. Derby does not
 * support a LIMIT clause, so the record reader below restricts each input
 * split with the SQL standard "OFFSET ... ROWS FETCH NEXT ... ROWS ONLY"
 * syntax instead.
 */
public class DerbyDBInputFormat extends DBInputFormat
  {

  @Override
  protected RecordReader getRecordReaderInternal( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException
    {
    return new DerbyDBRecordReader( split, inputClass, job );
    }

  class DerbyDBRecordReader extends DBInputFormat.DBRecordReader
    {
    protected DerbyDBRecordReader( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException
      {
      super( split, inputClass, job );
      }

    /** Returns the query for selecting the records from a Derby DB. */
    protected String getSelectQuery()
      {
      StringBuilder query = new StringBuilder();

      // no user supplied query: build a SELECT from the configured field
      // names, table name, conditions and ordering
      if( dbConf.getInputQuery() == null )
        {
        query.append( "SELECT " );

        for( int i = 0; i < fieldNames.length; i++ )
          {
          query.append( fieldNames[ i ] );
          if( i != fieldNames.length - 1 )
            {
            query.append( ", " );
            }
          }

        query.append( " FROM " ).append( tableName );
        if( conditions != null && conditions.length() > 0 )
          query.append( " WHERE " ).append( conditions );

        String orderBy = dbConf.getInputOrderBy();
        if( orderBy != null && orderBy.length() > 0 )
          query.append( " ORDER BY " ).append( orderBy );
        }
      else
        {
        // a prebuilt query was configured; use it verbatim as the base query
        query.append( dbConf.getInputQuery() );
        }
      try
        {
        // restrict the query to this split's row range; Derby has no LIMIT,
        // so the standard OFFSET/FETCH NEXT clause is used instead
        if( split.getLength() > 0 && split.getStart() >= 0 )
          {
          query.append( " OFFSET " ).append( split.getStart() )
            .append( " ROWS FETCH NEXT " ).append( split.getEnd() - split.getStart() ).append( " ROWS ONLY" );
          }
        }
      catch( IOException ex )
        {
        // getLength()/getStart() declare IOException but do not throw it here
        }

      return query.toString();
      }
    }
  }
98 |
--------------------------------------------------------------------------------
/cascading-jdbc-derby/src/main/resources/cascading/bind/provider.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | #
4 | # Project and contact information: http://www.cascading.org/
5 | #
6 | # This file is part of the Cascading project.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at
11 | #
12 | # http://www.apache.org/licenses/LICENSE-2.0
13 | #
14 | # Unless required by applicable law or agreed to in writing, software
15 | # distributed under the License is distributed on an "AS IS" BASIS,
16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and
18 | # limitations under the License.
19 | #
20 |
21 | # default name of provider
22 | cascading.bind.provider.names=derby
23 | cascading.bind.provider.derby.platforms=hadoop,hadoop2-mr1
24 |
25 | # factory
26 | cascading.bind.provider.derby.factory.classname=cascading.jdbc.DerbyFactory
27 |
28 | # the protocol is jdbc
29 | cascading.bind.provider.derby.protocol.names=jdbc
30 | cascading.bind.provider.derby.protocol.jdbc.schemes=derby
31 | cascading.bind.provider.derby.protocol.jdbc.jdbcdriver=org.apache.derby.jdbc.ClientDriver
32 | cascading.bind.provider.derby.protocol.jdbc.tabledescseparator=:
33 | cascading.bind.provider.derby.protocol.jdbc.jdbcuser=
34 | cascading.bind.provider.derby.protocol.jdbc.jdbcpassword=
35 | cascading.bind.provider.derby.protocol.jdbc.tabledesc.tablename=
36 | cascading.bind.provider.derby.protocol.jdbc.tabledesc.columnnames=
37 | cascading.bind.provider.derby.protocol.jdbc.tabledesc.columndefs=
38 | cascading.bind.provider.derby.protocol.jdbc.tabledesc.primarykeys=
39 | cascading.bind.provider.derby.protocol.jdbc.sinkmode=
40 |
41 | # the format is derby
42 | cascading.bind.provider.derby.format.names=derby
43 | cascading.bind.provider.derby.format.derby.protocols=jdbc
44 | cascading.bind.provider.derby.format.derby.separator=:
45 | cascading.bind.provider.derby.format.derby.columnnames=
46 | cascading.bind.provider.derby.format.derby.orderBy=
47 | cascading.bind.provider.derby.format.derby.conditions=
48 | cascading.bind.provider.derby.format.derby.limit=
49 | cascading.bind.provider.derby.format.derby.updateBy=
50 | cascading.bind.provider.derby.format.derby.tableAlias=
51 | cascading.bind.provider.derby.format.derby.selectquery=
52 | cascading.bind.provider.derby.format.derby.countquery=
53 |
--------------------------------------------------------------------------------
/cascading-jdbc-derby/src/test/java/cascading/jdbc/DerbyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import java.io.PrintWriter;
24 | import java.net.InetAddress;
25 |
26 | import cascading.jdbc.db.DerbyDBInputFormat;
27 | import org.apache.derby.drda.NetworkServerControl;
28 | import org.junit.After;
29 | import org.junit.Before;
30 |
31 | /**
32 | * This class runs the tests against an in network instance of apache derby:
33 | * http://db.apache.org/derby/
34 | * */
35 | public class DerbyTest extends JDBCTestingBase
36 | {
37 |
38 | private final int PORT = 9006;
39 | private NetworkServerControl serverControl;
40 |
41 |
42 |
43 | @Before
44 | public void setUp() throws Exception
45 | {
46 | System.setProperty( "derby.storage.rowLocking", "true" );
47 | System.setProperty( "derby.locks.monitor", "true" );
48 | System.setProperty( "derby.locks.deadlockTrace", "true" );
49 | System.setProperty( "derby.system.home", "build/derby" );
50 |
51 | serverControl = new NetworkServerControl( InetAddress.getByName( "localhost" ), PORT );
52 | serverControl.start( new PrintWriter(System.out,true ) );
53 |
54 | setDriverName( "org.apache.derby.jdbc.ClientDriver" );
55 | setJdbcurl( String.format("jdbc:derby://localhost:%s/testing;create=true", PORT) );
56 | setInputFormatClass( DerbyDBInputFormat.class );
57 | setFactory( new DerbyFactory() );
58 |
59 | }
60 |
61 | @After
62 | public void tearDown() throws Exception
63 | {
64 | serverControl.shutdown();
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/cascading-jdbc-h2/build.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
// the core module's test sources are reused below, so it must be evaluated first
evaluationDependsOn( ":cascading-jdbc-core" )

dependencies{
  compile project( ':cascading-jdbc-core' )

  // in-memory h2 database engine used by the tests
  compile( group: 'com.h2database', name:'h2', version: '1.3.173' )

  // reuse the shared JDBCTestingBase and helpers from the core module's tests
  testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath
}
30 |
31 |
--------------------------------------------------------------------------------
/cascading-jdbc-h2/src/main/resources/cascading/bind/provider.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | #
4 | # Project and contact information: http://www.cascading.org/
5 | #
6 | # This file is part of the Cascading project.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at
11 | #
12 | # http://www.apache.org/licenses/LICENSE-2.0
13 | #
14 | # Unless required by applicable law or agreed to in writing, software
15 | # distributed under the License is distributed on an "AS IS" BASIS,
16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and
18 | # limitations under the License.
19 | #
20 |
21 | # default name of provider
22 | cascading.bind.provider.names=h2
23 | cascading.bind.provider.h2.platforms=hadoop,hadoop2-mr1
24 |
25 | # factory
cascading.bind.provider.h2.factory.classname=cascading.jdbc.JDBCFactory
27 |
28 | # the protocol is jdbc
29 | cascading.bind.provider.h2.protocol.names=jdbc
30 | cascading.bind.provider.h2.protocol.jdbc.schemes=h2
31 | cascading.bind.provider.h2.protocol.jdbc.jdbcdriver=org.h2.Driver
32 | cascading.bind.provider.h2.protocol.jdbc.tabledescseparator=:
33 | cascading.bind.provider.h2.protocol.jdbc.jdbcuser=
34 | cascading.bind.provider.h2.protocol.jdbc.jdbcpassword=
35 | cascading.bind.provider.h2.protocol.jdbc.tabledesc.tablename=
36 | cascading.bind.provider.h2.protocol.jdbc.tabledesc.columnnames=
37 | cascading.bind.provider.h2.protocol.jdbc.tabledesc.columndefs=
38 | cascading.bind.provider.h2.protocol.jdbc.tabledesc.primarykeys=
39 | cascading.bind.provider.h2.protocol.jdbc.sinkmode=
40 |
41 | # the format is h2
42 | cascading.bind.provider.h2.format.names=h2
43 | cascading.bind.provider.h2.format.h2.protocols=jdbc
44 | cascading.bind.provider.h2.format.h2.separator=:
45 | cascading.bind.provider.h2.format.h2.columnnames=
46 | cascading.bind.provider.h2.format.h2.orderBy=
47 | cascading.bind.provider.h2.format.h2.conditions=
48 | cascading.bind.provider.h2.format.h2.limit=
49 | cascading.bind.provider.h2.format.h2.updateBy=
50 | cascading.bind.provider.h2.format.h2.tableAlias=
51 | cascading.bind.provider.h2.format.h2.selectquery=
52 | cascading.bind.provider.h2.format.h2.countquery=
53 |
54 |
--------------------------------------------------------------------------------
/cascading-jdbc-h2/src/test/java/cascading/jdbc/H2Test.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import org.junit.Before;
24 |
/**
 * Runs the shared JDBC tests against an in-memory instance of h2:
 * http://www.h2database.com/html/main.html
 */
public class H2Test extends JDBCTestingBase
  {

  @Before
  public void setUp()
    {
    setDriverName( "org.h2.Driver" );
    // DB_CLOSE_DELAY=-1 keeps the in-memory database alive across connections;
    // MVCC=true avoids table level locks during concurrent access
    setJdbcurl( "jdbc:h2:mem:testing;DB_CLOSE_DELAY=-1;MVCC=true" );
    }

  }
40 |
--------------------------------------------------------------------------------
/cascading-jdbc-mysql/build.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
// the core module's test sources are reused below, so it must be evaluated first
evaluationDependsOn( ":cascading-jdbc-core" )

// single place to bump the MySQL JDBC driver release (was duplicated in two
// dependency declarations; mirrors the ext.derbyVersion pattern of the derby module)
ext.mysqlConnectorVersion = '5.1.34'

dependencies{
  compile project( ':cascading-jdbc-core' )

  compile( group: 'mysql', name: 'mysql-connector-java', version: mysqlConnectorVersion )

  // reuse the shared JDBCTestingBase and helpers from the core module's tests
  testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath

  testRuntime ( group: 'mysql', name: 'mysql-connector-java', version: mysqlConnectorVersion )
}

test{
  // tests require a reachable MySQL server; pass -Dcascading.jdbc.url.mysql=... to enable them
  systemProperty( "cascading.jdbcurl", System.getProperty( "cascading.jdbc.url.mysql" ) )
}
36 |
37 |
--------------------------------------------------------------------------------
/cascading-jdbc-mysql/src/main/java/cascading/jdbc/MySqlFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import java.util.Properties;
24 |
25 | import cascading.jdbc.db.DBOutputFormat;
26 | import cascading.jdbc.db.DBInputFormat;
27 | import cascading.jdbc.db.MySqlDBOutputFormat;
28 | import cascading.jdbc.db.MySqlDBInputFormat;
29 | import cascading.scheme.Scheme;
30 | import cascading.tuple.Fields;
31 |
/**
 * Subclass of JDBCFactory with mysql specific behaviour: wires in the
 * streaming {@link MySqlDBInputFormat} and {@link MySqlDBOutputFormat}, and
 * creates {@link MySqlScheme} instances that understand mysql's
 * "ON DUPLICATE KEY UPDATE" insert mode.
 */
public class MySqlFactory extends JDBCFactory
  {
  /** Property key that toggles mysql's ON DUPLICATE KEY UPDATE behaviour on inserts. */
  public static final String PROTOCOL_REPLACE_ON_INSERT = "replaceoninsert";

  @Override
  protected Class extends DBOutputFormat> getOutputFormClass()
    {
    return MySqlDBOutputFormat.class;
    }

  @Override
  protected Class extends DBInputFormat> getInputFormatClass()
    {
    return MySqlDBInputFormat.class;
    }

  /**
   * Creates an updatable {@link MySqlScheme}. The replace-on-insert flag is
   * read from the given properties and defaults to false when the property is
   * absent or empty.
   */
  protected Scheme createUpdatableScheme( Fields fields, long limit, String[] columnNames, Boolean tableAlias, String conditions,
    String[] updateBy, Fields updateByFields, String[] orderBy, Properties properties )
    {
    boolean replaceOnInsert = false;
    String replaceOnInsertProperty = properties.getProperty( PROTOCOL_REPLACE_ON_INSERT );
    if( replaceOnInsertProperty != null && !replaceOnInsertProperty.isEmpty() )
      replaceOnInsert = Boolean.parseBoolean( replaceOnInsertProperty );

    return new MySqlScheme( getInputFormatClass(), getOutputFormClass(), fields, columnNames, orderBy, conditions, limit, updateByFields,
      updateBy, tableAlias, replaceOnInsert );
    }

  /** Creates a read-only {@link MySqlScheme} backed by explicit select/count queries. */
  protected Scheme createScheme( Fields fields, String selectQuery, String countQuery, long limit, String[] columnNames, Boolean tableAlias )
    {
    return new MySqlScheme( getInputFormatClass(), fields, columnNames, selectQuery, countQuery, limit, tableAlias );
    }

  }
69 |
--------------------------------------------------------------------------------
/cascading-jdbc-mysql/src/main/java/cascading/jdbc/MySqlScheme.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import cascading.jdbc.db.DBOutputFormat;
24 | import cascading.tuple.Fields;
25 | import org.apache.hadoop.conf.Configuration;
26 | import org.apache.hadoop.mapred.JobConf;
27 | import org.apache.hadoop.mapred.OutputCollector;
28 | import org.apache.hadoop.mapred.RecordReader;
29 |
30 | import cascading.flow.FlowProcess;
31 | import cascading.jdbc.db.DBInputFormat;
32 | import cascading.jdbc.db.MySqlDBOutputFormat;
33 | import cascading.jdbc.db.MySqlDBConfiguration;
34 | import cascading.tap.Tap;
35 |
public class MySqlScheme extends JDBCScheme
  {

  /**
   * If true, will use mysql's 'ON DUPLICATE KEY UPDATE' to update existing rows with the same key
   * with the new data. See http://dev.mysql.com/doc/refman/5.0/en/insert-on-duplicate.html.
   */
  private boolean replaceOnInsert = false;

  /**
   * Constructor MySqlScheme creates a new MySqlScheme instance.
   *
   * Specify replaceOnInsert if you want to change the default insert behavior.
   *
   * @param inputFormatClass of type Class extends DBInputFormat>
   * @param columns of type String[]
   * @param orderBy of type String[]
   * @param conditions of type String
   * @param updateBy of type String[]
   * @param replaceOnInsert of type boolean
   */
  public MySqlScheme( Class extends DBInputFormat> inputFormatClass, String[] columns, String[] orderBy,
    String conditions, String[] updateBy, boolean replaceOnInsert )
    {
    // -1 disables any LIMIT on the underlying query
    super( inputFormatClass, MySqlDBOutputFormat.class, columns, orderBy, conditions, -1, updateBy );
    this.replaceOnInsert = replaceOnInsert;
    }


  /**
   * Constructor MySqlScheme creates a new MySqlScheme instance.
   *
   * Specify replaceOnInsert if you want to change the default insert behavior.
   *
   * @param inputFormatClass of type Class extends DBInputFormat>
   * @param outputFormatClass of type Class extends DBOutputFormat>
   * @param columnFields of type Fields
   * @param columnNames of type String[]
   * @param orderBy of type String[]
   * @param conditions of type String
   * @param limit of type long
   * @param updateByFields of type Fields
   * @param updateBy of type String[]
   * @param tableAlias of type boolean
   * @param replaceOnInsert of type boolean
   */
  public MySqlScheme( Class extends DBInputFormat> inputFormatClass, Class extends DBOutputFormat> outputFormatClass,
    Fields columnFields, String[] columnNames, String[] orderBy, String conditions,
    long limit, Fields updateByFields, String[] updateBy, boolean tableAlias, boolean replaceOnInsert )
    {
    super( inputFormatClass, outputFormatClass, columnFields, columnNames, orderBy, conditions, limit, updateByFields, updateBy, tableAlias );
    this.replaceOnInsert = replaceOnInsert;
    }

  /**
   * Constructor MySqlScheme creates a read-only MySqlScheme instance backed by
   * explicit select and count queries; replaceOnInsert keeps its default (false).
   */
  public MySqlScheme( Class extends DBInputFormat> inputFormatClass, Fields fields, String[] columnNames, String selectQuery,
    String countQuery, long limit, boolean tableAlias )
    {
    super( inputFormatClass, fields, columnNames, selectQuery, countQuery, limit, tableAlias );
    }

  @Override
  public void sinkConfInit( FlowProcess extends Configuration> process, Tap tap, Configuration configuration )
    {
    // propagate the replace-on-insert flag into the job configuration, where
    // the output format picks it up; then let the parent do the rest
    MySqlDBConfiguration conf = new MySqlDBConfiguration( configuration );
    conf.setReplaceOnInsert( replaceOnInsert );

    super.sinkConfInit( process, tap, configuration );
    }
  }
104 |
--------------------------------------------------------------------------------
/cascading-jdbc-mysql/src/main/java/cascading/jdbc/db/MySqlDBConfiguration.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc.db;
22 |
23 | import org.apache.hadoop.conf.Configuration;
24 |
25 | public class MySqlDBConfiguration
26 | {
27 |
28 | /** Boolean to use ON DUPLICATE KEY UPDATE for INSERTs when outputting tuples to MySQL. */
29 | public static final String REPLACE_ON_INSERT = "mapred.jdbc.output.replace.on.insert";
30 |
31 | private Configuration configuration;
32 |
33 | public MySqlDBConfiguration( Configuration configuration )
34 | {
35 | this.configuration = configuration;
36 | }
37 |
38 | public boolean getReplaceOnInsert()
39 | {
40 | return configuration.getBoolean( MySqlDBConfiguration.REPLACE_ON_INSERT, false );
41 | }
42 |
43 | public void setReplaceOnInsert( boolean replaceOnInsert )
44 | {
45 | configuration.setBoolean( MySqlDBConfiguration.REPLACE_ON_INSERT, replaceOnInsert );
46 | }
47 |
48 | }
49 |
--------------------------------------------------------------------------------
/cascading-jdbc-mysql/src/main/java/cascading/jdbc/db/MySqlDBInputFormat.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc.db;
22 |
23 | import java.sql.ResultSet;
24 | import java.sql.SQLException;
25 | import java.sql.Statement;
26 |
27 | import org.apache.hadoop.mapred.JobConf;
28 | import org.apache.hadoop.mapred.InputFormat;
29 | import org.apache.hadoop.mapred.RecordReader;
30 | import org.apache.hadoop.io.LongWritable;
31 | import org.apache.hadoop.io.Writable;
32 | import org.apache.hadoop.mapred.*;
33 |
34 | import java.io.IOException;
35 |
/**
 * {@link DBInputFormat} variant for mysql. Its record reader switches the
 * JDBC statement into row-by-row streaming mode, so large result sets are not
 * buffered entirely in memory by the driver.
 */
public class MySqlDBInputFormat extends DBInputFormat
  {

  protected class MySqlDBRecordReader extends DBRecordReader
    {
    protected MySqlDBRecordReader( DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException
      {
      super( split, inputClass, job );
      }

    /**
     * Creates a forward-only, read-only statement with a fetch size of
     * Integer.MIN_VALUE — the combination the MySQL Connector/J documentation
     * prescribes for streaming rows one at a time instead of materializing
     * the whole result set.
     */
    @Override
    protected Statement createStatement() throws SQLException
      {
      Statement statement = connection.createStatement( ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY );
      statement.setFetchSize( Integer.MIN_VALUE );
      return statement;
      }
    }

  @Override
  protected RecordReader getRecordReaderInternal( DBInputSplit split, Class inputClass, JobConf job ) throws SQLException,
    IOException
    {
    return new MySqlDBRecordReader( split, inputClass, job );
    }

  }
63 |
--------------------------------------------------------------------------------
/cascading-jdbc-mysql/src/main/java/cascading/jdbc/db/MySqlDBOutputFormat.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc.db;
22 |
23 | import java.io.IOException;
24 | import java.util.Collections;
25 |
26 | import org.apache.hadoop.fs.FileSystem;
27 | import org.apache.hadoop.mapred.JobConf;
28 | import org.apache.hadoop.mapred.OutputFormat;
29 | import org.apache.hadoop.mapred.RecordWriter;
30 | import org.apache.hadoop.util.Progressable;
31 |
32 | public class MySqlDBOutputFormat extends DBOutputFormat
33 | {
34 |
35 | private boolean replaceOnInsert = false;
36 |
37 | /** {@inheritDoc} */
38 | public RecordWriter getRecordWriter( FileSystem filesystem, JobConf job, String name, Progressable progress ) throws IOException
39 | {
40 | MySqlDBConfiguration dbConf = new MySqlDBConfiguration( job );
41 | replaceOnInsert = dbConf.getReplaceOnInsert();
42 |
43 | return super.getRecordWriter( filesystem, job, name, progress );
44 | }
45 |
46 | /** {@inheritDoc} */
47 | @Override
48 | protected String constructInsertQuery( String table, String[] fieldNames )
49 | {
50 | StringBuilder query = new StringBuilder( super.constructInsertQuery( table, fieldNames ) );
51 | if( replaceOnInsert )
52 | {
53 | query.append( " ON DUPLICATE KEY UPDATE " );
54 | for( int i = 0; i < fieldNames.length; i++ )
55 | {
56 | query.append( String.format( "%s=VALUES(%s)", fieldNames[i], fieldNames[i] ) );
57 | if( i != fieldNames.length - 1 )
58 | {
59 | query.append( "," ); }
60 | }
61 | }
62 | return query.toString();
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/cascading-jdbc-mysql/src/main/resources/cascading/bind/provider.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | #
4 | # Project and contact information: http://www.cascading.org/
5 | #
6 | # This file is part of the Cascading project.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at
11 | #
12 | # http://www.apache.org/licenses/LICENSE-2.0
13 | #
14 | # Unless required by applicable law or agreed to in writing, software
15 | # distributed under the License is distributed on an "AS IS" BASIS,
16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and
18 | # limitations under the License.
19 | #
20 |
21 | # default name of provider
22 | cascading.bind.provider.names=mysql
23 | cascading.bind.provider.mysql.platforms=hadoop,hadoop2-mr1
24 |
25 | # factory
26 | cascading.bind.provider.mysql.factory.classname=cascading.jdbc.MySqlFactory
27 |
28 | # protocol is jdbc
29 | cascading.bind.provider.mysql.protocol.names=jdbc
30 | cascading.bind.provider.mysql.protocol.jdbc.schemes=mysql
31 | cascading.bind.provider.mysql.protocol.jdbc.tableExistsQuery=SHOW TABLES LIKE '%s'
32 | cascading.bind.provider.mysql.protocol.jdbc.jdbcdriver=com.mysql.jdbc.Driver
33 | cascading.bind.provider.mysql.protocol.jdbc.tabledescseparator=:
34 | cascading.bind.provider.mysql.protocol.jdbc.jdbcuser=
35 | cascading.bind.provider.mysql.protocol.jdbc.jdbcpassword=
36 | cascading.bind.provider.mysql.protocol.jdbc.tabledesc.tablename=
37 | cascading.bind.provider.mysql.protocol.jdbc.tabledesc.columnnames=
38 | cascading.bind.provider.mysql.protocol.jdbc.tabledesc.columndefs=
39 | cascading.bind.provider.mysql.protocol.jdbc.tabledesc.primarykeys=
40 | cascading.bind.provider.mysql.protocol.jdbc.sinkmode=
41 |
42 | # format is mysql
43 | cascading.bind.provider.mysql.format.names=mysql
44 | cascading.bind.provider.mysql.format.mysql.protocols=jdbc
45 | cascading.bind.provider.mysql.format.mysql.separator=:
46 | cascading.bind.provider.mysql.format.mysql.columnnames=
47 | cascading.bind.provider.mysql.format.mysql.orderBy=
48 | cascading.bind.provider.mysql.format.mysql.conditions=
49 | cascading.bind.provider.mysql.format.mysql.limit=
50 | cascading.bind.provider.mysql.format.mysql.updateBy=
51 | cascading.bind.provider.mysql.format.mysql.tableAlias=
52 | cascading.bind.provider.mysql.format.mysql.selectquery=
53 | cascading.bind.provider.mysql.format.mysql.countquery=
54 | cascading.bind.provider.mysql.format.mysql.replaceoninsert=false
55 |
56 |
--------------------------------------------------------------------------------
/cascading-jdbc-mysql/src/test/java/cascading/jdbc/MysqlTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import cascading.jdbc.db.MySqlDBInputFormat;
24 | import org.junit.Before;
25 |
26 | /**
27 | * Runs the tests against an instance of mysql
28 | * */
29 | public class MysqlTest extends JDBCTestingBase
30 | {
31 |
32 | @Before
33 | public void setUp()
34 | {
35 | setDriverName( "com.mysql.jdbc.Driver" );
36 | setJdbcurl( System.getProperty( "cascading.jdbcurl" ) );
37 | setFactory( new MySqlFactory() );
38 | setInputFormatClass( MySqlDBInputFormat.class );
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/cascading-jdbc-oracle/build.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
// the core project must be evaluated first so its sourceSets are resolvable below
evaluationDependsOn( ":cascading-jdbc-core" )

dependencies{
  compile project( ':cascading-jdbc-core' )

  // Oracle JDBC driver; not on Maven Central — requires a repository hosting the Oracle artifacts
  compile( group: 'com.oracle', name: 'ojdbc6', version: '11.2.0.4' )

  // reuse the shared JDBC test harness compiled by the core project
  testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath
}

test{
  // integration tests run only against a real database; the url is passed through from the build invocation
  systemProperty( "cascading.jdbcurl", System.getProperty( "cascading.jdbc.url.oracle" ) )
}
34 |
35 |
--------------------------------------------------------------------------------
/cascading-jdbc-oracle/src/main/java/cascading/jdbc/OracleJDBCFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package cascading.jdbc;
21 |
22 | import cascading.jdbc.db.DBInputFormat;
23 | import cascading.jdbc.db.OracleDBInputFormat;
24 |
25 | /**
26 | * Oracle specific subclass of {@link JDBCFactory}.
27 | * */
28 | public class OracleJDBCFactory extends JDBCFactory
29 | {
30 | @Override
31 | protected Class extends DBInputFormat> getInputFormatClass()
32 | {
33 | return OracleDBInputFormat.class;
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/cascading-jdbc-oracle/src/main/java/cascading/jdbc/db/OracleDBInputFormat.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | /*
19 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
20 | *
21 | * Project and contact information: http://www.cascading.org/
22 | *
23 | * This file is part of the Cascading project.
24 | *
25 | * Licensed under the Apache License, Version 2.0 (the "License");
26 | * you may not use this file except in compliance with the License.
27 | * You may obtain a copy of the License at
28 | *
29 | * http://www.apache.org/licenses/LICENSE-2.0
30 | *
31 | * Unless required by applicable law or agreed to in writing, software
32 | * distributed under the License is distributed on an "AS IS" BASIS,
33 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
34 | * See the License for the specific language governing permissions and
35 | * limitations under the License.
36 | */
37 |
38 | package cascading.jdbc.db;
39 |
40 | import java.io.IOException;
41 | import java.sql.SQLException;
42 |
43 | import org.apache.hadoop.io.LongWritable;
44 | import org.apache.hadoop.mapred.JobConf;
45 | import org.apache.hadoop.mapred.RecordReader;
46 |
@SuppressWarnings("rawtypes")
public class OracleDBInputFormat extends DBInputFormat
  {
  /** Hands back the Oracle specific record reader for the given split. */
  @Override
  protected RecordReader getRecordReaderInternal( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException
    {
    return new OracleDBRecordReader( split, inputClass, job );
    }

  /**
   * Record reader that builds Oracle flavored SELECT statements: split
   * pagination is expressed with the ROWNUM pseudo column, since Oracle has
   * no LIMIT/OFFSET syntax.
   */
  class OracleDBRecordReader extends DBInputFormat.DBRecordReader
    {
    protected OracleDBRecordReader( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException
      {
      super( split, inputClass, job );
      }

    /** Returns the query for selecting the records from an Oracle DB. */
    protected String getSelectQuery()
      {
      StringBuilder query = new StringBuilder();

      // Oracle-specific codepath to use rownum instead of LIMIT/OFFSET.
      if( dbConf.getInputQuery() == null )
        {
        // no prebuilt query configured: assemble
        // "SELECT <fields> FROM <table> [WHERE ...] [ORDER BY ...]"
        query.append( "SELECT " );

        for( int i = 0; i < fieldNames.length; i++ )
          {
          query.append( fieldNames[ i ] );
          if( i != fieldNames.length - 1 )
            {
            query.append( ", " );
            }
          }

        query.append( " FROM " ).append( tableName );
        if( conditions != null && conditions.length() > 0 )
          query.append( " WHERE " ).append( conditions );

        String orderBy = dbConf.getInputOrderBy();
        if( orderBy != null && orderBy.length() > 0 )
          query.append( " ORDER BY " ).append( orderBy );

        }
      else
        {
        //PREBUILT QUERY
        query.append( dbConf.getInputQuery() );
        }

      try
        {

        // wrap the base query so only this split's rows come back:
        // the outer queries keep rows with start < ROWNUM <= start + length
        if( split.getLength() > 0 && split.getStart() >= 0 )
          {
          String querystring = query.toString();

          query = new StringBuilder();
          query.append( "SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( " );
          query.append( querystring );
          query.append( " ) a WHERE rownum <= " ).append( split.getStart() );
          query.append( " + " ).append( split.getLength() );
          query.append( " ) WHERE dbif_rno >= " ).append( split.getStart() + 1 );
          }
        }
      catch( IOException ex )
        {
        // ignore, will not throw.
        }

      return query.toString();
      }
    }
  }
121 |
--------------------------------------------------------------------------------
/cascading-jdbc-oracle/src/main/resources/cascading/bind/provider.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | #
4 | # Project and contact information: http://www.cascading.org/
5 | #
6 | # This file is part of the Cascading project.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at
11 | #
12 | # http://www.apache.org/licenses/LICENSE-2.0
13 | #
14 | # Unless required by applicable law or agreed to in writing, software
15 | # distributed under the License is distributed on an "AS IS" BASIS,
16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and
18 | # limitations under the License.
19 | #
20 |
21 | # default name of provider
22 | cascading.bind.provider.names=oracle
23 | cascading.bind.provider.oracle.platforms=hadoop,hadoop2-mr1
24 |
25 | # factory
26 | cascading.bind.provider.oracle.factory.classname=cascading.jdbc.OracleJDBCFactory
27 |
28 | # protocol is jdbc
29 | cascading.bind.provider.oracle.protocol.names=jdbc
30 | cascading.bind.provider.oracle.protocol.jdbc.schemes=oracle
31 | cascading.bind.provider.oracle.protocol.jdbc.jdbcdriver=oracle.jdbc.OracleDriver
32 | cascading.bind.provider.oracle.protocol.jdbc.tabledescseparator=:
33 | cascading.bind.provider.oracle.protocol.jdbc.jdbcuser=
34 | cascading.bind.provider.oracle.protocol.jdbc.jdbcpassword=
35 | cascading.bind.provider.oracle.protocol.jdbc.tabledesc.tablename=
36 | cascading.bind.provider.oracle.protocol.jdbc.tabledesc.columnnames=
37 | cascading.bind.provider.oracle.protocol.jdbc.tabledesc.columndefs=
38 | cascading.bind.provider.oracle.protocol.jdbc.tabledesc.primarykeys=
39 | cascading.bind.provider.oracle.protocol.jdbc.sinkmode=
40 |
41 | # format is oracle
42 | cascading.bind.provider.oracle.format.names=oracle
43 | cascading.bind.provider.oracle.format.oracle.protocols=jdbc
44 | cascading.bind.provider.oracle.format.oracle.separator=:
45 | cascading.bind.provider.oracle.format.oracle.columnnames=
46 | cascading.bind.provider.oracle.format.oracle.orderBy=
47 | cascading.bind.provider.oracle.format.oracle.conditions=
48 | cascading.bind.provider.oracle.format.oracle.limit=
49 | cascading.bind.provider.oracle.format.oracle.updateBy=
50 | cascading.bind.provider.oracle.format.oracle.tableAlias=
51 | cascading.bind.provider.oracle.format.oracle.selectquery=
52 | cascading.bind.provider.oracle.format.oracle.countquery=
53 |
54 |
--------------------------------------------------------------------------------
/cascading-jdbc-oracle/src/test/java/cascading/jdbc/OracleJDBCFactoryTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package cascading.jdbc;
21 |
22 | import static org.junit.Assert.*;
23 |
24 | import org.junit.Test;
25 |
26 | import cascading.jdbc.db.OracleDBInputFormat;
27 |
28 | public class OracleJDBCFactoryTest
29 | {
30 |
31 | @Test
32 | public void testGetInputFormatClass()
33 | {
34 | assertEquals(OracleDBInputFormat.class, new OracleJDBCFactory().getInputFormatClass());
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/cascading-jdbc-oracle/src/test/java/cascading/jdbc/OracleTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | package cascading.jdbc;
21 |
22 | import org.junit.Before;
23 |
24 | import cascading.jdbc.db.OracleDBInputFormat;
25 |
26 | /**
27 | * Tests against an oracle database.
28 | * */
29 | public class OracleTest extends JDBCTestingBase
30 | {
31 |
32 | @Before
33 | public void setUp()
34 | {
35 | setDriverName( "oracle.jdbc.OracleDriver" );
36 | setJdbcurl( System.getProperty( "cascading.jdbcurl" ) );
37 | setInputFormatClass( OracleDBInputFormat.class );
38 | setFactory( new OracleJDBCFactory() );
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/cascading-jdbc-postgresql/build.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
// the core project must be evaluated first so its sourceSets are resolvable below
evaluationDependsOn( ":cascading-jdbc-core" )

dependencies{
  compile project( ':cascading-jdbc-core' )

  compile( group: 'postgresql', name: 'postgresql', version: '9.1-901-1.jdbc4' )

  // NOTE(review): this testCompile entry duplicates the compile dependency above;
  // with the default configuration hierarchy it is redundant — confirm and drop.
  testCompile ( group: 'postgresql', name: 'postgresql', version: '9.1-901-1.jdbc4' )

  // reuse the shared JDBC test harness compiled by the core project
  testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath
}

test{
  // integration tests run only against a real database; the url is passed through from the build invocation
  systemProperty( "cascading.jdbcurl", System.getProperty( "cascading.jdbc.url.postgresql" ) )
}
36 |
37 |
--------------------------------------------------------------------------------
/cascading-jdbc-postgresql/src/main/resources/cascading/bind/provider.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | #
4 | # Project and contact information: http://www.cascading.org/
5 | #
6 | # This file is part of the Cascading project.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at
11 | #
12 | # http://www.apache.org/licenses/LICENSE-2.0
13 | #
14 | # Unless required by applicable law or agreed to in writing, software
15 | # distributed under the License is distributed on an "AS IS" BASIS,
16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and
18 | # limitations under the License.
19 | #
20 |
21 | # default name of provider
22 | cascading.bind.provider.names=postgresql
23 | cascading.bind.provider.postgresql.platforms=hadoop,hadoop2-mr1
24 |
25 | # factory
26 | cascading.bind.provider.postgresql.factory.classname=cascading.jdbc.JDBCFactory
27 |
28 | # protocol is jdbc
29 | cascading.bind.provider.postgresql.protocol.names=jdbc
30 | cascading.bind.provider.postgresql.protocol.jdbc.schemes=postgresql
31 | cascading.bind.provider.postgresql.protocol.jdbc.jdbcdriver=org.postgresql.Driver
32 | cascading.bind.provider.postgresql.protocol.jdbc.tabledescseparator=:
33 | cascading.bind.provider.postgresql.protocol.jdbc.jdbcuser=
34 | cascading.bind.provider.postgresql.protocol.jdbc.jdbcpassword=
35 | cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.tablename=
36 | cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.columnnames=
37 | cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.columndefs=
38 | cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.primarykeys=
39 | cascading.bind.provider.postgresql.protocol.jdbc.sinkmode=
40 |
41 | # format is postgresql
42 | cascading.bind.provider.postgresql.format.names=postgresql
cascading.bind.provider.postgresql.format.postgresql.protocols=jdbc
cascading.bind.provider.postgresql.format.postgresql.separator=:
cascading.bind.provider.postgresql.format.postgresql.columnnames=
cascading.bind.provider.postgresql.format.postgresql.orderBy=
cascading.bind.provider.postgresql.format.postgresql.conditions=
cascading.bind.provider.postgresql.format.postgresql.limit=
cascading.bind.provider.postgresql.format.postgresql.updateBy=
cascading.bind.provider.postgresql.format.postgresql.tableAlias=
cascading.bind.provider.postgresql.format.postgresql.selectquery=
cascading.bind.provider.postgresql.format.postgresql.countquery=
53 |
54 |
--------------------------------------------------------------------------------
/cascading-jdbc-postgresql/src/test/java/cascading/jdbc/PostgresTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import org.junit.Before;
24 |
25 | /**
26 | * Runs the tests against postgres.
27 | * */
28 | public class PostgresTest extends JDBCTestingBase
29 | {
30 |
31 | @Before
32 | public void setUp()
33 | {
34 | setDriverName( "org.postgresql.Driver" );
35 | setJdbcurl( System.getProperty( "cascading.jdbcurl" ) );
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/build.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | evaluationDependsOn( ":cascading-jdbc-core" )
22 |
23 | ext.redshiftPostgresVersion = '8.4-702.jdbc4'
24 |
25 | dependencies {
26 |
27 | compile project( ':cascading-jdbc-core' )
28 | compile group: 'postgresql', name: 'postgresql', version: redshiftPostgresVersion
29 |
30 | testCompile ( group: 'postgresql', name: 'postgresql', version: redshiftPostgresVersion )
31 | testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath
32 | }
33 |
34 | configurations {
35 | sampleCode {
36 | extendsFrom compile
37 | }
38 | }
39 |
40 | test{
41 | systemProperty( "cascading.jdbcurl", System.getProperty( "cascading.jdbc.url.redshift" ) )
42 | }
43 |
44 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/main/java/cascading/jdbc/AWSCredentials.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import java.io.Serializable;
24 |
25 | /** Holder for the AWS credentials. {@link Serializable} is required for EMR use. */
26 | public class AWSCredentials implements Serializable
27 | {
28 | private String awsAccessKey;
29 | private String awsSecretKey;
30 |
31 | public final static AWSCredentials RUNTIME_DETERMINED = new AWSCredentials( AWSCredentials.class.getName(), AWSCredentials.class.getName() );
32 |
33 | public AWSCredentials( String awsAccessKey, String awsSecretKey )
34 | {
35 | this.awsAccessKey = awsAccessKey;
36 | this.awsSecretKey = awsSecretKey;
37 | }
38 |
39 | public String getAwsAccessKey()
40 | {
41 | return awsAccessKey;
42 | }
43 |
44 | public String getAwsSecretKey()
45 | {
46 | return awsSecretKey;
47 | }
48 |
49 | public boolean isBlank() {
50 | return awsAccessKey == null && awsSecretKey == null;
51 | }
52 |
53 | @Override
54 | public boolean equals( Object object )
55 | {
56 | if( this == object )
57 | return true;
58 |
59 | if( !( object instanceof AWSCredentials ) )
60 | return false;
61 |
62 | AWSCredentials that = (AWSCredentials) object;
63 |
64 | if( awsAccessKey != null ? !awsAccessKey.equals( that.awsAccessKey ) : that.awsAccessKey != null )
65 | return false;
66 | if( awsSecretKey != null ? !awsSecretKey.equals( that.awsSecretKey ) : that.awsSecretKey != null )
67 | return false;
68 |
69 | return true;
70 | }
71 |
72 | @Override
73 | public int hashCode()
74 | {
75 | int result = awsAccessKey != null ? awsAccessKey.hashCode() : 0;
76 | result = 31 * result + ( awsSecretKey != null ? awsSecretKey.hashCode() : 0 );
77 | return result;
78 | }
79 |
80 | }
81 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/main/java/cascading/jdbc/InvalidCodepointForRedshiftException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
/** Indicates that a line contained a codepoint that is not permitted in a Redshift database. */
24 |
25 |
public class InvalidCodepointForRedshiftException extends RuntimeException
  {
  // exceptions are Serializable; pin the serial form explicitly
  private static final long serialVersionUID = 1L;

  /** The offending string, kept so callers can inspect or log it. */
  private final String originalString;

  /**
   * Creates a new exception for the given string.
   *
   * @param originalString the string containing codepoints Redshift cannot store
   */
  public InvalidCodepointForRedshiftException( String originalString )
    {
    this.originalString = originalString;
    }

  /**
   * Returns the string that triggered this exception.
   *
   * @return the original, unmodified string
   */
  public String getOriginalString()
    {
    return originalString;
    }

  @Override
  public String getMessage()
    {
    return String.format( "The string contains characters not allowed in a Redshift DB. Original string: \"%s\"", originalString );
    }
  }
42 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/main/java/cascading/jdbc/RedshiftFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import java.util.HashMap;
24 | import java.util.Map;
25 | import java.util.Properties;
26 |
27 | import cascading.scheme.Scheme;
28 | import cascading.tap.SinkMode;
29 | import cascading.tap.Tap;
30 | import cascading.tuple.Fields;
31 | import cascading.util.Util;
32 | import org.slf4j.Logger;
33 | import org.slf4j.LoggerFactory;
34 |
35 | /**
36 | * The {@link RedshiftFactory} is a factory class to create {@link RedshiftTap}s
37 | * and {@link RedshiftScheme}s. The class is meant to be used by lingual for dynamically creating
39 | * Taps and Schemes, so that redshift can be used as a provider within lingual.
42 | */
43 | public class RedshiftFactory extends JDBCFactory
44 | {
45 |
46 | private static final Logger LOG = LoggerFactory.getLogger( RedshiftFactory.class );
47 |
48 | /** environment variable for the aws access key */
49 | private static final String SYSTEM_AWS_ACCESS_KEY = "AWS_ACCESS_KEY";
50 |
51 | /** environment variable for the aws secret key */
52 | private static final String SYSTEM_AWS_SECRET_KEY = "AWS_SECRET_KEY";
53 |
54 | public static final String PROTOCOL_S3_OUTPUT_PATH = "s3outputpath";
55 | public static final String PROTOCOL_AWS_ACCESS_KEY = "awsacceskey";
56 | public static final String PROTOCOL_AWS_SECRET_KEY = "awssecretkey";
57 | ;
58 | public static final String PROTOCOL_KEEP_DEBUG_HFS_DATA = "keepdebughfsdata";
59 | public static final String PROTOCOL_USE_DIRECT_INSERT = "usedirectinsert";
60 |
61 | public static final String FORMAT_DISTRIBUTION_KEY = "distributionkey";
62 | public static final String FORMAT_SORT_KEYS = "sortkeys";
63 | public static final String FORMAT_COPY_OPTIONS_PREFIX = "copyoptions.";
64 | public static final String FORMAT_FIELD_DELIMITER = "fielddelimiter";
65 | public static final String FORMAT_QUOTE_CHARACTER = "quotecharacter";
66 |
67 | @SuppressWarnings("unused")
68 | public String getDescription()
69 | {
70 | return getClass().getSimpleName();
71 | }
72 |
73 | @SuppressWarnings("rawtypes")
74 | public Scheme createScheme( String format, Fields fields, Properties formatProperties )
75 | {
76 | LOG.info( "creating RedshiftScheme for format {} with fields {} and properties {}", format, fields, formatProperties );
77 |
78 | String delimiter = formatProperties.getProperty( FORMAT_FIELD_DELIMITER, RedshiftScheme.DEFAULT_DELIMITER );
79 | String quoteCharacter = formatProperties.getProperty( FORMAT_QUOTE_CHARACTER, RedshiftScheme.DEFAULT_QUOTE );
80 |
81 | RedshiftTableDesc redshiftTableDesc = createTableDescFromProperties( fields, formatProperties, true );
82 |
83 | Map copyOptions = extractCopyOptions( formatProperties, FORMAT_COPY_OPTIONS_PREFIX );
84 |
85 | boolean tableAlias = getTableAlias( formatProperties );
86 |
87 | return new RedshiftScheme( fields, redshiftTableDesc, delimiter, quoteCharacter, copyOptions, tableAlias );
88 | }
89 |
90 | @SuppressWarnings("rawtypes")
91 | public Tap createTap( String protocol, Scheme scheme, String identifier, SinkMode sinkMode, Properties protocolProperties )
92 | {
93 | LOG.info( "creating RedshiftTap with properties {} in mode {}", protocolProperties, sinkMode );
94 |
95 | String jdbcUserProperty = protocolProperties.getProperty( PROTOCOL_JDBC_USER );
96 | String jdbcPasswordProperty = protocolProperties.getProperty( PROTOCOL_JDBC_PASSWORD );
97 |
98 | String jdbcUser = null;
99 | if( !Util.isEmpty( jdbcUserProperty ) )
100 | jdbcUser = jdbcUserProperty;
101 |
102 | String jdbcPassword = null;
103 | if( !Util.isEmpty( jdbcPasswordProperty ) )
104 | jdbcPassword = jdbcPasswordProperty;
105 |
106 | String hfsStagingDir = protocolProperties.getProperty( PROTOCOL_S3_OUTPUT_PATH, "/tmp" );
107 |
108 | AWSCredentials credentials = determineAwsCredentials( protocolProperties );
109 |
110 | boolean keepDebugHdfsData = Boolean.parseBoolean( protocolProperties.getProperty( PROTOCOL_KEEP_DEBUG_HFS_DATA ) );
111 | boolean useDirectInsert = Boolean.parseBoolean( protocolProperties.getProperty( PROTOCOL_USE_DIRECT_INSERT, "true" ) );
112 |
113 | // source fields will be the JDBC-typed fields so use them as defaults.
114 | RedshiftTableDesc redshiftTableDesc = createTableDescFromProperties( scheme.getSourceFields(), protocolProperties, false );
115 |
116 | Fields sinkFields = scheme.getSinkFields();
117 | if( !redshiftTableDesc.hasRequiredTableInformation() && sinkFields != Fields.UNKNOWN && sinkFields != Fields.ALL && sinkFields != null
118 | && sinkFields.getTypes() != null )
119 | {
120 | LOG.debug( "tabledesc information incomplete, falling back to sink-fields {}", scheme.getSinkFields() );
121 | redshiftTableDesc.completeFromFields( scheme.getSinkFields() );
122 | ( (JDBCScheme) scheme ).setColumns( redshiftTableDesc.getColumnNames() );
123 | }
124 |
125 | // users can overwrite the sink mode.
126 | String sinkModeProperty = protocolProperties.getProperty( PROTOCOL_SINK_MODE );
127 | if( !Util.isEmpty( sinkModeProperty ) )
128 | sinkMode = SinkMode.valueOf( sinkModeProperty );
129 |
130 | return new RedshiftTap( identifier, jdbcUser, jdbcPassword, hfsStagingDir, credentials, redshiftTableDesc, (RedshiftScheme) scheme, sinkMode, keepDebugHdfsData, useDirectInsert );
131 | }
132 |
133 | private RedshiftTableDesc createTableDescFromProperties( Fields fields, Properties properties, boolean allowNullName )
134 | {
135 | String tableName = properties.getProperty( PROTOCOL_TABLE_NAME, null );
136 |
137 | if( !allowNullName )
138 | if( Util.isEmpty( tableName ) )
139 | throw new IllegalArgumentException( "no tablename given" );
140 |
141 | String separator = properties.getProperty( PROTOCOL_FIELD_SEPARATOR, DEFAULT_SEPARATOR );
142 |
143 | String[] columnNames = getColumnNames( fields, properties, separator );
144 |
145 | String[] columnDefs = null;
146 | String columnDefsProperty = properties.getProperty( PROTOCOL_COLUMN_DEFS, null );
147 | if( !Util.isEmpty( columnDefsProperty ) )
148 | columnDefs = columnDefsProperty.split( separator );
149 |
150 | String distributionKey = properties.getProperty( FORMAT_DISTRIBUTION_KEY );
151 |
152 | String[] sortKeys = null;
153 | if( properties.containsKey( FORMAT_SORT_KEYS ) )
154 | sortKeys = properties.getProperty( FORMAT_SORT_KEYS ).split( DEFAULT_SEPARATOR );
155 |
156 | RedshiftTableDesc desc = new RedshiftTableDesc( tableName, columnNames, columnDefs, distributionKey, sortKeys );
157 | return desc;
158 | }
159 |
160 | /**
161 | * Helper method that tries to determine the AWS credentials. It first tries
162 | * the {@link Properties} passed in, next it checks for the environment
163 | * variables AWS_ACCESS_KEY
and AWS_SECRET_KEY
. If
164 | * none of the above contains the credentials, the method returns
165 | * {@link AWSCredentials}.
166 | *
167 | * @param properties a {@link Properties} object, which can contain the AWS
168 | * credentials.
169 | * @return an {@link AWSCredentials} installed.
170 | */
171 | private AWSCredentials determineAwsCredentials( Properties properties )
172 | {
173 | // try to determine the aws credentials starting with the assumption
174 | // that they are available from the AWS environment
175 | AWSCredentials awsCredentials = AWSCredentials.RUNTIME_DETERMINED;
176 |
177 | // first try the properties
178 | String awsAccessKey = properties.getProperty( PROTOCOL_AWS_ACCESS_KEY );
179 | String awsSecretKey = properties.getProperty( PROTOCOL_AWS_SECRET_KEY );
180 |
181 | if( !Util.isEmpty( awsAccessKey ) && !Util.isEmpty( awsSecretKey ) )
182 | awsCredentials = new AWSCredentials( awsAccessKey, awsSecretKey );
183 |
184 | // next try environment variables
185 | if( awsCredentials == AWSCredentials.RUNTIME_DETERMINED )
186 | {
187 | awsAccessKey = System.getenv( SYSTEM_AWS_ACCESS_KEY );
188 | awsSecretKey = System.getenv( SYSTEM_AWS_SECRET_KEY );
189 | if( !Util.isEmpty( awsAccessKey ) && !Util.isEmpty( awsSecretKey ) )
190 | awsCredentials = new AWSCredentials( awsAccessKey, awsSecretKey );
191 | }
192 | return awsCredentials;
193 | }
194 |
195 | public static Map extractCopyOptions( Properties properties, String copyOptionsPrefix )
196 | {
197 | Map copyOptions = new HashMap();
198 | for( CopyOption curOption : CopyOption.values() )
199 | {
200 | String propConfName = copyOptionsPrefix + curOption.toString();
201 | if( properties.containsKey( propConfName ) )
202 | {
203 | String propValue = properties.get( propConfName ) != null ? properties.get( propConfName ).toString() : null;
204 | copyOptions.put( curOption, propValue );
205 | }
206 | }
207 | return copyOptions;
208 | }
209 |
210 | /** Enum of all the COPY options supported by the Redshift load command and information about how to covert them to SQL commands. */
211 | public static enum CopyOption
212 | {
213 | FIXEDWIDTH( "\'%s\'" ),
214 | DELIMITER( "\'%s\'" ),
215 | CSV( " QUOTE \'%s\' " ),
216 | ENCRYPTED,
217 | GZIP,
218 | LZOP,
219 | REMOVEQUOTES,
220 | EXPLICIT_IDS,
221 | ACCEPTINVCHARS( "\'%s\'" ),
222 | MAXERROR( "%s" ),
223 | DATEFORMAT( "\'%s\'" ),
224 | TIMEFORMAT( "\'%s\'" ),
225 | IGNOREHEADER( "%s" ),
226 | ACCEPTANYDATE,
227 | IGNOREBLANKLINES,
228 | TRUNCATECOLUMNS,
229 | FILLRECORD,
230 | TRIMBLANKS,
231 | NOLOAD,
232 | NULL( "\'%s\'" ),
233 | EMPTYASNULL,
234 | BLANKSASNULL,
235 | COMPROWS( "%s" ),
236 | COMPUPDATE( "%s" ),
237 | STATUPDATE( "%s" ),
238 | ESCAPE,
239 | ROUNDEC;
240 |
241 | private String formattableCommandString;
242 |
243 | CopyOption( String formattableCommandString )
244 | {
245 | this.formattableCommandString = formattableCommandString;
246 | }
247 |
248 | CopyOption()
249 | {
250 | this.formattableCommandString = "";
251 | }
252 |
253 | public String getArguments( String argument )
254 | {
255 | if( this.equals( CSV ) && argument == null )
256 | return " CSV ";
257 |
258 | if( formattableCommandString.length() == 0 || argument == null )
259 | return formattableCommandString;
260 |
261 | return String.format( formattableCommandString + " ", argument );
262 | }
263 |
264 | }
265 |
266 |
267 | }
268 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/main/java/cascading/jdbc/RedshiftSafeDelimitedParser.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import java.io.IOException;
24 |
25 | import cascading.scheme.util.DelimitedParser;
26 | import cascading.tap.TapException;
27 | import cascading.tuple.Fields;
28 | import org.apache.hadoop.util.StringUtils;
29 |
30 | /** {@link DelimitedParser} that treats the presence of characters that Redshift can't handle as an error in that line. */
31 |
32 | public class RedshiftSafeDelimitedParser extends DelimitedParser
33 | {
34 | private static final char BACKSLASH = 0x5c;
35 |
36 | public RedshiftSafeDelimitedParser( String delimiter, String quote, Class[] types, boolean strict, boolean safe, Fields sourceFields, Fields sinkFields )
37 | {
38 | super( delimiter, quote, types, strict, safe, sourceFields, sinkFields );
39 | }
40 |
41 | public RedshiftSafeDelimitedParser( String delimiter, String quote )
42 | {
43 | this( delimiter, quote, null, true, true, null, null );
44 | }
45 |
46 | @Override
47 | public Appendable joinLine( Iterable iterable, Appendable buffer )
48 | {
49 | try
50 | {
51 | return joinWithQuote( iterable, buffer );
52 | }
53 | catch( IOException e )
54 | {
55 | throw new TapException( "unable to append data", e );
56 | }
57 | }
58 |
59 | protected Appendable joinWithQuote( Iterable tuple, Appendable buffer ) throws IOException
60 | {
61 | int count = 0;
62 |
63 | for( Object value : tuple )
64 | {
65 | if( count != 0 )
66 | buffer.append( delimiter );
67 |
68 | if( value != null )
69 | {
70 | if( value instanceof String )
71 | {
72 | String valueString = value.toString();
73 |
74 | if( containsAnyInvalidCodepoints( valueString ) )
75 | {
76 | throw new InvalidCodepointForRedshiftException( valueString );
77 | }
78 |
79 | String escaped = StringUtils.escapeString( valueString, BACKSLASH, new char[]{'"', '\''} );
80 | buffer.append( quote ).append( escaped ).append( quote );
81 | }
82 | else
83 | {
84 | buffer.append( value.toString() );
85 | }
86 | }
87 | count++;
88 | }
89 |
90 | return buffer;
91 | }
92 |
93 | private boolean containsAnyInvalidCodepoints( String s )
94 | {
95 | for( int i = 0; i < s.length(); i++ )
96 | {
97 | if( isExcludedCodepoint( s.codePointAt( i ) ) )
98 | {
99 | return true;
100 | }
101 | }
102 | return false;
103 | }
104 |
105 | private boolean isExcludedCodepoint( int codepoint )
106 | {
107 | if( codepoint >= 0xD800 && codepoint <= 0xDFFF )
108 | {
109 | return true;
110 | }
111 | if( codepoint >= 0xFDD0 && codepoint <= 0xFDEF )
112 | {
113 | return true;
114 | }
115 | if( codepoint >= 0xFFFE && codepoint <= 0xFFFF )
116 | {
117 | return true;
118 | }
119 | return false;
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/main/java/cascading/jdbc/RedshiftScheme.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import java.lang.String;
24 | import java.util.HashMap;
25 | import java.util.Map;
26 |
27 | import cascading.flow.FlowProcess;
28 | import cascading.jdbc.db.DBInputFormat;
29 | import cascading.scheme.Scheme;
30 | import cascading.scheme.hadoop.TextDelimited;
31 | import cascading.tap.Tap;
32 | import cascading.tuple.Fields;
33 | import org.apache.hadoop.conf.Configuration;
34 | import org.apache.hadoop.mapred.JobConf;
35 | import org.apache.hadoop.mapred.OutputCollector;
36 | import org.apache.hadoop.mapred.RecordReader;
37 | import org.slf4j.Logger;
38 | import org.slf4j.LoggerFactory;
39 |
40 | /**
41 | * This class and {@link RedshiftTap} manage the ability to read and write data to Amazon's Redshift via EMR.
42 | * Because Redshift data is loaded into Redshift via S3 but or JDBC but always read via JDBC these classes wrap the
43 | * pairing of an HFS {@link Tap} and a JDBC {@link Tap} for reading behind one {@link Scheme}
44 | * object.
45 | */
46 | public class RedshiftScheme extends JDBCScheme
47 | {
48 |
49 | public static final String DEFAULT_DELIMITER = ",";
50 | public static final String DEFAULT_QUOTE = "\"";
51 |
52 | private static final Logger LOG = LoggerFactory.getLogger( RedshiftScheme.class );
53 |
54 | private TextDelimited textDelimited;
55 | private Scheme sinkScheme;
56 | private RedshiftTableDesc redshiftTableDesc;
57 | private Map copyOptions = new HashMap();
58 |
59 | /**
60 | * The primary constructor. Any temporary scratch files will be created with default values for filed delimiters. This
61 | * will work fine for csv, tab delimited and so on but may lead to errors if there is binary data stored in the files.
62 | *
63 | * @param redshiftTableDesc description of the table structure.
64 | */
65 | public RedshiftScheme( Fields fields, RedshiftTableDesc redshiftTableDesc )
66 | {
67 | this( fields, redshiftTableDesc, DEFAULT_DELIMITER, DEFAULT_QUOTE, null, false );
68 | }
69 |
70 | /**
71 | * Like primary constructor, but also takes a String conditions allowing the query to be restricted to a subset of the table.
72 | *
73 | * @param redshiftTableDesc description of the table structure.
74 | * @param conditions where clause to restrict the query.
75 | */
76 | public RedshiftScheme( Fields fields, RedshiftTableDesc redshiftTableDesc, String conditions )
77 | {
78 | this( fields, redshiftTableDesc, DEFAULT_DELIMITER, DEFAULT_QUOTE, null, conditions, false );
79 | }
80 |
81 | /**
82 | * Use this constructor if you need fine-grained control over the temporary file used to stage data for uploading. You
83 | * almost certainly don't want to do this unless you know for a fact that your data contains, ex. binary data that might
84 | * cause issues with default column detection (ex. if you use the \001 character).
85 | *
86 | * @param redshiftTableDesc description of the table structure.
87 | * @param delimiter single character indicating the separator between fields in a file to load
88 | * @param quoteCharacter single character to enclose data within a field in cases where the field contains a delimiter
89 | * @param copyOptions custom arguments passed to the COPY command for processing. In most cases, proper cleaning of the data
90 | * before sending it to this Tap is a better alternative.
91 | */
92 | public RedshiftScheme( Fields fields, RedshiftTableDesc redshiftTableDesc, String delimiter, String quoteCharacter, Map copyOptions, Boolean tableAlias )
93 | {
94 | this(fields, redshiftTableDesc, delimiter, quoteCharacter, copyOptions, null, tableAlias);
95 | }
96 |
97 | public RedshiftScheme( Fields fields, RedshiftTableDesc redshiftTableDesc, String delimiter, String quoteCharacter, Map copyOptions, String conditions, Boolean tableAlias )
98 | {
99 | super( fields, redshiftTableDesc.getColumnNames(), conditions );
100 | super.tableAlias = tableAlias;
101 | // from the perspective of the JDBC-based parent class flag all fields as JDBC types.
102 | // for the internally managed S3 sink, use HFS tables (where Date is a String) so that the Tap doesn't
103 | // write out the integer representation.
104 | this.redshiftTableDesc = redshiftTableDesc;
105 | this.textDelimited = new TextDelimited( redshiftTableDesc.getHFSFields(), false, new RedshiftSafeDelimitedParser( delimiter, quoteCharacter ) );
106 | textDelimited.setSinkFields( getSinkFields() );
107 | this.sinkScheme = this;
108 | if( copyOptions != null )
109 | this.copyOptions.putAll( copyOptions );
110 |
111 | if( !this.copyOptions.containsKey( RedshiftFactory.CopyOption.DELIMITER ) )
112 | this.copyOptions.put( RedshiftFactory.CopyOption.DELIMITER, DEFAULT_DELIMITER );
113 |
114 | this.copyOptions.put( RedshiftFactory.CopyOption.REMOVEQUOTES, null );
115 | }
116 |
117 | public RedshiftScheme( String[] columns, String[] orderBy, String[] updateBy )
118 | {
119 | super( columns, orderBy, updateBy );
120 | }
121 |
122 | public RedshiftScheme( Class extends DBInputFormat> inputFormat, Fields fields, String[] columns )
123 | {
124 | super( inputFormat, fields, columns );
125 | }
126 |
127 | public RedshiftScheme( String[] columnsNames, String contentsQuery, String countStarQuery )
128 | {
129 | super( columnsNames, contentsQuery, countStarQuery );
130 | }
131 |
132 | public TextDelimited getTextDelimited()
133 | {
134 | return textDelimited;
135 | }
136 |
137 | public TableDesc getRedshiftTableDesc()
138 | {
139 | return redshiftTableDesc;
140 | }
141 |
142 | public Map getCopyOptions()
143 | {
144 | return copyOptions;
145 | }
146 |
147 | @Override
148 | public void sinkConfInit( FlowProcess extends Configuration> flowProcess, Tap tap, Configuration jobConf )
149 | {
150 | if( ( (RedshiftTap) tap ).isUseDirectInsert() )
151 | {
152 | sinkScheme = this;
153 | super.sinkConfInit( flowProcess, tap, jobConf );
154 | }
155 | else
156 | {
157 | sinkScheme = textDelimited;
158 | sinkScheme.sinkConfInit( flowProcess, tap, jobConf );
159 | }
160 | }
161 |
162 | @Override
163 | public String toString()
164 | {
165 | if( getSinkFields().equals( getSourceFields() ) )
166 | return getClass().getSimpleName() + "[" + getSourceFields().print() + "]";
167 | else
168 | return getClass().getSimpleName() + "[" + getSourceFields().print() + "->" + getSinkFields().print() + "]";
169 | }
170 |
171 | }
172 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/main/java/cascading/jdbc/RedshiftTableDesc.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import java.lang.reflect.Type;
24 | import java.sql.Time;
25 | import java.util.ArrayList;
26 | import java.util.List;
27 |
28 | import cascading.tuple.Fields;
29 | import cascading.util.Util;
30 | import org.slf4j.Logger;
31 | import org.slf4j.LoggerFactory;
32 |
33 | /**
34 | * Adds in the Distribution Key and Sort Keys columns that are specific to Redshift. See AWS's docs for info. Note that
35 | * these columns must exist as defined column; they can't be keys that aren't in the columnNames list.
36 | */
37 | public class RedshiftTableDesc extends TableDesc
38 | {
39 |
40 | private static final Logger LOG = LoggerFactory.getLogger( RedshiftTap.class );
41 |
42 | private String distributionkey;
43 | private String[] sortKeys;
44 |
45 |
46 | public RedshiftTableDesc( String tableName, String[] columnNames, String[] columnDefs, String distributionkey, String[] sortKeys )
47 | {
48 | super( tableName, columnNames, columnDefs, null );
49 | this.distributionkey = distributionkey;
50 | this.sortKeys = sortKeys;
51 | }
52 |
53 | @Override
54 | public String getCreateTableStatement()
55 | {
56 | List createTableStatement = new ArrayList();
57 |
58 | createTableStatement = addCreateTableBodyTo( createTableStatement );
59 | String createTableCommand = String.format( getCreateTableFormat(), getTableName(), Util.join( createTableStatement, ", " ), getRedshiftTableKeys() );
60 | LOG.info( "Creating table: " + createTableCommand );
61 | return createTableCommand;
62 | }
63 |
64 | @Override
65 | public String[] getPrimaryKeys()
66 | {
67 | return null;
68 | }
69 |
70 | @Override
71 | protected List addCreateTableBodyTo( List createTableStatement )
72 | {
73 | createTableStatement = addDefinitionsTo( createTableStatement );
74 |
75 | return createTableStatement;
76 | }
77 |
78 | public Fields getHFSFields()
79 | {
80 | String[] columnDefs = getColumnDefs();
81 | if (columnDefs == null)
82 | return Fields.ALL;
83 |
84 | Type[] types = new Type[ columnDefs.length ];
85 |
86 | for( int i = 0; i < columnDefs.length; i++ )
87 | try
88 | {
89 | types[ i ] = findHFSTypeFor( columnDefs[ i ] );
90 | }
91 | catch( ClassNotFoundException exception )
92 | {
93 | LOG.error( "unable to find HFS type for: {}. defaulting to string", columnDefs[ i ] );
94 | types[ i ] = String.class;
95 | }
96 |
97 | return new Fields( getColumnNames(), types );
98 | }
99 |
100 | public static Type findHFSTypeFor( String fieldName ) throws ClassNotFoundException
101 | {
102 | if( "int".equals( fieldName ) )
103 | return int.class;
104 | else if( "int not null".equalsIgnoreCase( fieldName ) )
105 | return Integer.class;
106 | else if( fieldName != null && fieldName.startsWith( "varchar" ) )
107 | return String.class;
108 | else if( "time".equalsIgnoreCase( fieldName ) )
109 | return Time.class;
110 | else if( "date".equalsIgnoreCase( fieldName ) )
111 | return String.class;
112 | else if( "timestamp".equalsIgnoreCase( fieldName ) )
113 | return String.class;
114 | else
115 | return String.class;
116 | }
117 |
118 | protected String getCreateTableFormat()
119 | {
120 | return "CREATE TABLE %s ( %s ) %s";
121 | }
122 |
123 | private String getRedshiftTableKeys()
124 | {
125 | StringBuilder sb = new StringBuilder().append( "" );
126 |
127 | if( distributionkey != null )
128 | sb.append( " DISTKEY (" ).append( distributionkey ).append( ") " );
129 |
130 | if( sortKeys != null && sortKeys.length > 0 )
131 | sb.append( " SORTKEY (" ).append( Util.join( sortKeys, "," ) ).append( ") " );
132 |
133 | return sb.toString();
134 | }
135 |
136 |
137 | }
138 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/main/java/cascading/jdbc/RedshiftTap.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import java.io.IOException;
24 | import java.util.Map;
25 | import java.util.UUID;
26 |
27 | import cascading.flow.FlowProcess;
28 | import cascading.jdbc.db.DBConfiguration;
29 | import cascading.tap.SinkMode;
30 | import cascading.tap.Tap;
31 | import cascading.tap.hadoop.Hfs;
32 | import cascading.tuple.TupleEntryCollector;
33 | import org.apache.hadoop.conf.Configuration;
34 | import org.apache.hadoop.mapred.OutputCollector;
35 | import org.slf4j.Logger;
36 | import org.slf4j.LoggerFactory;
37 |
38 | /**
39 | * This class and {@link RedshiftScheme} manage the ability to read and write data to Amazon's Redshift via EMR.
40 | * Because Redshift data is loaded into Redshift via S3 but read from it via JDBC both these classes wrap the
41 | * pairing of an HFS {@link Tap} (for writing) and a JDBC {@link Tap} for reading behind one {@link cascading.scheme.Scheme}
42 | * object.
43 | */
44 | public class RedshiftTap extends JDBCTap
45 | {
46 |
private static final Logger LOG = LoggerFactory.getLogger( RedshiftTap.class );

/** JDBC driver used for all Redshift connections (Redshift speaks the PostgreSQL wire protocol). */
public static final String DB_DRIVER = "org.postgresql.Driver";

// AWS credentials handed to the job configuration when data is staged via S3
private AWSCredentials awsCredentials;
private RedshiftScheme redshiftScheme;
// HFS tap over the per-tap staging directory data is written to before loading
private Hfs hfsStagingDir;
// staging path with "s3n://" rewritten to "s3://" (see constructor)
private String s3WorkingDir;
// presumably keeps the staging data around after the load for debugging — TODO confirm in openForWrite
private boolean keepDebugHfsData;
// when true, rows are written directly over JDBC instead of being staged (see sinkConfInit)
private boolean useDirectInsert;
57 |
58 |
59 | /**
60 | * Redshift tap to stage data to S3 and then issue a JDBC COPY command to specified Redshift table
61 | *
62 | * @param sinkMode use {@link SinkMode#REPLACE} to drop Redshift table before loading;
63 | * {@link SinkMode#UPDATE} to not drop table for incremental loading
64 | */
65 | public RedshiftTap( String connectionUrl, String username, String password, String hfsStagingDir, AWSCredentials awsCredentials, RedshiftTableDesc redshiftTableDesc, RedshiftScheme redshiftScheme, SinkMode sinkMode, boolean keepDebugHfsData, boolean useDirectInsert )
66 | {
67 | super( connectionUrl, username, password, DB_DRIVER, redshiftTableDesc, redshiftScheme, sinkMode );
68 | this.redshiftScheme = redshiftScheme;
69 | String workingDirPath = hfsStagingDir + "/" + UUID.randomUUID();
70 | this.s3WorkingDir = workingDirPath.replaceAll( "s3n://", "s3://" );
71 | this.hfsStagingDir = new Hfs( redshiftScheme.getTextDelimited(), workingDirPath );
72 | this.awsCredentials = awsCredentials;
73 | this.keepDebugHfsData = keepDebugHfsData;
74 | this.useDirectInsert = useDirectInsert;
75 | LOG.info( "created {} ", toString() );
76 | }
77 |
/**
 * Redshift tap to stage data to S3 and then issue a JDBC COPY command to specified Redshift table.
 * Delegates to the primary constructor with keepDebugHfsData = false and useDirectInsert = true.
 *
 * @param sinkMode use {@link SinkMode#REPLACE} to drop Redshift table before loading;
 *                 {@link SinkMode#UPDATE} to not drop table for incremental loading
 */
public RedshiftTap( String connectionUrl, String username, String password, String hfsStagingDir, AWSCredentials awsCredentials, RedshiftTableDesc redshiftTableDesc, RedshiftScheme redshiftScheme, SinkMode sinkMode )
  {
  this( connectionUrl, username, password, hfsStagingDir, awsCredentials, redshiftTableDesc, redshiftScheme, sinkMode, false, true );
  }
88 |
/**
 * Simplified constructor for testing: no credentials, no staging directory,
 * direct insert enabled.
 */
protected RedshiftTap( String connectionUrl, RedshiftTableDesc redshiftTableDesc, RedshiftScheme redshiftScheme, SinkMode sinkMode )
  {
  this( connectionUrl, null, null, null, null, redshiftTableDesc, redshiftScheme, sinkMode, false, true );
  }
96 |
/**
 * Simplified constructor for testing. Like the table-desc variant, but additionally
 * omits the table description and sink mode; only the connection url and scheme are set.
 */
protected RedshiftTap( String connectionUrl, RedshiftScheme redshiftScheme )
{
this( connectionUrl, null, null, null, null, null, redshiftScheme, null, false, true );
}
104 |
105 | @Override
106 | public void sourceConfInit( FlowProcess extends Configuration> process, Configuration configuration )
107 | {
108 | if( username == null )
109 | DBConfiguration.configureDB( configuration, driverClassName, connectionUrl );
110 | else
111 | DBConfiguration.configureDB( configuration, driverClassName, connectionUrl, username, password );
112 |
113 | super.sourceConfInit( process, configuration );
114 | }
115 |
116 | @Override
117 | public void sinkConfInit( FlowProcess extends Configuration> process, Configuration conf )
118 | {
119 | if (!useDirectInsert) {
120 | // if we haven't set the credentials beforehand try to set them from the job conf
121 | if( awsCredentials.equals( AWSCredentials.RUNTIME_DETERMINED ) )
122 | {
123 | String accessKey = conf.get( "fs.s3n.awsAccessKeyId", null );
124 | String secretKey = conf.get( "fs.s3n.awsSecretAccessKey", null );
125 | awsCredentials = new AWSCredentials( accessKey, secretKey );
126 | }
127 | // make the credentials to be used available to the JobConf if they were set differently
128 | conf.set( "fs.s3n.awsAccessKeyId", awsCredentials.getAwsAccessKey() );
129 | conf.set( "fs.s3n.awsSecretAccessKey", awsCredentials.getAwsSecretKey() );
130 | }
131 | super.sinkConfInit( process, conf );
132 | }
133 |
134 | @Override
135 | public TupleEntryCollector openForWrite( FlowProcess extends Configuration> flowProcess, OutputCollector outputCollector ) throws IOException
136 | {
137 | // force a table creation if one does not exist
138 | LOG.info( "creating db table: " + getTableName() );
139 | super.createResource( flowProcess );
140 | if( useDirectInsert )
141 | {
142 | return super.openForWrite( flowProcess, outputCollector );
143 | }
144 | else
145 | {
146 | LOG.info( "Creating scratch dir: " + hfsStagingDir.getIdentifier() );
147 | hfsStagingDir.createResource( flowProcess );
148 | return hfsStagingDir.openForWrite( flowProcess );
149 | }
150 | }
151 |
152 | @Override
153 | public boolean createResource( Configuration configuration ) throws IOException
154 | {
155 | LOG.info( "creating resources" );
156 | boolean createSuccess = true;
157 | if( !useDirectInsert )
158 | {
159 | LOG.info( "creating hfs scratch space: {}", hfsStagingDir.getIdentifier() );
160 | createSuccess = hfsStagingDir.createResource( configuration );
161 | }
162 | if( createSuccess )
163 | {
164 | LOG.info( "creating DB table: {}", super.getIdentifier() );
165 | createSuccess = super.createResource( configuration );
166 | }
167 | return createSuccess;
168 | }
169 |
170 | @Override
171 | public boolean deleteResource( Configuration configuration ) throws IOException
172 | {
173 | LOG.info( "deleting resources" );
174 | boolean deleteSuccsess;
175 | LOG.info( "deleting DB table: {}", super.getIdentifier() );
176 | deleteSuccsess = super.deleteResource( configuration );
177 | if( deleteSuccsess && hfsStagingDir.resourceExists( configuration ) )
178 | {
179 | LOG.info( "deleting hfs scratch space: {}", hfsStagingDir.getIdentifier() );
180 | deleteSuccsess = hfsStagingDir.deleteResource( configuration );
181 | }
182 | return deleteSuccsess;
183 | }
184 |
185 | @Override
186 | public boolean commitResource( Configuration configuration ) throws IOException
187 | {
188 | if( !useDirectInsert )
189 | {
190 | String copyCommand = buildCopyFromS3Command();
191 | try
192 | {
193 | int results = super.executeUpdate( copyCommand );
194 | if( results != 0 )
195 | LOG.info( "Copy return code: {} ( expected: 0 )", results );
196 | }
197 | finally
198 | {
199 | // clean scratch resources even if load failed.
200 | if( !keepDebugHfsData && hfsStagingDir.resourceExists( configuration ) )
201 | hfsStagingDir.deleteResource( configuration );
202 | }
203 | }
204 | return true;
205 | }
206 |
207 | @Override
208 | public long getModifiedTime( Configuration configuration ) throws IOException
209 | {
210 | if( hfsStagingDir.resourceExists( configuration ) )
211 | return hfsStagingDir.getModifiedTime( configuration );
212 | return super.getModifiedTime( configuration );
213 | }
214 |
/**
 * Returns whether rows are written straight to Redshift over JDBC instead of being
 * staged to HFS/S3 and loaded with a COPY command on commit.
 *
 * @return true when direct JDBC inserts are used
 */
public boolean isUseDirectInsert()
{
return useDirectInsert;
}
219 |
220 | public String buildCopyFromS3Command()
221 | {
222 | return String.format( "COPY %s from '%s' %s %s ;",
223 | redshiftScheme.getRedshiftTableDesc().getTableName(),
224 | s3WorkingDir,
225 | buildAuthenticationOptions(),
226 | buildCopyOptions() );
227 | }
228 |
229 | protected String buildAuthenticationOptions()
230 | {
231 | return String.format( " CREDENTIALS 'aws_access_key_id=%s;aws_secret_access_key=%s' ",
232 | awsCredentials.getAwsAccessKey(),
233 | awsCredentials.getAwsSecretKey() );
234 | }
235 |
/**
 * Renders the scheme's copy options as a space separated string appended to the COPY
 * command. An option with a null value is emitted as a bare keyword; otherwise the
 * option name is followed by its rendered arguments.
 */
// NOTE(review): the raw Map.Entry looks like a stripped generic — getArguments(...) is
// invoked on the key, so the declared key type of getCopyOptions() must expose it;
// confirm against RedshiftScheme.getCopyOptions().
private String buildCopyOptions()
{
StringBuilder builder = new StringBuilder();
for( Map.Entry copyOption : redshiftScheme.getCopyOptions().entrySet() )
{
builder.append( " " );
if( copyOption.getValue() == null )
builder.append( copyOption.getKey().toString() );
else
builder.append( copyOption.getKey().toString() ).append( " " ).append( copyOption.getKey().getArguments( copyOption.getValue() ) );
}
return builder.toString();
}
249 |
250 | @Override
251 | public String toString()
252 | {
253 | if( getIdentifier() != null )
254 | return getClass().getSimpleName() + "[\"" + getScheme() + "\"]" + "[->\"" + hfsStagingDir.getIdentifier() + "\"->\"" + super.getIdentifier() + "\"]"; // sanitize
255 | else
256 | return getClass().getSimpleName() + "[\"" + getScheme() + "\"]" + "[no more info]";
257 | }
258 |
259 | }
260 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/main/resources/cascading/bind/provider.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | #
4 | # Project and contact information: http://www.cascading.org/
5 | #
6 | # This file is part of the Cascading project.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at
11 | #
12 | # http://www.apache.org/licenses/LICENSE-2.0
13 | #
14 | # Unless required by applicable law or agreed to in writing, software
15 | # distributed under the License is distributed on an "AS IS" BASIS,
16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and
18 | # limitations under the License.
19 | #
20 |
21 | # default name of provider
22 |
23 | cascading.bind.provider.names=redshift
24 | cascading.bind.provider.redshift.platforms=hadoop,hadoop2-mr1
25 |
# either a factory classname or individual tap/scheme classnames may be given; this provider uses the factory
27 | cascading.bind.provider.redshift.factory.classname=cascading.jdbc.RedshiftFactory
28 |
29 |
# define protocols differentiated by properties
# NOTE(review): several keys below are prefixed "cascading.bind.provider.postgresql."
# although this provider is registered as "redshift" — verify those keys are actually
# read, or whether they should use the "redshift" provider name.
31 | cascading.bind.provider.redshift.protocol.names=jdbc
32 | cascading.bind.provider.redshift.protocol.jdbc.schemes=postgresql
33 | cascading.bind.provider.postgresql.protocol.jdbc.tabledescseparator=:
34 | cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.tablename=
35 | cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.columnnames=
36 | cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.columndefs=
37 | cascading.bind.provider.postgresql.protocol.jdbc.s3outputpath=
38 | cascading.bind.provider.postgresql.protocol.jdbc.awsaccesskey=
39 | cascading.bind.provider.postgresql.protocol.jdbc.awssecretkey=
40 | cascading.bind.provider.postgresql.protocol.jdbc.jdbcuser=
41 | cascading.bind.provider.postgresql.protocol.jdbc.jdbcpassword=
42 | cascading.bind.provider.postgresql.protocol.jdbc.keepdebughfsdata=
43 |
# define formats differentiated by properties
45 | cascading.bind.provider.redshift.format.names=postgresql
46 | cascading.bind.provider.redshift.format.postgresql.protocols=jdbc
47 | cascading.bind.provider.postgresql.format.postgresql.separator=:
48 | cascading.bind.provider.postgresql.format.postgresql.columnnames=
49 | cascading.bind.provider.postgresql.format.postgresql.orderBy=
50 | cascading.bind.provider.postgresql.format.postgresql.limit=
51 | cascading.bind.provider.postgresql.format.postgresql.updateBy=
52 | cascading.bind.provider.postgresql.format.postgresql.tableAlias=
53 | cascading.bind.provider.postgresql.format.postgresql.selectquery=
54 | cascading.bind.provider.postgresql.format.postgresql.countquery=
55 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/test/java/cascading/jdbc/RedshiftSafeDelimitedParserTest.java:
--------------------------------------------------------------------------------
1 | package cascading.jdbc;
2 |
3 | import java.io.UnsupportedEncodingException;
4 |
5 | import cascading.tuple.Tuple;
6 | import org.junit.Test;
7 |
8 | import static org.junit.Assert.assertEquals;
9 |
10 | public class RedshiftSafeDelimitedParserTest {
11 | @Test
12 | public void shouldJoinValuesAndQuoteStringField() {
13 | RedshiftSafeDelimitedParser parser = new RedshiftSafeDelimitedParser(",", "\"");
14 | StringBuffer buf = new StringBuffer();
15 |
16 | parser.joinLine(new Tuple("Hello", "world"), buf);
17 |
18 | assertEquals("\"Hello\",\"world\"", buf.toString());
19 | }
20 |
21 | @Test
22 | public void shouldJoinValuesWithoutQuotingNumeric() {
23 | RedshiftSafeDelimitedParser parser = new RedshiftSafeDelimitedParser(",", "\"");
24 | StringBuffer buf = new StringBuffer();
25 |
26 | parser.joinLine(new Tuple("Hello", 102), buf);
27 |
28 | assertEquals("\"Hello\",102", buf.toString());
29 | }
30 |
31 | @Test
32 | public void shouldEscapeSingleQuotes() {
33 | RedshiftSafeDelimitedParser parser = new RedshiftSafeDelimitedParser(",", "\"");
34 | StringBuffer buf = new StringBuffer();
35 |
36 | parser.joinLine(new Tuple("Some", "'name"), buf);
37 |
38 | assertEquals("\"Some\",\"\\'name\"", buf.toString());
39 | }
40 |
41 | @Test(expected=InvalidCodepointForRedshiftException.class)
42 | public void shouldThrowErrorWithInvalidCodepointCharacter() throws UnsupportedEncodingException {
43 | RedshiftSafeDelimitedParser parser = new RedshiftSafeDelimitedParser(",", "\"");
44 | StringBuffer buf = new StringBuffer();
45 |
46 | byte[] characterBytes = new byte[] {(byte) 0xED, (byte) 0xA0, (byte) 0x80};
47 |
48 | parser.joinLine(new Tuple(new String(characterBytes, "UTF-8")), buf);
49 |
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/test/java/cascading/jdbc/RedshiftTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | /**
24 | * Tests against Postgres database since that's the Redshift API .
25 | **/
26 |
27 | import java.util.Properties;
28 |
29 | import cascading.tap.SinkMode;
30 | import cascading.tuple.Fields;
31 | import org.junit.Before;
32 |
/**
 * Runs the shared JDBCTestingBase suite against a Redshift-compatible endpoint whose
 * jdbc url is supplied via the "cascading.jdbcurl" system property.
 */
public class RedshiftTest extends JDBCTestingBase
{

@Before
public void setUp()
{
setDriverName( RedshiftTap.DB_DRIVER );
setJdbcurl( System.getProperty( "cascading.jdbcurl" ) );
setJDBCFactory( new RedshiftFactory() );
}

// the overrides below hand Redshift-specific scheme/tap/table-desc instances to the base suite

@Override
protected RedshiftScheme getNewJDBCScheme( Fields fields, String[] columnNames )
{
return new RedshiftScheme( inputFormatClass, fields, columnNames );
}

@Override
protected RedshiftScheme getNewJDBCScheme( String[] columns, String[] orderBy, String[] updateBy )
{
return new RedshiftScheme( columns, orderBy, updateBy );
}

@Override
protected RedshiftScheme getNewJDBCScheme( String[] columnsNames, String contentsQuery, String countStarQuery )
{
return new RedshiftScheme( columnsNames, contentsQuery, countStarQuery );
}

// primary keys are intentionally dropped; distribution/sort keys are passed as null
@Override
protected RedshiftTableDesc getNewTableDesc( String tableName, String[] columnNames, String[] columnDefs, String[] primaryKeys )
{
return new RedshiftTableDesc( tableName, columnNames, columnDefs, null, null );
}

@Override
protected RedshiftTap getNewJDBCTap( TableDesc tableDesc, JDBCScheme jdbcScheme, SinkMode sinkMode )
{
return new RedshiftTap( jdbcurl, (RedshiftTableDesc) tableDesc, (RedshiftScheme) jdbcScheme, sinkMode );
}

@Override
protected RedshiftTap getNewJDBCTap( JDBCScheme jdbcScheme )
{
return new RedshiftTap( jdbcurl, (RedshiftScheme) jdbcScheme );
}

@Override
protected SinkMode getSinkModeForReset()
{
return SinkMode.REPLACE;
}

// force direct JDBC inserts so tests do not require S3 staging
@Override
protected Properties createProperties()
{
Properties properties = super.createProperties();
properties.put( RedshiftFactory.PROTOCOL_USE_DIRECT_INSERT, "true" );
return properties;
}
}
94 |
95 |
96 |
--------------------------------------------------------------------------------
/cascading-jdbc-teradata/build.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
// test helpers (JDBCTestingBase etc.) are pulled from the core module's test sources
evaluationDependsOn( ":cascading-jdbc-core" )

// version of the Teradata JDBC driver and its companion GSS config artifact
ext.teradataVersion = "14.10.00.39"

dependencies {
compile project( ':cascading-jdbc-core' )

// Teradata driver jars; must be resolvable from a configured repository
compile( group: 'com.teradata', name: 'terajdbc4', version: teradataVersion )
compile( group: 'com.teradata', name: 'tdgssconfig', version: teradataVersion )

// reuse the compiled test classes from the core module
testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath
}

test {
// tests read the connection url from the "cascading.jdbcurl" system property
systemProperty( "cascading.jdbcurl", System.getProperty( "cascading.jdbc.url.teradata" ) )
}
37 |
38 |
--------------------------------------------------------------------------------
/cascading-jdbc-teradata/src/main/java/cascading/jdbc/TeradataJDBCFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import cascading.jdbc.db.DBInputFormat;
24 | import cascading.jdbc.db.TeradataDBInputFormat;
25 |
26 | /**
27 | * Teradata specific subclass of {@link cascading.jdbc.JDBCFactory}
28 | * */
29 | public class TeradataJDBCFactory extends JDBCFactory
30 | {
31 | @Override
32 | protected Class extends DBInputFormat> getInputFormatClass()
33 | {
34 | return TeradataDBInputFormat.class;
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/cascading-jdbc-teradata/src/main/java/cascading/jdbc/TeradataTableDesc.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2009 Concurrent, Inc.
3 | *
4 | * This work has been released into the public domain
5 | * by the copyright holder. This applies worldwide.
6 | *
7 | * In case this is not legally possible:
8 | * The copyright holder grants any entity the right
9 | * to use this work for any purpose, without any
10 | * conditions, unless such conditions are required by law.
11 | */
12 |
13 | package cascading.jdbc;
14 |
15 | import java.io.Serializable;
16 | import java.lang.reflect.Type;
17 | import java.util.ArrayList;
18 | import java.util.List;
19 | import java.util.Arrays;
20 |
21 | import cascading.tuple.Fields;
22 |
23 | /**
24 | * Class TeradataTableDesc extends TableDesc which describes a SQL based table,
25 | * this description is used by the
26 | * {@link JDBCTap} when creating a missing table and by the JDBCScheme, for the
27 | * correct type coercion.
28 | *
29 | * This class is used to override completeFromFields to use TeradataInternalMapping.java
30 | *
31 | * @see JDBCTap
32 | * @see JDBCScheme
33 | */
34 | public class TeradataTableDesc extends TableDesc implements Serializable
35 | {
36 | private static final long serialVersionUID = 5009899098019404131L;
37 |
38 | /**
39 | * Field columnNames
40 | */
41 | String[] columnNames;
42 | /**
43 | * Field primaryKeys
44 | */
45 | String[] primaryKeys;
46 |
47 | /**
48 | * Constructor TeradataTableDesc creates a new TeradataTableDesc instance.
49 | *
50 | * @param tableName of type String
51 | * @param columnNames of type String[]
52 | * @param columnDefs of type String[]
53 | * @param primaryKeys of type String
54 | */
55 | public TeradataTableDesc( String tableName, String[] columnNames, String[] columnDefs, String[] primaryKeys )
56 | {
57 | super( tableName, columnNames, columnDefs, primaryKeys );
58 | this.columnNames = columnNames;
59 | this.primaryKeys = primaryKeys;
60 | }
61 |
62 | /**
63 | * {@inheritDoc}
64 | */
65 | @Override
66 | public void completeFromFields( Fields fields )
67 | {
68 | if( !hasRequiredTableInformation() )
69 | {
70 | List names = new ArrayList();
71 | List defs = new ArrayList();
72 |
73 | for( int i = 0; i < fields.size(); i++ )
74 | {
75 | Comparable> cmp = fields.get( i );
76 | names.add( cmp.toString() );
77 | Type internalType = InternalTypeMapping.findInternalType( fields.getType( i ) );
78 | String type = InternalTypeMapping.sqltypeForClass( internalType );
79 | defs.add( type );
80 | }
81 | if( columnNames == null || columnNames.length == 0 )
82 | columnNames = names.toArray( new String[ names.size() ] );
83 | if( columnDefs == null || columnDefs.length == 0 )
84 | columnDefs = defs.toArray( new String[ defs.size() ] );
85 |
86 | for( int i = 0; i < columnNames.length; i++ )
87 | {
88 | if( Arrays.asList( primaryKeys ).contains( columnNames[ i ] ) )
89 | {
90 | if( columnDefs[ i ].equalsIgnoreCase( "varchar(256)" ) )
91 | columnDefs[ i ] = "varchar(256) not null";
92 | }
93 | }
94 |
95 | // now it has to be complete and usable, if not bail out.
96 | if( !hasRequiredTableInformation() )
97 | throw new IllegalStateException( "could not derive TableDesc from given fields." );
98 | }
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
/cascading-jdbc-teradata/src/main/java/cascading/jdbc/db/TeradataDBInputFormat.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc.db;
22 |
23 | import java.io.IOException;
24 | import java.sql.Connection;
25 | import java.sql.SQLException;
26 |
27 | import cascading.CascadingException;
28 | import org.apache.hadoop.io.LongWritable;
29 | import org.apache.hadoop.mapred.JobConf;
30 | import org.apache.hadoop.mapred.RecordReader;
31 |
32 | /**
33 | * Teradata specific sub-class of DBInputFormat that provides a special select query for getting the data from a
34 | * Teradata instance.
35 | */
@SuppressWarnings("rawtypes")
public class TeradataDBInputFormat extends DBInputFormat
{
/** Hands out the Teradata specific record reader for the given split. */
@Override
protected RecordReader getRecordReaderInternal( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException
{
return new TeradataDBRecordReader( split, inputClass, job );
}

class TeradataDBRecordReader extends DBInputFormat.DBRecordReader
{
// NOTE(review): the incoming 'split' is discarded and a fresh, empty DBInputSplit is
// passed to super — presumably deliberate so the FASTEXPORT-friendly query below reads
// without LIMIT/OFFSET windows, but confirm multiple map tasks don't each read the
// entire table.
protected TeradataDBRecordReader( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException
{
super( new cascading.jdbc.db.DBInputFormat.DBInputSplit(), inputClass, job );
}

/** Returns the query for selecting the records from an Teradata DB.
* omits the LIMIT and OFFSET for FASTEXPORT
*/
public String getSelectQuery()
{
StringBuilder query = new StringBuilder();

// an explicit input query configured by the user takes precedence
if( dbConf.getInputQuery() == null )
{
query.append( "SELECT " );

for( int i = 0; i < fieldNames.length; i++ )
{
query.append( fieldNames[ i ] );

if( i != fieldNames.length - 1 )
query.append( ", " );
}
query.append( " FROM " ).append( tableName );

if( conditions != null && conditions.length() > 0 )
query.append( " WHERE (" ).append( conditions ).append( ")" );

String orderBy = dbConf.getInputOrderBy();

if( orderBy != null && orderBy.length() > 0 )
query.append( " ORDER BY " ).append( orderBy );
}
else
query.append( dbConf.getInputQuery() );

return query.toString();
}
}
}
87 |
--------------------------------------------------------------------------------
/cascading-jdbc-teradata/src/main/resources/cascading/bind/provider.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | #
4 | # Project and contact information: http://www.cascading.org/
5 | #
6 | # This file is part of the Cascading project.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at
11 | #
12 | # http://www.apache.org/licenses/LICENSE-2.0
13 | #
14 | # Unless required by applicable law or agreed to in writing, software
15 | # distributed under the License is distributed on an "AS IS" BASIS,
16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and
18 | # limitations under the License.
19 | #
20 |
21 | # default name of provider
22 | cascading.bind.provider.names=teradata
23 | cascading.bind.provider.teradata.platforms=hadoop,hadoop2-mr1
24 |
25 | # factory
26 | cascading.bind.provider.teradata.factory.classname=cascading.jdbc.TeradataJDBCFactory
27 |
28 | # protocol is jdbc
29 | cascading.bind.provider.teradata.protocol.names=jdbc
30 | cascading.bind.provider.teradata.protocol.jdbc.schemes=teradata
31 | cascading.bind.provider.teradata.protocol.jdbc.jdbcdriver=com.teradata.jdbc.TeraDriver
32 | cascading.bind.provider.teradata.protocol.jdbc.tabledescseparator=:
33 | cascading.bind.provider.teradata.protocol.jdbc.jdbcuser=
34 | cascading.bind.provider.teradata.protocol.jdbc.jdbcpassword=
35 | cascading.bind.provider.teradata.protocol.jdbc.tabledesc.tablename=
36 | cascading.bind.provider.teradata.protocol.jdbc.tabledesc.columnnames=
37 | cascading.bind.provider.teradata.protocol.jdbc.tabledesc.columndefs=
38 | cascading.bind.provider.teradata.protocol.jdbc.tabledesc.primarykeys=
39 | cascading.bind.provider.teradata.protocol.jdbc.sinkmode=
40 |
41 | # format is teradata
42 | cascading.bind.provider.teradata.format.names=teradata
43 | cascading.bind.provider.teradata.format.teradata.protocols=jdbc
44 | cascading.bind.provider.teradata.format.teradata.separator=:
45 | cascading.bind.provider.teradata.format.teradata.columnnames=
46 | cascading.bind.provider.teradata.format.teradata.orderBy=
47 | cascading.bind.provider.teradata.format.teradata.conditions=
48 | cascading.bind.provider.teradata.format.teradata.limit=
49 | cascading.bind.provider.teradata.format.teradata.updateBy=
50 | cascading.bind.provider.teradata.format.teradata.tableAlias=
51 | cascading.bind.provider.teradata.format.teradata.selectquery=
52 | cascading.bind.provider.teradata.format.teradata.countquery=
53 | cascading.bind.provider.teradata.format.teradata.replaceoninsert=false
54 |
55 |
--------------------------------------------------------------------------------
/cascading-jdbc-teradata/src/test/java/cascading/jdbc/TeradataJDBCFactoryTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import cascading.jdbc.db.TeradataDBInputFormat;
24 | import org.junit.Test;
25 |
26 | import static org.junit.Assert.*;
27 |
/** Verifies that the Teradata factory hands out the Teradata specific input format. */
public class TeradataJDBCFactoryTest
{

@Test
public void testGetInputFormatClass()
{
assertEquals( TeradataDBInputFormat.class, new TeradataJDBCFactory().getInputFormatClass() );
}
}
37 |
--------------------------------------------------------------------------------
/cascading-jdbc-teradata/src/test/java/cascading/jdbc/TeradataTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.jdbc;
22 |
23 | import cascading.jdbc.TeradataTableDesc;
24 | import cascading.jdbc.db.TeradataDBInputFormat;
25 | import org.junit.Before;
26 |
/**
 * Runs the shared JDBCTestingBase suite against a Teradata instance whose jdbc url is
 * supplied via the "cascading.jdbcurl" system property.
 */
public class TeradataTest extends JDBCTestingBase
{
@Before
public void setUp()
{
setDriverName( "com.teradata.jdbc.TeraDriver" );
setJdbcurl( System.getProperty( "cascading.jdbcurl" ) );
setInputFormatClass( TeradataDBInputFormat.class );
setFactory( new TeradataJDBCFactory() );
}

// supply the Teradata table descriptor so the base suite exercises its
// Teradata-specific completeFromFields logic
@Override
public TeradataTableDesc getNewTableDesc( String tableName, String[] columnNames, String[] columnDefs, String[] primaryKeys )
{
return new TeradataTableDesc( tableName, columnNames, columnDefs, primaryKeys );
}
}
--------------------------------------------------------------------------------
/etc/properties.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
// expose TeamCity-provided build properties as JVM system properties
if( project.properties[ 'teamcity' ] ) // make them system properties
System.properties.putAll( project.properties[ 'teamcity' ] )

// optionally load AWS credentials from an external properties file pointed to by
// the 'aws.properties' system property
if( System.properties[ 'aws.properties' ] )
{
file( System.properties[ 'aws.properties' ] ).withReader { reader ->
def awsProperties = new Properties()
awsProperties.load( reader )
System.properties.putAll( awsProperties )
}
}

// publishing repository coordinates; credentials come from system properties
ext.repoUrl = 'http://conjars.org/repo/'
ext.repoUserName = System.properties[ 'publish.repo.userName' ]
ext.repoPassword = System.properties[ 'publish.repo.password' ]
35 | ext.repoPassword = System.properties[ 'publish.repo.password' ]
36 |
37 |
--------------------------------------------------------------------------------
/etc/s3Upload.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | import com.monochromeroad.gradle.plugin.aws.s3.S3Sync
22 | import com.monochromeroad.gradle.plugin.aws.s3.ACL
23 |
// Resolves the S3 sync plugin used by the s3Upload task below.
buildscript {
  repositories {
    // resolution order matters: prefer local, then central, then forges
    mavenLocal()
    mavenCentral()
    maven { url 'http://repository-monochromeroad.forge.cloudbees.com/release/' }
    maven { url 'http://conjars.org/repo/' } // use conjars if cloudbees is down
  }
  dependencies {
    classpath group: 'com.monochromeroad.gradle', name: 'gradle-aws-s3-sync', version: '0.5'
  }
}
35 |
// Syncs the locally staged publish output to a public S3 bucket.
// Requires rootProject.awsAccessId/awsSecretKey and project.s3Bucket to be
// set (see aws.properties handling in properties.gradle) — TODO confirm
// where s3Bucket is declared; it is not visible in this file.
task s3Upload( type: S3Sync ) {

  accessKey = rootProject.awsAccessId
  secretKey = rootProject.awsSecretKey

  keepFiles = true // prevents deletion from bucket

  // uploaded artifacts are world-readable
  acl ACL.PublicRead

  // jets3t synchronizer tuning shared by all projects
  configFile "${rootProject.projectDir}/etc/synchronizer.properties"

  ext.source = "${buildDir}/publish"

  ext.destination = "${project.s3Bucket}/${project.name}/${majorVersion}/"

  from source
  into destination
}
54 |
--------------------------------------------------------------------------------
/etc/synchronizer.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | #
4 | # Project and contact information: http://www.cascading.org/
5 | #
6 | # This file is part of the Cascading project.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at
11 | #
12 | # http://www.apache.org/licenses/LICENSE-2.0
13 | #
14 | # Unless required by applicable law or agreed to in writing, software
15 | # distributed under the License is distributed on an "AS IS" BASIS,
16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and
18 | # limitations under the License.
19 | #
20 |
21 | # see http://jets3t.s3.amazonaws.com/toolkit/configuration.html
22 |
23 | #s3service.default-bucket-location=Tokyo
24 | # httpclient.max-connections=2
25 | # threaded-service.admin-max-thread-count=5
26 |
27 | ###
28 | # File/Object comparison properties
29 | ###
30 |
31 | filecomparer.skip-symlinks=true
32 | #filecomparer.use-md5-files=true
33 | #filecomparer.generate-md5-files=true
34 | #filecomparer.md5-files-root-dir=.cache
35 | filecomparer.skip-upload-of-md5-files=true
36 | filecomparer.assume-local-latest-in-mismatch=false
37 |
38 | # Page Caching - none
39 | upload.metadata.Cache-Control=no-cache
40 |
41 | upload.transformed-files-batch-size=1000
42 |
--------------------------------------------------------------------------------
/etc/version.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
apply from: './etc/properties.gradle'

// Current VCS commit: prefer the TeamCity-provided value, otherwise ask git.
project.ext.currentCommit = System.properties[ 'build.vcs.number' ]

if( !currentCommit )
  {
  def commitPath = File.createTempFile( "commit", "tmp" )

  // try/finally so the temp file is removed even if git fails or the
  // output cannot be read (the original leaked it on exception)
  try
    {
    ant.exec( dir: '.', executable: "git", output: commitPath ) {
      arg( line: 'rev-parse HEAD' )
    }

    def lines = commitPath.readLines()

    // guard against empty output (e.g. not a git checkout, git missing);
    // the original threw IndexOutOfBoundsException here
    currentCommit = lines ? lines.get( 0 ).trim() : 'unknown'
    }
  finally
    {
    commitPath.delete()
    }
  }

// Release version numbers are kept in version.properties at the repo root.
def versionProperties = new Properties()
file( 'version.properties' ).withInputStream { versionProperties.load( it ) }

ext.majorVersion = versionProperties[ 'cascading-jdbc.release.major' ]
ext.minorVersion = versionProperties[ 'cascading-jdbc.release.minor' ]

// CI supplies build.number; local builds fall back to 'dev'
ext.buildNumber = System.getProperty( 'build.number', 'dev' )

// mark non-final builds: private releases as priv-*, work-in-progress as wip-*
if( System.properties[ 'cascading-jdbc.release.private' ] )
  buildNumber = "priv-${buildNumber}"
else if( !System.properties[ 'cascading-jdbc.release.final' ] )
  buildNumber = "wip-${buildNumber}"

ext.releaseTag = "${majorVersion}-${buildNumber}"

if( !System.properties[ 'build.number' ] )
  releaseTag = "wip-${majorVersion}"

// releaseVersion = major[.minor][-buildNumber unless final]
ext.releaseVersion = majorVersion

if( minorVersion )
  releaseVersion = "${releaseVersion}.${minorVersion}"

if( !System.properties[ 'cascading-jdbc.release.final' ] )
  releaseVersion = "${releaseVersion}-${buildNumber}"
63 |
--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
// Always-built modules: core plus the two embeddable test databases.
include 'cascading-jdbc-core'
include 'cascading-jdbc-derby'
include 'cascading-jdbc-h2'

// Optional database modules are only included when a connection URL for the
// database is supplied (or dev mode is on), since their tests need a server.
[ "mysql", "postgresql", "oracle", "redshift", "teradata" ].each { dbsystem ->
  def hasUrl = System.getProperty( "cascading.jdbc.url.${dbsystem}" )
  def devMode = System.getProperty( "dev" )

  if ( hasUrl || devMode )
    include "cascading-jdbc-${dbsystem}"
  else
    logger.warn("excluding cascading-jdbc-${dbsystem} due to missing cascading.jdbc.url.${dbsystem} property")
}
34 |
--------------------------------------------------------------------------------
/version.properties:
--------------------------------------------------------------------------------
1 | cascading-jdbc.release.major=3.0
2 | cascading-jdbc.release.minor=0
3 |
--------------------------------------------------------------------------------