├── .gitignore ├── CHANGES.txt ├── README.md ├── build.gradle ├── cascading-jdbc-core ├── build.gradle └── src │ ├── main │ └── java │ │ └── cascading │ │ └── jdbc │ │ ├── InternalTypeMapping.java │ │ ├── JDBCFactory.java │ │ ├── JDBCScheme.java │ │ ├── JDBCTap.java │ │ ├── JDBCUtil.java │ │ ├── TableDesc.java │ │ ├── TupleRecord.java │ │ └── db │ │ ├── BatchProcessingException.java │ │ ├── DBConfiguration.java │ │ ├── DBInputFormat.java │ │ ├── DBOutputFormat.java │ │ └── DBWritable.java │ └── test │ ├── java │ └── cascading │ │ └── jdbc │ │ ├── GenericJdbcCheck.java │ │ ├── InternalTypeMappingTest.java │ │ ├── JDBCFactoryTest.java │ │ ├── JDBCSchemeTest.java │ │ ├── JDBCTestingBase.java │ │ ├── TableDescTest.java │ │ └── TupleRecordTest.java │ └── resources │ ├── data │ └── small.txt │ └── log4j.properties ├── cascading-jdbc-derby ├── build.gradle └── src │ ├── main │ ├── java │ │ └── cascading │ │ │ └── jdbc │ │ │ ├── DerbyFactory.java │ │ │ └── db │ │ │ └── DerbyDBInputFormat.java │ └── resources │ │ └── cascading │ │ └── bind │ │ └── provider.properties │ └── test │ └── java │ └── cascading │ └── jdbc │ └── DerbyTest.java ├── cascading-jdbc-h2 ├── build.gradle └── src │ ├── main │ └── resources │ │ └── cascading │ │ └── bind │ │ └── provider.properties │ └── test │ └── java │ └── cascading │ └── jdbc │ └── H2Test.java ├── cascading-jdbc-mysql ├── build.gradle └── src │ ├── main │ ├── java │ │ └── cascading │ │ │ └── jdbc │ │ │ ├── MySqlFactory.java │ │ │ ├── MySqlScheme.java │ │ │ └── db │ │ │ ├── MySqlDBConfiguration.java │ │ │ ├── MySqlDBInputFormat.java │ │ │ └── MySqlDBOutputFormat.java │ └── resources │ │ └── cascading │ │ └── bind │ │ └── provider.properties │ └── test │ └── java │ └── cascading │ └── jdbc │ └── MysqlTest.java ├── cascading-jdbc-oracle ├── build.gradle └── src │ ├── main │ ├── java │ │ └── cascading │ │ │ └── jdbc │ │ │ ├── OracleJDBCFactory.java │ │ │ └── db │ │ │ └── OracleDBInputFormat.java │ └── resources │ │ └── cascading │ │ └── 
bind │ │ └── provider.properties │ └── test │ └── java │ └── cascading │ └── jdbc │ ├── OracleJDBCFactoryTest.java │ └── OracleTest.java ├── cascading-jdbc-postgresql ├── build.gradle └── src │ ├── main │ └── resources │ │ └── cascading │ │ └── bind │ │ └── provider.properties │ └── test │ └── java │ └── cascading │ └── jdbc │ └── PostgresTest.java ├── cascading-jdbc-redshift ├── build.gradle └── src │ ├── main │ ├── java │ │ └── cascading │ │ │ └── jdbc │ │ │ ├── AWSCredentials.java │ │ │ ├── InvalidCodepointForRedshiftException.java │ │ │ ├── RedshiftFactory.java │ │ │ ├── RedshiftSafeDelimitedParser.java │ │ │ ├── RedshiftScheme.java │ │ │ ├── RedshiftTableDesc.java │ │ │ └── RedshiftTap.java │ └── resources │ │ └── cascading │ │ └── bind │ │ └── provider.properties │ └── test │ └── java │ └── cascading │ └── jdbc │ ├── RedshiftSafeDelimitedParserTest.java │ └── RedshiftTest.java ├── cascading-jdbc-teradata ├── build.gradle └── src │ ├── main │ ├── java │ │ └── cascading │ │ │ └── jdbc │ │ │ ├── TeradataJDBCFactory.java │ │ │ ├── TeradataTableDesc.java │ │ │ └── db │ │ │ └── TeradataDBInputFormat.java │ └── resources │ │ └── cascading │ │ └── bind │ │ └── provider.properties │ └── test │ └── java │ └── cascading │ └── jdbc │ ├── TeradataJDBCFactoryTest.java │ └── TeradataTest.java ├── etc ├── properties.gradle ├── s3Upload.gradle ├── synchronizer.properties └── version.gradle ├── settings.gradle └── version.properties /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | *.ipr 3 | *.iws 4 | /pom.xml 5 | *jar 6 | /lib 7 | /classes 8 | /native 9 | /.lein-failures 10 | /checkouts 11 | /.lein-deps-sum 12 | .gradle 13 | build 14 | .classpath 15 | .project 16 | .settings 17 | .idea 18 | /bin 19 | -------------------------------------------------------------------------------- /CHANGES.txt: -------------------------------------------------------------------------------- 1 | Cascading-jdbc Change Log 2 | 3 | 3.0.0 4 
| - updated to Cascading 3.0 5 | - support for Apache Tez 6 | - moved tests onto cascading-hadoop2-tez platform 7 | - major code restructuring to simplify database interactions 8 | - removed c.j.JDBCTapCollector and leave creation of RecordReader to the computational framework 9 | - removed deprecated code 10 | 11 | 2.7.0 12 | - update to Cascading 2.7 13 | - fix issue in Derby InputSplit handling 14 | - deprecated c.j.Utils, use c.u.Util instead 15 | 16 | 2.6.0 17 | - add cascading-jdbc-: to Cascading Frameworks property 18 | - enabled parameter sanitization for username and password 19 | - added support for Teradata (cascading-jdbc-teradata) 20 | - update to cascading 2.6 21 | 22 | 2.5.5 23 | - enable mysql streaming 24 | 25 | 2.5.4 26 | - update to cascading 2.5.5 27 | - fix a bug related to type handling and table creation 28 | 29 | 2.5.3 30 | - RedshiftScheme constructor is now accepting additional conditions 31 | - JDBCTap uses DatabaseMetaData to check if a table exists 32 | 33 | 2.5.2 34 | - upgrade redshift jdbc driver to 8.4-702.jdbc4 35 | - reduce size of test jars 36 | - replaceOnInsert support for MySQL 37 | - more robust query discovery 38 | 39 | 2.5.1 40 | - update to cascading 2.5.2 41 | - fix issue in batch processing 42 | - enable hadoop2-mr1 platform for the lingual providers 43 | 44 | 2.5.0 45 | - Support for Cascading 2.5.x 46 | - More robust batch processing 47 | - More robust check if a table exists 48 | - full support for SinkMode.UPDATE 49 | 50 | 2.2.0 51 | - initial release of Cascading 2.2 compatible version 52 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 
7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | import java.text.SimpleDateFormat 22 | 23 | buildscript { 24 | repositories { 25 | mavenLocal() 26 | mavenCentral() 27 | jcenter() 28 | } 29 | dependencies { 30 | classpath 'com.netflix.nebula:gradle-extra-configurations-plugin:2.2.1' 31 | classpath 'com.github.jengelman.gradle.plugins:shadow:1.2.1' 32 | } 33 | } 34 | 35 | apply from: 'etc/version.gradle' 36 | 37 | version = releaseVersion 38 | 39 | // the version of hadoop used for compiling and testing 40 | ext.hadoopVersion = "2.6.0" 41 | ext.tezVersion = "0.6.1" 42 | 43 | ext.cascadingVersion = '3.0.0' 44 | ext.lingualVersion = '2.0.0-wip-+' 45 | 46 | ext.timestamp = new SimpleDateFormat( "yyyyMMdd" ).format( new Date() ) 47 | 48 | allprojects { 49 | 50 | group = 'cascading' 51 | version = releaseVersion 52 | 53 | repositories { 54 | mavenLocal() 55 | mavenCentral() 56 | maven{ url 'http://conjars.org/repo/' } 57 | maven{ url 'http://repo.pentaho.org/artifactory/repo/' } 58 | } 59 | 60 | apply plugin: 'java' 61 | apply plugin: 'idea' 62 | apply plugin: 'maven' 63 | apply plugin: 'eclipse' 64 | apply plugin: 'provided-base' 65 | apply plugin: 'com.github.johnrengelman.shadow' 66 | 67 | } 68 | 69 | subprojects{ 70 | 71 | dependencies { 72 | 73 | provided group: 'cascading', name: 'cascading-hadoop2-tez', version: cascadingVersion, changing: true 74 | provided group: 'org.slf4j', name: 'slf4j-api', version: 
'1.7.5' 75 | provided group: 'org.slf4j', name: 'slf4j-log4j12', version: '1.7.5' 76 | provided( group: 'org.apache.hadoop', name: 'hadoop-common', version: hadoopVersion ) 77 | provided( group: 'org.apache.hadoop', name: 'hadoop-mapreduce-client-core', version: hadoopVersion ) 78 | testCompile group: 'junit', name: 'junit', version: '4.11' 79 | testCompile group: 'org.mockito', name: 'mockito-all', version: '1.9.5' 80 | testCompile group: 'cascading', name: 'cascading-core', version: cascadingVersion, classifier: 'tests' 81 | testRuntime group: 'log4j', name: 'log4j', version: '1.2.17' 82 | 83 | // only used in tests, not in the production code. 84 | testCompile(group: 'cascading', name: 'lingual-core', version: lingualVersion ){ 85 | exclude group: 'xerces' 86 | } 87 | 88 | testCompile( group: 'org.apache.tez', name: 'tez-tests', version: tezVersion, classifier: 'tests' ) 89 | testCompile( group: 'org.apache.tez', name: 'tez-api', version: tezVersion ) 90 | testCompile( group: 'org.apache.tez', name: 'tez-dag', version: tezVersion ) 91 | testCompile( group: 'org.apache.hadoop', name: 'hadoop-minicluster', version: hadoopVersion ) 92 | } 93 | 94 | test { 95 | if ( System.getProperty('DEBUG', 'false') == 'true' ) { 96 | jvmArgs '-Xdebug', '-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005' 97 | } 98 | } 99 | 100 | task sourcesJar( type: Jar, dependsOn: classes ) { 101 | from sourceSets.main.allSource 102 | classifier = 'sources' 103 | } 104 | 105 | task javadocJar( type: Jar, dependsOn: javadoc ) { 106 | classifier = 'javadoc' 107 | from javadoc.destinationDir 108 | } 109 | 110 | task testsJar( type: Jar, dependsOn: testClasses ) { 111 | from sourceSets.test.output 112 | classifier = 'tests' 113 | } 114 | 115 | task testSourcesJar( type: Jar, dependsOn: classes ) { 116 | from sourceSets.test.allSource 117 | classifier = 'test-sources' 118 | } 119 | 120 | configurations { 121 | testArtifacts { 122 | extendsFrom testRuntime 123 | } 124 | } 125 | 126 | 
artifacts { 127 | archives jar 128 | archives shadowJar 129 | archives sourcesJar 130 | archives javadocJar 131 | archives testsJar 132 | archives testSourcesJar 133 | testArtifacts testsJar 134 | testArtifacts testSourcesJar 135 | } 136 | 137 | uploadArchives { 138 | 139 | def deployer = repositories.mavenDeployer { 140 | configuration = configurations.archives 141 | 142 | repository( url: repoUrl ) { 143 | authentication( userName: repoUserName, password: repoPassword ) 144 | } 145 | 146 | pom.project { 147 | description 'Cascading JDBC is a collection of adapters for JDBC.' 148 | inceptionYear '2013' 149 | url 'http://cascading.org/' 150 | scm { 151 | url 'https://github.com/Cascading/cascading-jdbc.git' 152 | } 153 | licenses { 154 | license { 155 | name 'The Apache Software License, Version 2.0' 156 | url 'http://www.apache.org/licenses/LICENSE-2.0.txt' 157 | distribution 'repo' 158 | } 159 | } 160 | } 161 | } 162 | [install.repositories.mavenInstaller, deployer]*.pom*.whenConfigured { pom -> 163 | def scopeOrder = [test: 1, runtime: 2, provided: 3, compile: 4].withDefault { 100 } 164 | pom.dependencies = pom.dependencies.sort { scopeOrder[it.scope] } 165 | } 166 | } 167 | 168 | jar { 169 | manifest { 170 | attributes( "Build-Date": "${timestamp}" ) 171 | } 172 | } 173 | 174 | shadowJar { 175 | classifier = "provider" 176 | configurations = [] 177 | from { 178 | project.configurations.runtime.minus( [ project.configurations.provided ] ) 179 | } 180 | mergeServiceFiles() 181 | 182 | exclude 'LICENSE' 183 | exclude 'META-INF/*.DSA' 184 | exclude 'META-INF/LICENSE*' 185 | } 186 | 187 | task createFrameworkProperties( dependsOn: processResources ) << { 188 | if ( project.name != "cascading-jdbc-core" ) { 189 | file( "${sourceSets.main.output.resourcesDir}/cascading" ).mkdirs() 190 | file( "${sourceSets.main.output.resourcesDir}/cascading/framework.properties").write("name=${project.name}:${version}\n" ) 191 | } 192 | } 193 | 194 | jar.dependsOn( 
createFrameworkProperties ) 195 | shadowJar.dependsOn( createFrameworkProperties ) 196 | 197 | } 198 | 199 | idea { 200 | module { 201 | downloadJavadoc = true 202 | downloadSources = true 203 | } 204 | } 205 | 206 | eclipse { 207 | classpath { 208 | defaultOutputDir = file( 'build' ) 209 | downloadSources = true 210 | downloadJavadoc = true 211 | } 212 | } 213 | 214 | -------------------------------------------------------------------------------- /cascading-jdbc-core/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | test{ 21 | scanForTestClasses = false 22 | exclude( "**/*Check.class" ) 23 | exclude( "**/*Base.class" ) 24 | } 25 | -------------------------------------------------------------------------------- /cascading-jdbc-core/src/main/java/cascading/jdbc/InternalTypeMapping.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 
7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package cascading.jdbc; 21 | 22 | import java.lang.reflect.Type; 23 | import java.sql.Date; 24 | import java.sql.Time; 25 | import java.sql.Timestamp; 26 | import java.util.HashMap; 27 | import java.util.Map; 28 | 29 | import cascading.tuple.Fields; 30 | import cascading.tuple.type.CoercibleType; 31 | 32 | 33 | /** 34 | * Class that maps Java classes to SQL type definitions. When cascading-jdbc is 35 | * used from lingual, we might be presented with JVM internal classes and 36 | * {@link CoercibleType} instances of {@link Fields}, but we need to know the 37 | * more specific JDBC types, in order to write the tuple to the databases. This 38 | * class is doing an "educated guess" on the types. 39 | * 40 | * The rules are like this: 41 | *
 42 |  * int.class -> "int not null"
 43 |  * Integer.class -> "int"
 44 |  * String.class -> "varchar(256)"
 45 |  * long.class -> "int not null"
 46 |  * Long.class -> "int"
 47 |  * Time.class -> "time"
 48 |  * Date.class -> "date"
 49 |  * Timestamp.class -> "timestamp".
 50 |  * 
51 | * 52 | * 53 | * Coercibles are translated like this and then given to the translation above: 54 | * 55 | *
 56 |  * cascading.lingual.type.SQLDateCoercibleType -> java.sql.Date.class
 57 |  * cascading.lingual.type.SQLDateTimeCoercibleType -> java.sql.Date.class
 58 |  * cascading.lingual.type.SQLTimeCoercibleType -> java.sql.Time.class
 59 |  * cascading.lingual.type.SQLTimestampCoercibleType ->  java.sql.Timestamp.class
 60 |  * 
61 | * 62 | * All other {@link CoercibleType} instances are converted to String.class, since anything should 63 | * be coercible to a String. 64 | * 65 | */ 66 | public class InternalTypeMapping 67 | { 68 | private static final Map TYPES = new HashMap(); 69 | 70 | private static final Map NATIVE_TYPES = new HashMap(); 71 | 72 | static 73 | { 74 | TYPES.put( Integer.class, "int" ); 75 | TYPES.put( int.class, "int not null" ); 76 | TYPES.put( String.class, "varchar(256)" ); 77 | TYPES.put( Long.class, "int" ); 78 | TYPES.put( long.class, "int not null" ); 79 | TYPES.put( Time.class, "time" ); 80 | TYPES.put( Date.class, "date" ); 81 | TYPES.put( Timestamp.class, "timestamp" ); 82 | 83 | /* 84 | * we have no compile time dependency on lingual and we should never have 85 | * that, so we work around the types being unknown right now, by using class names. 86 | */ 87 | NATIVE_TYPES.put( "cascading.lingual.type.SQLDateCoercibleType", java.sql.Date.class ); 88 | NATIVE_TYPES.put( "cascading.lingual.type.SQLDateTimeCoercibleType", java.sql.Date.class ); 89 | NATIVE_TYPES.put( "cascading.lingual.type.SQLTimeCoercibleType", java.sql.Time.class ); 90 | NATIVE_TYPES.put( "cascading.lingual.type.SQLTimestampCoercibleType", java.sql.Timestamp.class ); 91 | 92 | } 93 | 94 | /** 95 | * Method to determine the correct type, that a field should be 96 | * coerced to, before writing it to the database. The method uses an internal 97 | * mapping. If no class can be found in the mapping, it will return 98 | * String.class; 99 | * 100 | * @param type The type of a {@link Fields} instance 101 | * @return a JVM internal type. 102 | */ 103 | public static Type findInternalType( Type type ) 104 | { 105 | if( ! 
( type instanceof CoercibleType ) ) 106 | return type; 107 | 108 | CoercibleType coercible = (CoercibleType) type; 109 | Type nativeType = NATIVE_TYPES.get( coercible.getClass().getName() ); 110 | if( nativeType == null ) 111 | nativeType = String.class; 112 | return nativeType; 113 | 114 | } 115 | 116 | /** 117 | * Returns a mapping of a java class to a SQL type as a {@link String}. 118 | * 119 | * @param type The {@link Type} to find the mapping for. 120 | * 121 | * @throws IllegalArgumentException If no mapping can be found. 122 | * */ 123 | public static String sqltypeForClass( Type type ) 124 | { 125 | String sqlType = TYPES.get( type ); 126 | if( sqlType == null ) 127 | { 128 | Type nativeType = findInternalType( type ); 129 | sqlType = TYPES.get( nativeType ); 130 | if( sqlType == null ) 131 | throw new IllegalArgumentException( String.format( "cannot map type %s to a sql type", type ) ); 132 | } 133 | return sqlType; 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /cascading-jdbc-core/src/main/java/cascading/jdbc/JDBCFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | package cascading.jdbc; 21 | 22 | import java.util.Properties; 23 | 24 | import org.apache.hadoop.mapred.InputFormat; 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | import cascading.jdbc.db.DBInputFormat; 29 | import cascading.jdbc.db.DBOutputFormat; 30 | import cascading.scheme.Scheme; 31 | import cascading.tap.SinkMode; 32 | import cascading.tap.Tap; 33 | import cascading.tuple.Fields; 34 | 35 | /** 36 | * {@link JDBCFactory} is a factory class that can be used by the lingual 37 | * provider mechanism to create {@link JDBCScheme}s and {@link JDBCTap}s. 38 | * 39 | * */ 40 | public class JDBCFactory 41 | { 42 | private static final Logger LOG = LoggerFactory.getLogger( JDBCFactory.class ); 43 | 44 | public static final String DEFAULT_SEPARATOR = ":"; 45 | 46 | public static final String PROTOCOL_JDBC_USER = "jdbcuser"; 47 | public static final String PROTOCOL_JDBC_PASSWORD = "jdbcpassword"; 48 | public static final String PROTOCOL_JDBC_DRIVER = "jdbcdriver"; 49 | 50 | public static final String PROTOCOL_FIELD_SEPARATOR = "tabledesc.separator"; 51 | public static final String PROTOCOL_TABLE_NAME = "tabledesc.tablename"; 52 | public static final String PROTOCOL_COLUMN_NAMES = "tabledesc.columnnames"; 53 | public static final String PROTOCOL_COLUMN_DEFS = "tabledesc.columndefs"; 54 | public static final String PROTOCOL_PRIMARY_KEYS = "tabledesc.primarykeys"; 55 | public static final String PROTOCOL_SINK_MODE = "sinkmode"; 56 | 57 | public static final String FORMAT_SEPARATOR = "separator"; 58 | public static final String FORMAT_COLUMNS = "columnnames"; 59 | public static final String FORMAT_ORDER_BY = "orderBy"; 60 | public static final String FORMAT_CONDITIONS = "conditions"; 61 | public static final String FORMAT_LIMIT = "limit"; 62 | public static final String FORMAT_UPDATE_BY = "updateBy"; 63 | public static final String FORMAT_TABLE_ALIAS = "tableAlias"; 64 | 65 | public static final String FORMAT_SELECT_QUERY = 
"selectQuery"; 66 | public static final String FORMAT_COUNT_QUERY = "countQuery"; 67 | 68 | /** 69 | * Creates a new Tap for the given arguments. 70 | * 71 | * @param protocol name of the protocol, only accepts "jdbc". 72 | * @param scheme a {@link JDBCScheme} instance. 73 | * @param identifier The identifier of the tap, which is assumed to be the 74 | * jdbc URL. 75 | * @param mode a {@link SinkMode}. All are supported. 76 | * @param properties The Properties object containing the table description, 77 | * optionally a jdbc user and a jdbc password. 78 | * @return a new {@link JDBCTap} instance. 79 | */ 80 | @SuppressWarnings("rawtypes") 81 | public Tap createTap( String protocol, Scheme scheme, String identifier, SinkMode mode, Properties properties ) 82 | { 83 | LOG.info( "creating jdbc protocol with properties {} in mode {}", properties, mode ); 84 | 85 | String driver = properties.getProperty( PROTOCOL_JDBC_DRIVER ); 86 | 87 | String jdbcUserProperty = properties.getProperty( PROTOCOL_JDBC_USER ); 88 | String jdbcPasswordProperty = properties.getProperty( PROTOCOL_JDBC_PASSWORD ); 89 | 90 | String jdbcUser = null; 91 | if( jdbcUserProperty != null && !jdbcUserProperty.isEmpty() ) 92 | jdbcUser = jdbcUserProperty; 93 | 94 | String jdbcPassword = null; 95 | if( jdbcPasswordProperty != null && !jdbcPasswordProperty.isEmpty() ) 96 | jdbcPassword = jdbcPasswordProperty; 97 | 98 | final TableDesc tableDesc = createTableDescFromProperties( properties ); 99 | 100 | JDBCScheme jdbcScheme = (JDBCScheme) scheme; 101 | 102 | /* 103 | * it is possible, that the schema information given via properties is 104 | * incomplete and therefore, we derive it from the given fields. We can only 105 | * do that, if we actually get meaningful fields. There is a second place, 106 | * where this happens, which is the presentSinkFields method of the 107 | * JDBCScheme. 
108 | */ 109 | Fields sinkFields = jdbcScheme.getSinkFields(); 110 | if( !tableDesc.hasRequiredTableInformation() && sinkFields != Fields.UNKNOWN && sinkFields != Fields.ALL && sinkFields != null 111 | && sinkFields.getTypes() != null ) 112 | { 113 | LOG.debug( "tabledesc information incomplete, falling back to sink-fields {}", jdbcScheme.getSinkFields() ); 114 | tableDesc.completeFromFields( jdbcScheme.getSinkFields() ); 115 | ( (JDBCScheme) scheme ).setColumns( tableDesc.getColumnNames() ); 116 | } 117 | 118 | // users can overwrite the sink mode. 119 | String sinkModeProperty = properties.getProperty( PROTOCOL_SINK_MODE ); 120 | SinkMode userMode = mode; 121 | if( sinkModeProperty != null && !sinkModeProperty.isEmpty() ) 122 | userMode = SinkMode.valueOf( sinkModeProperty ); 123 | 124 | return new JDBCTap( identifier, jdbcUser, jdbcPassword, driver, tableDesc, jdbcScheme, userMode ); 125 | 126 | } 127 | 128 | /** 129 | * Creates a new {@link JDBCScheme} instance for the given format, fields and 130 | * properties. 131 | * 132 | * @param format The format of the scheme. This is JDBC driver dependent. 133 | * @param fields The fields to interact with. 134 | * @param properties The {@link Properties} object containing the necessary 135 | * information to construct a {@link JDBCScheme}. 136 | * @return a new {@link JDBCScheme} instance. 
137 | */ 138 | public Scheme createScheme( String format, Fields fields, Properties properties ) 139 | { 140 | LOG.info( "creating {} format with properties {} and fields {}", format, properties, fields ); 141 | 142 | String selectQuery = properties.getProperty( FORMAT_SELECT_QUERY ); 143 | String countQuery = properties.getProperty( FORMAT_COUNT_QUERY ); 144 | String separator = properties.getProperty( FORMAT_SEPARATOR, DEFAULT_SEPARATOR ); 145 | long limit = -1; 146 | 147 | String limitProperty = properties.getProperty( FORMAT_LIMIT ); 148 | if( limitProperty != null && !limitProperty.isEmpty() ) 149 | limit = Long.parseLong( limitProperty ); 150 | 151 | String[] columnNames = getColumnNames(fields, properties, separator); 152 | 153 | boolean tableAlias = getTableAlias(properties); 154 | 155 | if( selectQuery != null ) 156 | { 157 | if( countQuery == null ) 158 | throw new IllegalArgumentException( "no count query for select query given" ); 159 | 160 | return createScheme( fields, selectQuery, countQuery, limit, columnNames, tableAlias ); 161 | } 162 | 163 | String conditions = properties.getProperty( FORMAT_CONDITIONS ); 164 | 165 | String updateByProperty = properties.getProperty( FORMAT_UPDATE_BY ); 166 | String[] updateBy = null; 167 | if( updateByProperty != null && !updateByProperty.isEmpty() ) 168 | updateBy = updateByProperty.split( separator ); 169 | 170 | Fields updateByFields = null; 171 | if( updateByProperty != null && !updateByProperty.isEmpty() ) 172 | updateByFields = new Fields( updateBy ); 173 | 174 | String[] orderBy = null; 175 | String orderByProperty = properties.getProperty( FORMAT_ORDER_BY ); 176 | if( orderByProperty != null && !orderByProperty.isEmpty() ) 177 | orderBy = orderByProperty.split( separator ); 178 | 179 | return createUpdatableScheme( fields, limit, columnNames, tableAlias, conditions, updateBy, updateByFields, orderBy ); 180 | 181 | } 182 | 183 | protected Scheme createUpdatableScheme( Fields fields, long limit, String[] 
columnNames, Boolean tableAlias, String conditions, 184 | String[] updateBy, Fields updateByFields, String[] orderBy, Properties properties ) 185 | { 186 | return new JDBCScheme( getInputFormatClass(), getOutputFormClass(), fields, columnNames, orderBy, conditions, limit, updateByFields, 187 | updateBy, tableAlias ); 188 | } 189 | 190 | protected Scheme createUpdatableScheme( Fields fields, long limit, String[] columnNames, Boolean tableAlias, String conditions, 191 | String[] updateBy, Fields updateByFields, String[] orderBy ) 192 | { 193 | return createUpdatableScheme( fields, limit, columnNames, tableAlias, conditions, updateBy, updateByFields, orderBy, new Properties() ); 194 | } 195 | 196 | protected Scheme createScheme( Fields fields, String selectQuery, String countQuery, long limit, String[] columnNames, boolean tableAlias ) 197 | { 198 | return new JDBCScheme( getInputFormatClass(), fields, columnNames, selectQuery, countQuery, limit, tableAlias ); 199 | } 200 | 201 | /** 202 | * Private helper method to extract values representing a {@link TableDesc} 203 | * instance from the properties passed to the createTap method. 204 | * 205 | * @param properties A properties instance. 206 | * @return A {@link TableDesc} instance. 
207 | * 208 | */ 209 | protected TableDesc createTableDescFromProperties( Properties properties ) 210 | { 211 | String tableName = properties.getProperty( PROTOCOL_TABLE_NAME ); 212 | 213 | if( tableName == null || tableName.isEmpty() ) 214 | throw new IllegalArgumentException( "no tablename given" ); 215 | 216 | String separator = properties.getProperty( PROTOCOL_FIELD_SEPARATOR, DEFAULT_SEPARATOR ); 217 | 218 | String[] columnNames = null; 219 | String columnNamesProperty = properties.getProperty( PROTOCOL_COLUMN_NAMES ); 220 | if( columnNamesProperty != null && !columnNamesProperty.isEmpty() ) 221 | columnNames = columnNamesProperty.split( separator ); 222 | 223 | String[] columnDefs = null; 224 | String columnDefsProperty = properties.getProperty( PROTOCOL_COLUMN_DEFS ); 225 | if( columnDefsProperty != null && !columnDefsProperty.isEmpty() ) 226 | columnDefs = columnDefsProperty.split( separator ); 227 | 228 | String primaryKeysProperty = properties.getProperty( PROTOCOL_PRIMARY_KEYS ); 229 | 230 | String[] primaryKeys = null; 231 | 232 | if( primaryKeysProperty != null && !primaryKeysProperty.isEmpty() ) 233 | primaryKeys = primaryKeysProperty.split( separator ); 234 | 235 | TableDesc desc = new TableDesc( tableName, columnNames, columnDefs, primaryKeys ); 236 | return desc; 237 | } 238 | 239 | /** 240 | * Returns {@link DBInputFormat} class. This can be overwritten in subclasses, if they 241 | * have a custom {@link DBInputFormat}. 242 | * 243 | * @return the {@link InputFormat} to use. 244 | * */ 245 | protected Class getInputFormatClass() 246 | { 247 | return DBInputFormat.class; 248 | } 249 | 250 | /** 251 | * Returns {@link DBOutputFormat} class. This can be overwritten in subclasses, if they 252 | * have a custom {@link DBInputFormat}. 253 | * 254 | * @return the {@link InputFormat} to use. 
255 | * */ 256 | protected Class getOutputFormClass() 257 | { 258 | return DBOutputFormat.class; 259 | } 260 | 261 | protected String[] getColumnNames( Fields fields, Properties properties, String separator ) 262 | { 263 | String[] columNames = null; 264 | String columnNamesProperty = properties.getProperty( FORMAT_COLUMNS ); 265 | if( columnNamesProperty != null && !columnNamesProperty.isEmpty() ) 266 | columNames = columnNamesProperty.split( separator ); 267 | else if( fields != null ) 268 | { 269 | columNames = new String[ fields.size() ]; 270 | for( int i = 0; i < fields.size(); i++ ) 271 | { 272 | Comparable cmp = fields.get( i ); 273 | columNames[ i ] = cmp.toString(); 274 | } 275 | } 276 | return columNames; 277 | } 278 | 279 | protected boolean getTableAlias( Properties properties ) 280 | { 281 | boolean tableAlias = false; 282 | String tableAliasProperty = properties.getProperty( FORMAT_TABLE_ALIAS ); 283 | if( tableAliasProperty != null ) 284 | Boolean.parseBoolean( tableAliasProperty ); 285 | 286 | return tableAlias; 287 | } 288 | } 289 | -------------------------------------------------------------------------------- /cascading-jdbc-core/src/main/java/cascading/jdbc/JDBCUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package cascading.jdbc;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility class for interacting with databases via JDBC. All methods commit the
 * given {@link Connection} on success and translate {@link SQLException}s into
 * {@link IOException}s.
 */
public class JDBCUtil
  {
  /** Logger */
  private static final Logger LOG = LoggerFactory.getLogger( JDBCUtil.class );

  /**
   * Checks if the table described by the given {@link TableDesc} exists in the
   * database of the given Connection.
   *
   * @param connection an open database connection
   * @param tableDesc the descriptor naming the table to look for
   * @return true if the table is present, otherwise false
   * @throws IOException when the database meta data cannot be read
   * */
  public static boolean tableExists( Connection connection, TableDesc tableDesc ) throws IOException
    {
    try
      {
      DatabaseMetaData dbm = connection.getMetaData();

      if( hasTable( dbm, tableDesc.getTableName() ) )
        return true;

      // try again with upper case for oracle compatibility:
      // see http://stackoverflow.com/questions/2942788/check-if-table-exists
      return hasTable( dbm, tableDesc.getTableName().toUpperCase() );
      }
    catch( SQLException exception )
      {
      throw new IOException( exception );
      }
    }

  /** Returns true when the meta data reports a table with the exact given name. */
  private static boolean hasTable( DatabaseMetaData dbm, String tableName ) throws SQLException
    {
    // try-with-resources guarantees the ResultSet is closed on every path;
    // the previous finally based handling could mask the original failure
    try( ResultSet tables = dbm.getTables( null, null, tableName, null ) )
      {
      return tables.next();
      }
    }

  /**
   * Creates a table from the given table descriptor if it does not exist.
   * */
  public static void createTableIfNotExists( Connection connection, TableDesc tableDesc ) throws IOException
    {
    if( tableExists( connection, tableDesc ) )
      return;

    executeUpdate( connection, tableDesc.getCreateTableStatement() );
    }

  /**
   * Executes the given update statement on the given Connection and commits.
   *
   * @return the update count reported by the JDBC driver
   * @throws IOException wrapping any SQLException, including the vendor error code
   * */
  public static int executeUpdate( Connection connection, String updateString ) throws IOException
    {
    LOG.info( "executing update: {}", updateString );

    try( Statement statement = connection.createStatement() )
      {
      int result = statement.executeUpdate( updateString );
      connection.commit();
      return result;
      }
    catch( SQLException exception )
      {
      throw new IOException( "SQL error code: " + exception.getErrorCode() + " executing update statement: " + updateString, exception );
      }
    }

  /**
   * Drops the table described by the table descriptor if it exists.
   * */
  public static void dropTable( Connection connection, TableDesc tableDesc ) throws IOException
    {
    if( tableExists( connection, tableDesc ) )
      executeUpdate( connection, tableDesc.getTableDropStatement() );
    }

  /**
   * Closes the given database connection, committing pending work first.
   * Null and already closed connections are silently ignored.
   * */
  public static void closeConnection( Connection connection ) throws IOException
    {
    if( connection == null )
      return;

    try
      {
      if( connection.isClosed() )
        return;

      connection.commit();
      connection.close();
      }
    catch( SQLException exception )
      {
      throw new IOException( exception );
      }
    }

  /**
   * Method executeQuery allows for ad-hoc queries to be sent to the remote
   * RDBMS. A value of -1 for returnResults will return a List of all results
   * from the query, a value of 0 will return an empty List.
   *
   * @param connection an open database connection
   * @param queryString of type String
   * @param returnResults of type int
   * @return List of Object[] rows, possibly empty
   */
  public static List executeQuery( Connection connection, String queryString, int returnResults ) throws IOException
    {
    List result = Collections.emptyList();

    LOG.info( "executing query: {}", queryString );

    // both Statement and ResultSet are released via try-with-resources,
    // even when copying the results fails half way through
    try( Statement statement = connection.createStatement();
         ResultSet resultSet = statement.executeQuery( queryString ) )
      {
      if( returnResults != 0 )
        result = copyResultSet( resultSet, returnResults );

      connection.commit();
      return result;
      }
    catch( SQLException exception )
      {
      throw new IOException( exception );
      }
    }

  /**
   * Copies at most length rows (all rows when length is -1) of the given
   * ResultSet into a List of Object arrays, one array per row.
   * */
  private static List copyResultSet( ResultSet resultSet, int length ) throws SQLException
    {
    List results = new ArrayList();

    if( length == -1 )
      length = Integer.MAX_VALUE;

    int size = resultSet.getMetaData().getColumnCount();

    int count = 0;
    while( resultSet.next() && count < length )
      {
      count++;
      Object[] row = new Object[ size ];

      // JDBC columns are 1-based
      for( int i = 0; i < row.length; i++ )
        row[ i ] = resultSet.getObject( i + 1 );

      results.add( row );
      }
    return results;
    }
  }
package cascading.jdbc;

import java.io.Serializable;
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import cascading.tuple.Fields;
import cascading.util.Util;

/**
 * Class TableDesc describes a SQL based table, this description is used by the
 * {@link JDBCTap} when creating a missing table and by the JDBCScheme, for the
 * correct type coercion.
 *
 * @see JDBCTap
 * @see JDBCScheme
 */
public class TableDesc implements Serializable
  {
  private static final long serialVersionUID = 5009899098019404131L;

  /** Field tableName */
  String tableName;
  /** Field columnNames */
  String[] columnNames;
  /** Field columnDefs */
  String[] columnDefs;
  /** Field primaryKeys */
  String[] primaryKeys;

  /** custom existence query; deprecated, existence is determined via JDBC meta data now */
  String tableExistsQuery;

  /**
   * Constructor TableDesc creates a new TableDesc instance.
   *
   * @param tableName of type String
   */
  public TableDesc( String tableName )
    {
    this.tableName = tableName;
    }

  /**
   * Constructor TableDesc creates a new TableDesc instance.
   *
   * @param tableName of type String
   * @param columnNames of type String[]
   * @param columnDefs of type String[]
   * @param primaryKeys of type String[]
   * @param tableExistsQuery of type String
   *
   * Constructor using a tableExistsQuery is deprecated. The existence is
   * determined via JDBC meta data now.
   */
  @Deprecated
  public TableDesc( String tableName, String[] columnNames, String[] columnDefs, String[] primaryKeys, String tableExistsQuery )
    {
    this.tableName = tableName;
    this.columnNames = columnNames;
    this.columnDefs = columnDefs;
    this.primaryKeys = primaryKeys;
    this.tableExistsQuery = tableExistsQuery;
    }

  /**
   * Constructor TableDesc creates a new TableDesc instance.
   *
   * @param tableName of type String
   * @param columnNames of type String[]
   * @param columnDefs of type String[]
   * @param primaryKeys of type String[]
   */
  public TableDesc( String tableName, String[] columnNames, String[] columnDefs, String[] primaryKeys )
    {
    this.tableName = tableName;
    this.columnNames = columnNames;
    this.columnDefs = columnDefs;
    this.primaryKeys = primaryKeys;
    }

  public String getTableName()
    {
    return tableName;
    }

  public String[] getColumnNames()
    {
    return columnNames;
    }

  public String[] getColumnDefs()
    {
    return columnDefs;
    }

  public String[] getPrimaryKeys()
    {
    return primaryKeys;
    }

  /**
   * Method getCreateTableStatement returns the full CREATE TABLE statement for
   * this TableDesc object.
   *
   * @return the create table statement (type String) of this TableDesc object.
   */
  public String getCreateTableStatement()
    {
    List createTableStatement = new ArrayList();

    createTableStatement = addCreateTableBodyTo( createTableStatement );

    return String.format( getCreateTableFormat(), tableName, Util.join( createTableStatement, ", " ) );
    }

  /** Collects the column definitions and the primary key clause of the statement body. */
  protected List addCreateTableBodyTo( List createTableStatement )
    {
    createTableStatement = addDefinitionsTo( createTableStatement );
    createTableStatement = addPrimaryKeyTo( createTableStatement );

    return createTableStatement;
    }

  /** Format string receiving the table name and the comma joined body. */
  protected String getCreateTableFormat()
    {
    return "CREATE TABLE %s ( %s )";
    }

  /** Adds one "name definition" entry per column to the statement body. */
  protected List addDefinitionsTo( List createTableStatement )
    {
    for( int i = 0; i < columnNames.length; i++ )
      {
      String columnName = columnNames[ i ];
      String columnDef = columnDefs[ i ];

      createTableStatement.add( columnName + " " + columnDef );
      }

    return createTableStatement;
    }

  /** Adds a PRIMARY KEY clause when primary keys are set. */
  protected List addPrimaryKeyTo( List createTableStatement )
    {
    if( hasPrimaryKey() )
      createTableStatement.add( String.format( "PRIMARY KEY( %s )", Util.join( primaryKeys, ", " ) ) );

    return createTableStatement;
    }

  /**
   * Method getTableDropStatement returns the tableDropStatement of this
   * TableDesc object.
   *
   * @return the tableDropStatement (type String) of this TableDesc object.
   */
  public String getTableDropStatement()
    {
    return String.format( getDropTableFormat(), tableName );
    }

  protected String getDropTableFormat()
    {
    return "DROP TABLE %s";
    }

  private boolean hasPrimaryKey()
    {
    return primaryKeys != null && primaryKeys.length != 0;
    }

  /**
   * Determines if the instance has a useful tablename, columns and column
   * descriptions set. Useful means, that they are non-null and not empty and
   * for each column, there is a type definition.
   *
   * @return Returns true if all requirements are met, otherwise
   *         false.
   * */
  public boolean hasRequiredTableInformation()
    {
    // FIX: the second length check previously re-tested columnNames.length
    // instead of columnDefs.length
    return tableName != null && !tableName.isEmpty() && columnNames != null && columnNames.length > 0 && columnDefs != null
        && columnDefs.length > 0 && columnDefs.length == columnNames.length;
    }

  /**
   * This method can be used to fill in the required column names and
   * descriptions from a Fields instance. This can be useful, when the types can
   * only be determined after a flow has been started. This mechanism can be
   * used to simplify the usage of the {@link JDBCTap} and {@link JDBCScheme} as
   * a provider. The method may throw an {@link IllegalStateException} if the
   * fields are insufficient to determine the correct types.
   *
   * @param fields The {@link Fields} instance to derive the table structure
   *          from.
   *
   * @throws IllegalStateException In case the instance is still incomplete,
   *           after trying to determine the table structure from the given
   *           fields.
   */
  public void completeFromFields( Fields fields )
    {
    if( !hasRequiredTableInformation() )
      {
      List names = new ArrayList();
      List defs = new ArrayList();

      for( int i = 0; i < fields.size(); i++ )
        {
        Comparable cmp = fields.get( i );
        names.add( cmp.toString() );
        Type internalType = InternalTypeMapping.findInternalType( fields.getType( i ) );
        defs.add( InternalTypeMapping.sqltypeForClass( internalType ) );
        }
      if( columnNames == null || columnNames.length == 0 )
        columnNames = names.toArray( new String[ names.size() ] );
      if( columnDefs == null || columnDefs.length == 0 )
        columnDefs = defs.toArray( new String[ defs.size() ] );

      // now it has to be complete and usable, if not bail out.
      if( !hasRequiredTableInformation() )
        throw new IllegalStateException( "could not derive TableDesc from given fields." );
      }
    }

  @Override
  public String toString()
    {
    return "TableDesc{" + "tableName='" + tableName + '\'' + ", columnNames="
        + ( columnNames == null ? null : Arrays.asList( columnNames ) ) + ", columnDefs="
        + ( columnDefs == null ? null : Arrays.asList( columnDefs ) ) + ", primaryKeys="
        + ( primaryKeys == null ? null : Arrays.asList( primaryKeys ) ) + '}';
    }

  @Override
  public boolean equals( Object object )
    {
    if( this == object )
      return true;
    if( ! ( object instanceof TableDesc ) )
      return false;

    TableDesc tableDesc = (TableDesc) object;

    if( !Arrays.equals( columnDefs, tableDesc.columnDefs ) )
      return false;
    if( !Arrays.equals( columnNames, tableDesc.columnNames ) )
      return false;
    if( !Arrays.equals( primaryKeys, tableDesc.primaryKeys ) )
      return false;
    if( tableName != null ? !tableName.equals( tableDesc.tableName ) : tableDesc.tableName != null )
      return false;

    return true;
    }

  @Override
  public int hashCode()
    {
    int result = tableName != null ? tableName.hashCode() : 0;
    result = 31 * result + ( columnNames != null ? Arrays.hashCode( columnNames ) : 0 );
    result = 31 * result + ( columnDefs != null ? Arrays.hashCode( columnDefs ) : 0 );
    result = 31 * result + ( primaryKeys != null ? Arrays.hashCode( primaryKeys ) : 0 );
    return result;
    }
  }
package cascading.jdbc;

import cascading.jdbc.db.DBWritable;
import cascading.tuple.Tuple;

import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * A {@link DBWritable} adapter that moves values between a cascading
 * {@link Tuple} and JDBC {@link PreparedStatement}/{@link ResultSet} objects.
 */
public class TupleRecord implements DBWritable
  {
  // the wrapped tuple; replaced wholesale by readFields()
  private Tuple tuple;

  public TupleRecord()
    {
    }

  public TupleRecord( Tuple tuple )
    {
    this.tuple = tuple;
    }

  public void setTuple( Tuple tuple )
    {
    this.tuple = tuple;
    }

  public Tuple getTuple()
    {
    return tuple;
    }

  /** Binds every tuple entry to the statement; JDBC parameters are 1-based. */
  public void write( PreparedStatement statement ) throws SQLException
    {
    int size = tuple.size();

    for( int position = 0; position < size; position++ )
      statement.setObject( position + 1, tuple.getObject( position ) );
    }

  /** Replaces the current tuple with one entry per column of the current row. */
  public void readFields( ResultSet resultSet ) throws SQLException
    {
    tuple = new Tuple();

    int columnCount = resultSet.getMetaData().getColumnCount();

    for( int column = 1; column <= columnCount; column++ )
      tuple.add( resultSet.getObject( column ) );
    }

  }
package cascading.jdbc.db;

import java.sql.SQLException;

/**
 * BatchProcessingException signals a failure while processing a batch of
 * statements in {@link DBOutputFormat}. It carries the message and/or cause of
 * the underlying driver error.
 */
public class BatchProcessingException extends SQLException
  {
  // SQLException is serializable; declare an explicit version id, consistent
  // with other serializable classes in this project (e.g. TableDesc)
  private static final long serialVersionUID = 1L;

  /** @param message description of the batch failure */
  public BatchProcessingException( String message )
    {
    super( message );
    }

  /**
   * @param message description of the batch failure
   * @param cause the underlying driver exception
   */
  public BatchProcessingException( String message, Throwable cause )
    {
    super( message, cause );
    }

  /** @param cause the underlying driver exception */
  public BatchProcessingException( Throwable cause )
    {
    super( cause );
    }
  }
package cascading.jdbc.db;

import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

import cascading.jdbc.TableDesc;
import cascading.util.Util;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;

/**
 * A container for configuration property names for jobs with DB input/output.
 * The configuration can be configured using the static methods in this class,
 * {@link DBInputFormat}, and {@link DBOutputFormat}. Alternatively, the
 * properties can be set in the configuration with proper values.
 */
public class DBConfiguration
  {

  private static final Log LOG = LogFactory.getLog( DBConfiguration.class );

  /** separator used to join/split multi valued properties */
  private static final String SEPARATOR = ":";

  /** The JDBC Driver class name */
  public static final String DRIVER_CLASS_PROPERTY = "mapred.jdbc.driver.class";

  /** JDBC Database access URL */
  public static final String URL_PROPERTY = "mapred.jdbc.url";

  /** User name to access the database */
  public static final String USERNAME_PROPERTY = "mapred.jdbc.username";

  /** Password to access the database */
  public static final String PASSWORD_PROPERTY = "mapred.jdbc.password";

  /** Input table name */
  public static final String INPUT_TABLE_NAME_PROPERTY = "mapred.jdbc.input.table.name";

  /** Field names in the Input table */
  public static final String INPUT_FIELD_NAMES_PROPERTY = "mapred.jdbc.input.field.names";

  /** WHERE clause in the input SELECT statement */
  public static final String INPUT_CONDITIONS_PROPERTY = "mapred.jdbc.input.conditions";

  /** ORDER BY clause in the input SELECT statement */
  public static final String INPUT_ORDER_BY_PROPERTY = "mapred.jdbc.input.orderby";

  /** Whole input query, excluding LIMIT...OFFSET */
  public static final String INPUT_QUERY = "mapred.jdbc.input.query";

  /** The number of records to LIMIT, useful for testing */
  public static final String INPUT_LIMIT = "mapred.jdbc.input.limit";

  /** Input query to get the count of records */
  public static final String INPUT_COUNT_QUERY = "mapred.jdbc.input.count.query";

  /** Class name implementing DBWritable which will hold input tuples */
  public static final String INPUT_CLASS_PROPERTY = "mapred.jdbc.input.class";

  /** Boolean to include table name alias in the input SELECT statement */
  public static final String INPUT_TABLE_ALIAS = "mapred.jdbc.input.table.alias";

  /** Output table name */
  public static final String OUTPUT_TABLE_NAME_PROPERTY = "mapred.jdbc.output.table.name";

  /** Field names in the Output table */
  public static final String OUTPUT_FIELD_NAMES_PROPERTY = "mapred.jdbc.output.field.names";

  /** Field types in the Output table */
  public static final String OUTPUT_FIELD_TYPES_PROPERTY = "mapred.jdbc.output.field.types";

  /** Primary keys of the Output table */
  public static final String OUTPUT_PRIMARY_KEYS_PROPERTY = "mapred.jdbc.output.tableprimarykeys";

  /** Field names used in the update statement */
  public static final String OUTPUT_UPDATE_FIELD_NAMES_PROPERTY = "mapred.jdbc.output.update.field.names";

  /** The number of statements to batch before executing */
  public static final String BATCH_STATEMENTS_PROPERTY = "mapred.jdbc.batch.statements.num";

  /** The number of splits allowed, becomes max concurrent reads. */
  public static final String CONCURRENT_READS_PROPERTY = "mapred.jdbc.concurrent.reads.num";

  /** the hadoop configuration all values are read from and written to */
  private Configuration configuration;

  DBConfiguration( Configuration job )
    {
    this.configuration = job;
    }

  /**
   * Sets the DB access related fields in the Configuration.
   *
   * @param job the configuration
   * @param driverClass JDBC Driver class name
   * @param dbUrl JDBC DB access URL.
   * @param userName DB access username
   * @param passwd DB access passwd
   */
  public static void configureDB( Configuration job, String driverClass, String dbUrl, String userName, String passwd )
    {
    job.set( DRIVER_CLASS_PROPERTY, driverClass );
    job.set( URL_PROPERTY, dbUrl );

    if( userName != null )
      job.set( USERNAME_PROPERTY, userName );

    if( passwd != null )
      job.set( PASSWORD_PROPERTY, passwd );
    }

  /**
   * Sets the DB access related fields in the Configuration, without credentials.
   *
   * @param job the configuration
   * @param driverClass JDBC Driver class name
   * @param dbUrl JDBC DB access URL.
   */
  public static void configureDB( Configuration job, String driverClass, String dbUrl )
    {
    configureDB( job, driverClass, dbUrl, null, null );
    }

  /**
   * Returns a connection object to the DB, loading the configured driver class
   * first. Credentials are only used when a username is configured.
   *
   * @throws IOException when the driver cannot be loaded or the connection fails
   */
  Connection getConnection() throws IOException
    {
    try
      {
      Class.forName( configuration.get( DBConfiguration.DRIVER_CLASS_PROPERTY ) );
      }
    catch( ClassNotFoundException exception )
      {
      throw new IOException( "unable to load database driver", exception );
      }
    LOG.info( "opening db connection: " + configuration.get( DBConfiguration.URL_PROPERTY ) );
    try
      {
      if( configuration.get( DBConfiguration.USERNAME_PROPERTY ) == null )
        return DriverManager.getConnection( configuration.get( DBConfiguration.URL_PROPERTY ) );
      else
        {
        return DriverManager.getConnection( configuration.get( DBConfiguration.URL_PROPERTY ),
            configuration.get( DBConfiguration.USERNAME_PROPERTY ),
            configuration.get( DBConfiguration.PASSWORD_PROPERTY ) );
        }
      }
    catch( SQLException exception )
      {
      throw new IOException( "unable to create connection", exception );
      }
    }

  String getInputTableName()
    {
    return configuration.get( DBConfiguration.INPUT_TABLE_NAME_PROPERTY );
    }

  void setInputTableName( String tableName )
    {
    configuration.set( DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tableName );
    }

  String[] getInputFieldNames()
    {
    return configuration.getStrings( DBConfiguration.INPUT_FIELD_NAMES_PROPERTY );
    }

  void setInputFieldNames( String... fieldNames )
    {
    configuration.setStrings( DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, fieldNames );
    }

  String getInputConditions()
    {
    return configuration.get( DBConfiguration.INPUT_CONDITIONS_PROPERTY );
    }

  void setInputConditions( String conditions )
    {
    if( conditions != null && conditions.length() > 0 )
      configuration.set( DBConfiguration.INPUT_CONDITIONS_PROPERTY, conditions );
    }

  String getInputOrderBy()
    {
    return configuration.get( DBConfiguration.INPUT_ORDER_BY_PROPERTY );
    }

  void setInputOrderBy( String orderby )
    {
    if( orderby != null && orderby.length() > 0 )
      configuration.set( DBConfiguration.INPUT_ORDER_BY_PROPERTY, orderby );
    }

  /** Returns the table alias flag, defaulting to true when unset. */
  Boolean getTableAlias()
    {
    return configuration.getBoolean( DBConfiguration.INPUT_TABLE_ALIAS, true );
    }

  void setTableAlias( Boolean alias )
    {
    configuration.setBoolean( DBConfiguration.INPUT_TABLE_ALIAS, alias );
    }

  String getInputQuery()
    {
    return configuration.get( DBConfiguration.INPUT_QUERY );
    }

  void setInputQuery( String query )
    {
    if( query != null && query.length() > 0 )
      configuration.set( DBConfiguration.INPUT_QUERY, query );
    }

  /** Returns the configured record limit, -1 meaning "no limit". */
  long getInputLimit()
    {
    return configuration.getLong( DBConfiguration.INPUT_LIMIT, -1 );
    }

  void setInputLimit( long limit )
    {
    configuration.setLong( DBConfiguration.INPUT_LIMIT, limit );
    }

  String getInputCountQuery()
    {
    return configuration.get( DBConfiguration.INPUT_COUNT_QUERY );
    }

  void setInputCountQuery( String query )
    {
    if( query != null && query.length() > 0 )
      configuration.set( DBConfiguration.INPUT_COUNT_QUERY, query );
    }

  Class getInputClass()
    {
    return configuration
        .getClass( DBConfiguration.INPUT_CLASS_PROPERTY, DBInputFormat.NullDBWritable.class );
    }

  void setInputClass( Class inputClass )
    {
    configuration.setClass( DBConfiguration.INPUT_CLASS_PROPERTY, inputClass, DBWritable.class );
    }

  String getOutputTableName()
    {
    return configuration.get( DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY );
    }

  void setOutputTableName( String tableName )
    {
    configuration.set( DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tableName );
    }

  String[] getOutputFieldNames()
    {
    return configuration.getStrings( DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY );
    }

  void setOutputFieldNames( String... fieldNames )
    {
    configuration.setStrings( DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, fieldNames );
    }

  String[] getOutputFieldTypes()
    {
    String fieldTypes = configuration.get( DBConfiguration.OUTPUT_FIELD_TYPES_PROPERTY );

    // FIX: previously this threw a NullPointerException when the property was
    // absent; return null instead, mirroring getOutputPrimaryKeys()
    if( fieldTypes == null )
      return null;

    return fieldTypes.split( SEPARATOR );
    }

  void setOutputFieldTypes( String... fieldTypes )
    {
    configuration.set( DBConfiguration.OUTPUT_FIELD_TYPES_PROPERTY, Util.join( fieldTypes, SEPARATOR ) );
    }

  String[] getOutputUpdateFieldNames()
    {
    return configuration.getStrings( DBConfiguration.OUTPUT_UPDATE_FIELD_NAMES_PROPERTY );
    }

  void setOutputUpdateFieldNames( String... fieldNames )
    {
    configuration.setStrings( DBConfiguration.OUTPUT_UPDATE_FIELD_NAMES_PROPERTY, fieldNames );
    }

  /** Returns the batch size, defaulting to 1000 statements. */
  int getBatchStatementsNum()
    {
    return configuration.getInt( DBConfiguration.BATCH_STATEMENTS_PROPERTY, 1000 );
    }

  void setBatchStatementsNum( int batchStatementsNum )
    {
    configuration.setInt( DBConfiguration.BATCH_STATEMENTS_PROPERTY, batchStatementsNum );
    }

  String[] getOutputPrimaryKeys()
    {
    String primaryKeys = configuration.get( OUTPUT_PRIMARY_KEYS_PROPERTY );
    if( primaryKeys == null )
      return null;
    return primaryKeys.split( SEPARATOR );
    }

  void setOutputPrimaryKeys( String[] primaryKeys )
    {
    if( primaryKeys != null )
      configuration.set( OUTPUT_PRIMARY_KEYS_PROPERTY, Util.join( primaryKeys, SEPARATOR ) );
    }

  /** Returns the maximum number of concurrent reads, 0 meaning "unbounded". */
  int getMaxConcurrentReadsNum()
    {
    return configuration.getInt( DBConfiguration.CONCURRENT_READS_PROPERTY, 0 );
    }

  void setMaxConcurrentReadsNum( int maxConcurrentReads )
    {
    // 0 is accepted and means "no limit"; only negative values are rejected
    if( maxConcurrentReads < 0 )
      throw new IllegalArgumentException( "maxConcurrentReads must be a non-negative value" );

    configuration.setInt( DBConfiguration.CONCURRENT_READS_PROPERTY, maxConcurrentReads );
    }

  /** Builds a {@link TableDesc} from the configured output table properties. */
  TableDesc toTableDesc()
    {
    return new TableDesc( getOutputTableName(), getOutputFieldNames(), getOutputFieldTypes(), getOutputPrimaryKeys() );
    }

  }
 *
 * In case this is not legally possible:
 * The copyright holder grants any entity the right
 * to use this work for any purpose, without any
 * conditions, unless such conditions are required by law.
 */

package cascading.jdbc.db;

import org.apache.hadoop.io.Writable;

import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * Objects that are read from/written to a database should implement DBWritable.
 * DBWritable is similar to {@link Writable} except that the
 * {@link #write(PreparedStatement)} method takes a {@link PreparedStatement},
 * and {@link #readFields(ResultSet)} takes a {@link ResultSet}.
 * <p>
 * Implementations are responsible for writing the fields of the object to
 * PreparedStatement, and reading the fields of the object from the ResultSet.
 * <p>
 * Example: if we have the following table in the database:
 * <pre>
 * CREATE TABLE MyTable (
 *   counter        INTEGER NOT NULL,
 *   timestamp      BIGINT  NOT NULL,
 * );
 * </pre>
 * then we can read/write the tuples from/to the table with:
 * <pre>
 * public class MyWritable implements Writable, DBWritable {
 *   // Some data
 *   private int counter;
 *   private long timestamp;
 *
 *   // Writable#write() implementation
 *   public void write(DataOutput out) throws IOException {
 *     out.writeInt(counter);
 *     out.writeLong(timestamp);
 *   }
 *
 *   // Writable#readFields() implementation
 *   public void readFields(DataInput in) throws IOException {
 *     counter = in.readInt();
 *     timestamp = in.readLong();
 *   }
 *
 *   public void write(PreparedStatement statement) throws SQLException {
 *     statement.setInt(1, counter);
 *     statement.setLong(2, timestamp);
 *   }
 *
 *   public void readFields(ResultSet resultSet) throws SQLException {
 *     counter = resultSet.getInt(1);
 *     timestamp = resultSet.getLong(2);
 *   }
 * }
 * </pre>
 */
public interface DBWritable {

  /**
   * Sets the fields of the object in the {@link PreparedStatement}.
   * Note that JDBC parameter indices are 1-based.
   *
   * @param statement the statement that the fields are put into.
   * @throws SQLException when binding a parameter fails
   */
  public void write(PreparedStatement statement) throws SQLException;

  /**
   * Reads the fields of the object from the {@link ResultSet}.
   * Note that JDBC column indices are 1-based.
   *
   * @param resultSet the {@link ResultSet} to get the fields from.
   * @throws SQLException when reading a column fails
   */
  public void readFields(ResultSet resultSet) throws SQLException;

}
19 | */ 20 | package cascading.jdbc; 21 | 22 | import static org.junit.Assert.fail; 23 | 24 | import org.junit.Before; 25 | 26 | public class GenericJdbcCheck extends JDBCTestingBase 27 | { 28 | 29 | public final static String JDBC_URL_PROPERTY_NAME = "cascading.jdbcurl"; 30 | 31 | public final static String JDBC_DRIVER_PROPERTY_NAME = "cascading.jdbcdriver"; 32 | 33 | @Before 34 | public void setUp() 35 | { 36 | if ( System.getProperty( JDBC_DRIVER_PROPERTY_NAME ) == null || System.getProperty( JDBC_URL_PROPERTY_NAME ) == null ) 37 | fail( String.format( "please set the '%s' and '%s' system properties", JDBC_DRIVER_PROPERTY_NAME, JDBC_URL_PROPERTY_NAME ) ); 38 | 39 | setJdbcurl( System.getProperty( JDBC_URL_PROPERTY_NAME ) ); 40 | setDriverName( System.getProperty( JDBC_DRIVER_PROPERTY_NAME ) ); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /cascading-jdbc-core/src/test/java/cascading/jdbc/InternalTypeMappingTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | package cascading.jdbc; 21 | 22 | import static org.junit.Assert.*; 23 | 24 | import java.sql.Date; 25 | import java.sql.Time; 26 | import java.sql.Timestamp; 27 | 28 | import org.junit.Test; 29 | 30 | import cascading.lingual.type.SQLDateCoercibleType; 31 | import cascading.lingual.type.SQLTimeCoercibleType; 32 | import cascading.lingual.type.SQLTimestampCoercibleType; 33 | 34 | public class InternalTypeMappingTest 35 | { 36 | 37 | @Test 38 | public void testMappings() 39 | { 40 | assertEquals( "int not null", InternalTypeMapping.sqltypeForClass( int.class ) ); 41 | assertEquals( "int not null", InternalTypeMapping.sqltypeForClass( long.class ) ); 42 | assertEquals( "int", InternalTypeMapping.sqltypeForClass( Integer.class ) ); 43 | assertEquals( "int", InternalTypeMapping.sqltypeForClass( Long.class ) ); 44 | assertEquals( "varchar(256)", InternalTypeMapping.sqltypeForClass( String.class ) ); 45 | assertEquals( "timestamp", InternalTypeMapping.sqltypeForClass( Timestamp.class ) ); 46 | assertEquals( "time", InternalTypeMapping.sqltypeForClass( Time.class ) ); 47 | assertEquals( "date", InternalTypeMapping.sqltypeForClass( Date.class ) ); 48 | } 49 | 50 | @Test(expected = IllegalArgumentException.class) 51 | public void testUnknownClass() 52 | { 53 | InternalTypeMapping.sqltypeForClass( boolean.class ); 54 | } 55 | 56 | @Test 57 | public void testTypeWithCoercibles() 58 | { 59 | assertEquals( "date", InternalTypeMapping.sqltypeForClass( new SQLDateCoercibleType() ) ); 60 | assertEquals( "time", InternalTypeMapping.sqltypeForClass( new SQLTimeCoercibleType() ) ); 61 | assertEquals( "timestamp", InternalTypeMapping.sqltypeForClass( new SQLTimestampCoercibleType() ) ); 62 | } 63 | } -------------------------------------------------------------------------------- /cascading-jdbc-core/src/test/java/cascading/jdbc/JDBCFactoryTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 
Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import static org.junit.Assert.assertArrayEquals; 24 | import static org.junit.Assert.assertEquals; 25 | import static org.junit.Assert.assertNotNull; 26 | import static org.mockito.Mockito.mock; 27 | import static org.mockito.Mockito.when; 28 | 29 | import java.util.Properties; 30 | 31 | import org.junit.Test; 32 | 33 | import cascading.jdbc.JDBCFactory; 34 | import cascading.jdbc.JDBCScheme; 35 | import cascading.jdbc.JDBCTap; 36 | import cascading.jdbc.TableDesc; 37 | import cascading.jdbc.db.DBInputFormat; 38 | import cascading.scheme.Scheme; 39 | import cascading.tap.SinkMode; 40 | import cascading.tuple.Fields; 41 | 42 | /** 43 | * Tests for {@link JDBCFactory}. 
44 | * 45 | * */ 46 | public class JDBCFactoryTest 47 | { 48 | 49 | @Test(expected = IllegalArgumentException.class) 50 | public void testCreateTapNoTableName() 51 | { 52 | String protocol = "jdbc"; 53 | String identifier = "jdbc:some:stuf//database"; 54 | JDBCScheme mockScheme = mock( JDBCScheme.class ); 55 | 56 | JDBCFactory factory = new JDBCFactory(); 57 | 58 | Properties props = new Properties(); 59 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" ); 60 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" ); 61 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "username" ); 62 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "password" ); 63 | 64 | factory.createTap( protocol, mockScheme, identifier, SinkMode.REPLACE, props ); 65 | } 66 | 67 | @Test(expected = IllegalArgumentException.class) 68 | public void testCreateTapEmptyTableName() 69 | { 70 | String protocol = "jdbc"; 71 | String identifier = "jdbc:some:stuf//database"; 72 | JDBCScheme mockScheme = mock( JDBCScheme.class ); 73 | 74 | JDBCFactory factory = new JDBCFactory(); 75 | 76 | Properties props = new Properties(); 77 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" ); 78 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" ); 79 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "username" ); 80 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "password" ); 81 | props.setProperty( JDBCFactory.PROTOCOL_TABLE_NAME, "" ); 82 | 83 | factory.createTap( protocol, mockScheme, identifier, SinkMode.REPLACE, props ); 84 | } 85 | 86 | 87 | @Test 88 | public void testCreateTapFullyWorking() 89 | { 90 | String protocol = "jdbc"; 91 | String identifier = "jdbc:some:stuf//database"; 92 | JDBCScheme mockScheme = mock( JDBCScheme.class ); 93 | 94 | JDBCFactory factory = new JDBCFactory(); 95 | 96 | Properties props = new Properties(); 97 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" ); 98 | props.setProperty( 
JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" ); 99 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "username" ); 100 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "password" ); 101 | 102 | props.setProperty( JDBCFactory.PROTOCOL_TABLE_NAME, "myTable" ); 103 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_NAMES, "id:name:lastname" ); 104 | 105 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_DEFS, "int:varchar(42):varchar(23)" ); 106 | props.setProperty( JDBCFactory.PROTOCOL_PRIMARY_KEYS, "id" ); 107 | 108 | JDBCTap tap = (JDBCTap) factory.createTap( protocol, mockScheme, identifier, SinkMode.REPLACE, props ); 109 | assertEquals( mockScheme, tap.getScheme() ); 110 | assertEquals( "myTable", tap.getTableName() ); 111 | assertEquals( SinkMode.REPLACE, tap.getSinkMode() ); 112 | TableDesc tdesc = tap.tableDesc; 113 | 114 | assertEquals( "myTable", tdesc.getTableName() ); 115 | assertArrayEquals( new String[] { "id", "name", "lastname" }, tdesc.getColumnNames() ); 116 | assertArrayEquals( new String[] { "int", "varchar(42)", "varchar(23)" }, tdesc.getColumnDefs() ); 117 | assertArrayEquals( new String[] { "id" }, tdesc.getPrimaryKeys() ); 118 | 119 | } 120 | 121 | @Test 122 | public void testCreateTapSinkModeOverwrite() 123 | { 124 | String protocol = "jdbc"; 125 | String identifier = "jdbc:some:stuf//database"; 126 | JDBCScheme mockScheme = mock( JDBCScheme.class ); 127 | 128 | JDBCFactory factory = new JDBCFactory(); 129 | 130 | Properties props = new Properties(); 131 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" ); 132 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" ); 133 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "username" ); 134 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "password" ); 135 | 136 | props.setProperty( JDBCFactory.PROTOCOL_TABLE_NAME, "myTable" ); 137 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_NAMES, "id:name:lastname" ); 138 | 139 | props.setProperty( 
JDBCFactory.PROTOCOL_COLUMN_DEFS, "int:varchar(42):varchar(23)" ); 140 | props.setProperty( JDBCFactory.PROTOCOL_PRIMARY_KEYS, "id" ); 141 | props.setProperty( JDBCFactory.PROTOCOL_SINK_MODE, "KEEP" ); 142 | 143 | 144 | JDBCTap tap = (JDBCTap) factory.createTap( protocol, mockScheme, identifier, SinkMode.REPLACE, props ); 145 | assertEquals( mockScheme, tap.getScheme() ); 146 | assertEquals( "myTable", tap.getTableName() ); 147 | assertEquals( SinkMode.KEEP, tap.getSinkMode() ); 148 | TableDesc tdesc = tap.tableDesc; 149 | 150 | assertEquals( "myTable", tdesc.getTableName() ); 151 | assertArrayEquals( new String[] { "id", "name", "lastname" }, tdesc.getColumnNames() ); 152 | assertArrayEquals( new String[] { "int", "varchar(42)", "varchar(23)" }, tdesc.getColumnDefs() ); 153 | assertArrayEquals( new String[] { "id" }, tdesc.getPrimaryKeys() ); 154 | 155 | } 156 | 157 | @Test 158 | public void testCreateTapFullyWorkingWithEmptyUserAndPass() 159 | { 160 | String protocol = "jdbc"; 161 | String identifier = "jdbc:some:stuf//database"; 162 | JDBCScheme mockScheme = mock( JDBCScheme.class ); 163 | 164 | JDBCFactory factory = new JDBCFactory(); 165 | 166 | Properties props = new Properties(); 167 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" ); 168 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" ); 169 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "" ); 170 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "" ); 171 | 172 | props.setProperty( JDBCFactory.PROTOCOL_TABLE_NAME, "myTable" ); 173 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_NAMES, "id:name:lastname" ); 174 | 175 | props.setProperty( JDBCFactory.PROTOCOL_COLUMN_DEFS, "int:varchar(42):varchar(23)" ); 176 | props.setProperty( JDBCFactory.PROTOCOL_PRIMARY_KEYS, "id" ); 177 | 178 | JDBCTap tap = (JDBCTap) factory.createTap( protocol, mockScheme, identifier, SinkMode.UPDATE, props ); 179 | assertEquals( mockScheme, tap.getScheme() ); 180 | assertEquals( 
"myTable", tap.getTableName() ); 181 | assertEquals( SinkMode.UPDATE, tap.getSinkMode() ); 182 | TableDesc tdesc = tap.tableDesc; 183 | 184 | assertEquals( "myTable", tdesc.getTableName() ); 185 | assertArrayEquals( new String[] { "id", "name", "lastname" }, tdesc.getColumnNames() ); 186 | assertArrayEquals( new String[] { "int", "varchar(42)", "varchar(23)" }, tdesc.getColumnDefs() ); 187 | assertArrayEquals( new String[] { "id" }, tdesc.getPrimaryKeys() ); 188 | 189 | } 190 | 191 | @Test 192 | public void testCreateTapWithMissingTableInformation() 193 | { 194 | String protocol = "jdbc"; 195 | String identifier = "jdbc:some:stuf//database"; 196 | JDBCScheme mockScheme = mock( JDBCScheme.class ); 197 | 198 | JDBCFactory factory = new JDBCFactory(); 199 | 200 | Properties props = new Properties(); 201 | props.setProperty( JDBCFactory.PROTOCOL_FIELD_SEPARATOR, ":" ); 202 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_DRIVER, "some.Driver" ); 203 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_USER, "" ); 204 | props.setProperty( JDBCFactory.PROTOCOL_JDBC_PASSWORD, "" ); 205 | 206 | props.setProperty( JDBCFactory.PROTOCOL_TABLE_NAME, "myTable" ); 207 | 208 | String[] columnNames = new String [] {"id", "name", "lastname"}; 209 | @SuppressWarnings("rawtypes") 210 | Class[] fieldTypes = new Class[] {int.class, String.class, String.class}; 211 | 212 | Fields fields = new Fields(columnNames, fieldTypes); 213 | when(mockScheme.getSinkFields()).thenReturn( fields ); 214 | 215 | JDBCTap tap = (JDBCTap) factory.createTap( protocol, mockScheme, identifier, SinkMode.UPDATE, props ); 216 | 217 | TableDesc tdesc = tap.tableDesc; 218 | assertEquals( "myTable", tdesc.getTableName() ); 219 | assertArrayEquals( new String[] { "id", "name", "lastname" }, tdesc.getColumnNames() ); 220 | assertArrayEquals( new String[] { "int not null", "varchar(256)", "varchar(256)" }, tdesc.getColumnDefs() ); 221 | 222 | } 223 | 224 | @Test 225 | public void testCreateScheme() 226 | { 227 | JDBCFactory 
factory = new JDBCFactory(); 228 | Fields fields = new Fields( "ONE", "TWO", "THREE" ); 229 | 230 | Properties schemeProperties = new Properties(); 231 | schemeProperties.setProperty( JDBCFactory.FORMAT_COLUMNS, "one:two:three" ); 232 | 233 | Scheme scheme = factory.createScheme( "someFormat", fields, schemeProperties ); 234 | assertNotNull( scheme ); 235 | 236 | JDBCScheme jdbcScheme = (JDBCScheme) scheme; 237 | 238 | assertArrayEquals( jdbcScheme.getColumns(), new String[] { "one", "two", "three" } ); 239 | 240 | } 241 | 242 | 243 | @Test 244 | public void testCreateSchemeColumnsFromFields() 245 | { 246 | JDBCFactory factory = new JDBCFactory(); 247 | Fields fields = new Fields( "one", "two", "three" ); 248 | 249 | Properties schemeProperties = new Properties(); 250 | 251 | Scheme scheme = factory.createScheme( "someFormat", fields, schemeProperties ); 252 | assertNotNull( scheme ); 253 | 254 | JDBCScheme jdbcScheme = (JDBCScheme) scheme; 255 | 256 | assertArrayEquals( jdbcScheme.getColumns(), new String[] { "one", "two", "three" } ); 257 | 258 | } 259 | 260 | @Test(expected = IllegalArgumentException.class) 261 | public void testCreateSchemeWithSelectNoCount() 262 | { 263 | JDBCFactory factory = new JDBCFactory(); 264 | Fields fields = new Fields( "one", "two", "three" ); 265 | 266 | Properties schemeProperties = new Properties(); 267 | schemeProperties.setProperty( JDBCFactory.FORMAT_COLUMNS, "one:two:three" ); 268 | schemeProperties.setProperty( JDBCFactory.FORMAT_SELECT_QUERY, "select one, two, three from table" ); 269 | 270 | factory.createScheme( "someFormat", fields, schemeProperties ); 271 | } 272 | 273 | @Test 274 | public void testCreateSchemeWithSelectAndCount() 275 | { 276 | JDBCFactory factory = new JDBCFactory(); 277 | Fields fields = new Fields( "one", "two", "three" ); 278 | 279 | Properties schemeProperties = new Properties(); 280 | schemeProperties.setProperty( JDBCFactory.FORMAT_COLUMNS, "one:two:three" ); 281 | schemeProperties.setProperty( 
JDBCFactory.FORMAT_SELECT_QUERY, "select one, two, three from table" ); 282 | schemeProperties.setProperty( JDBCFactory.FORMAT_COUNT_QUERY, "select count(*) from table" ); 283 | 284 | Scheme scheme = factory.createScheme( "someFormat", fields, schemeProperties ); 285 | assertNotNull( scheme ); 286 | } 287 | 288 | @Test 289 | public void testGetInputFormatClass() 290 | { 291 | assertEquals(DBInputFormat.class, new JDBCFactory().getInputFormatClass()); 292 | } 293 | 294 | } 295 | -------------------------------------------------------------------------------- /cascading-jdbc-core/src/test/java/cascading/jdbc/JDBCSchemeTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import static org.junit.Assert.*; 24 | import static org.mockito.Mockito.*; 25 | 26 | import org.apache.hadoop.mapred.JobConf; 27 | import org.junit.Test; 28 | 29 | import cascading.flow.FlowProcess; 30 | import cascading.jdbc.db.DBInputFormat; 31 | import cascading.jdbc.db.DBOutputFormat; 32 | import cascading.tuple.Fields; 33 | 34 | public class JDBCSchemeTest 35 | { 36 | 37 | @SuppressWarnings("unchecked") 38 | @Test 39 | public void testPresentSinkFields() 40 | { 41 | String[] columnNames = new String[]{ "id", "firstname", "lastname" }; 42 | JDBCScheme scheme = new JDBCScheme( DBInputFormat.class, DBOutputFormat.class, Fields.UNKNOWN, columnNames, null, null, -1, null, null, 43 | null ); 44 | 45 | @SuppressWarnings("rawtypes") 46 | Class[] fieldTypes = new Class[]{ int.class, String.class, String.class }; 47 | Fields fields = new Fields( columnNames, fieldTypes ); 48 | FlowProcess fp = mock( FlowProcess.class ); 49 | 50 | JDBCTap tap = mock( JDBCTap.class ); 51 | 52 | TableDesc desc = new TableDesc( "test_table" ); 53 | when( tap.getTableDesc() ).thenReturn( desc ); 54 | 55 | assertFalse( desc.hasRequiredTableInformation() ); 56 | 57 | scheme.presentSinkFields( fp, tap, fields ); 58 | 59 | assertTrue( desc.hasRequiredTableInformation() ); 60 | assertEquals( fields, scheme.getSinkFields() ); 61 | 62 | assertArrayEquals( columnNames, desc.getColumnNames() ); 63 | 64 | assertArrayEquals( new String[]{ "int not null", "varchar(256)", "varchar(256)" }, desc.getColumnDefs() ); 65 | 66 | } 67 | 68 | @SuppressWarnings("unchecked") 69 | @Test 70 | public void testPresentSinkFieldsWithNullColumns() 71 | { 72 | String[] columnNames = new String[]{ "id", "firstname", "lastname" }; 73 | JDBCScheme scheme = new JDBCScheme( DBInputFormat.class, DBOutputFormat.class, Fields.UNKNOWN, null, null, null, -1, null, null, null ); 74 | 75 | @SuppressWarnings("rawtypes") 76 | Class[] fieldTypes = new Class[]{ int.class, String.class, 
String.class }; 77 | Fields fields = new Fields( columnNames, fieldTypes ); 78 | FlowProcess fp = mock( FlowProcess.class ); 79 | 80 | JDBCTap tap = mock( JDBCTap.class ); 81 | 82 | TableDesc desc = new TableDesc( "test_table" ); 83 | when( tap.getTableDesc() ).thenReturn( desc ); 84 | 85 | scheme.presentSinkFields( fp, tap, fields ); 86 | 87 | assertTrue( desc.hasRequiredTableInformation() ); 88 | 89 | assertArrayEquals( columnNames, scheme.getColumns() ); 90 | 91 | } 92 | 93 | @SuppressWarnings("unchecked") 94 | @Test(expected = IllegalArgumentException.class) 95 | public void testPresentSinkFieldsWithFieldsMismatch() 96 | { 97 | String[] columnNames = new String[]{ "id", "firstname", "lastname" }; 98 | JDBCScheme scheme = new JDBCScheme( DBInputFormat.class, DBOutputFormat.class, Fields.UNKNOWN, columnNames, null, null, -1, null, null, 99 | null ); 100 | 101 | @SuppressWarnings("rawtypes") 102 | Class[] fieldTypes = new Class[]{ int.class, String.class }; 103 | Fields fields = new Fields( new String[]{ "id", "firstname" }, fieldTypes ); 104 | FlowProcess fp = mock( FlowProcess.class ); 105 | 106 | JDBCTap tap = mock( JDBCTap.class ); 107 | 108 | TableDesc desc = new TableDesc( "test_table" ); 109 | when( tap.getTableDesc() ).thenReturn( desc ); 110 | 111 | scheme.presentSinkFields( fp, tap, fields ); 112 | 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /cascading-jdbc-core/src/test/java/cascading/jdbc/TableDescTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import cascading.lingual.type.SQLDateCoercibleType; 24 | import cascading.tuple.Fields; 25 | import org.junit.Test; 26 | 27 | import static org.junit.Assert.*; 28 | 29 | public class TableDescTest 30 | { 31 | 32 | @Test 33 | public void testHasRequiredTableInformation() 34 | { 35 | TableDesc desc = new TableDesc( "name" ); 36 | assertFalse( desc.hasRequiredTableInformation() ); 37 | 38 | desc = new TableDesc( "name", null, null, null ); 39 | assertFalse( desc.hasRequiredTableInformation() ); 40 | 41 | desc = new TableDesc( "name", new String[]{ "id" }, null, null ); 42 | assertFalse( desc.hasRequiredTableInformation() ); 43 | 44 | desc = new TableDesc( "name", new String[]{ "id" }, new String[]{ "int" }, new String[] {"foo"} ); 45 | assertTrue( desc.hasRequiredTableInformation() ); 46 | 47 | } 48 | 49 | @Test 50 | public void testCompleteFromFields() 51 | { 52 | TableDesc desc = new TableDesc( "name" ); 53 | assertFalse( desc.hasRequiredTableInformation() ); 54 | 55 | Fields fields = new Fields( "id", int.class ); 56 | desc.completeFromFields( fields ); 57 | 58 | assertTrue( desc.hasRequiredTableInformation() ); 59 | 60 | assertArrayEquals( new String[]{ "id" }, desc.getColumnNames() ); 61 | 62 | assertArrayEquals( new String[]{ "int not null" }, desc.getColumnDefs() ); 63 | } 64 | 65 | @Test 66 | public void testCompleteFromFieldsWithCoercibleType() 67 | { 68 | TableDesc desc = new TableDesc( "name" ); 69 | assertFalse( desc.hasRequiredTableInformation() ); 70 | 71 | 
Fields fields = new Fields( "creation_date", new SQLDateCoercibleType() ); 72 | desc.completeFromFields( fields ); 73 | 74 | assertTrue( desc.hasRequiredTableInformation() ); 75 | 76 | assertArrayEquals( new String[]{ "creation_date" }, desc.getColumnNames() ); 77 | 78 | assertArrayEquals( new String[]{ "date" }, desc.getColumnDefs() ); 79 | 80 | } 81 | 82 | @Test(expected = IllegalArgumentException.class) 83 | public void testCompleteFromFieldsMissingType() 84 | { 85 | TableDesc desc = new TableDesc( "name" ); 86 | assertFalse( desc.hasRequiredTableInformation() ); 87 | 88 | Fields fields = new Fields( "id" ); 89 | desc.completeFromFields( fields ); 90 | } 91 | 92 | @Test(expected = IllegalStateException.class) 93 | public void testCompleteFromFieldsWithUnknownFields() 94 | { 95 | TableDesc desc = new TableDesc( "name" ); 96 | assertFalse( desc.hasRequiredTableInformation() ); 97 | 98 | Fields fields = Fields.UNKNOWN; 99 | desc.completeFromFields( fields ); 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /cascading-jdbc-core/src/test/java/cascading/jdbc/TupleRecordTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import static org.junit.Assert.*; 24 | import static org.mockito.Mockito.*; 25 | 26 | import java.sql.PreparedStatement; 27 | import java.sql.ResultSet; 28 | import java.sql.ResultSetMetaData; 29 | import java.sql.SQLException; 30 | 31 | import org.junit.Test; 32 | 33 | import cascading.tuple.Tuple; 34 | 35 | public class TupleRecordTest 36 | { 37 | 38 | @Test 39 | public void testTupleRecord() 40 | { 41 | Tuple tup = new Tuple(); 42 | TupleRecord tupleRecord = new TupleRecord(); 43 | 44 | tupleRecord.setTuple( tup ); 45 | assertSame( tup, tupleRecord.getTuple() ); 46 | 47 | } 48 | 49 | @Test 50 | public void testWrite() throws SQLException 51 | { 52 | Tuple t = new Tuple( "one", "two", "three" ); 53 | PreparedStatement stmt = mock( PreparedStatement.class ); 54 | TupleRecord tupleRecord = new TupleRecord( t ); 55 | tupleRecord.write( stmt ); 56 | verify( stmt ).setObject( 1, "one" ); 57 | verify( stmt ).setObject( 2, "two" ); 58 | verify( stmt ).setObject( 3, "three" ); 59 | verifyNoMoreInteractions( stmt ); 60 | } 61 | 62 | @Test 63 | public void testRead() throws SQLException 64 | { 65 | Tuple expectedTuple = new Tuple( "foo", "bar", "baz" ); 66 | 67 | ResultSet resultSet = mock( ResultSet.class ); 68 | ResultSetMetaData rsm = mock( ResultSetMetaData.class ); 69 | when( rsm.getColumnCount() ).thenReturn( 3 ); 70 | when( resultSet.getMetaData() ).thenReturn( rsm ); 71 | when( resultSet.getObject( 1 ) ).thenReturn( "foo" ); 72 | when( resultSet.getObject( 2 ) ).thenReturn( "bar" ); 73 | when( resultSet.getObject( 3 ) ).thenReturn( "baz" ); 74 | 75 | TupleRecord tupleRecord = new TupleRecord(); 76 | 77 | tupleRecord.readFields( resultSet ); 78 | 79 | Tuple result = tupleRecord.getTuple(); 80 | 81 | assertEquals( expectedTuple, result ); 82 | 83 | } 84 | 85 | } 86 | 
-------------------------------------------------------------------------------- /cascading-jdbc-core/src/test/resources/data/small.txt: -------------------------------------------------------------------------------- 1 | 1 a A 2 | 1 b B 3 | 1 c C 4 | 2 b B 5 | 2 c C 6 | 2 d D 7 | 3 c C 8 | 4 b B 9 | 4 c C 10 | 4 d D 11 | 5 a A 12 | 5 b B 13 | 5 e E 14 | -------------------------------------------------------------------------------- /cascading-jdbc-core/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 
19 | # 20 | 21 | # log4j configuration used during build and unit tests 22 | 23 | log4j.rootLogger=info,stdout 24 | log4j.threshhold=ALL 25 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 26 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 27 | log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 28 | 29 | log4j.logger.cascading=INFO -------------------------------------------------------------------------------- /cascading-jdbc-derby/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | evaluationDependsOn( ":cascading-jdbc-core" ) 22 | 23 | ext.derbyVersion = "10.11.1.1" 24 | 25 | dependencies{ 26 | compile project( ':cascading-jdbc-core' ) 27 | 28 | compile( group: 'org.apache.derby', name: 'derby', version: derbyVersion ) 29 | compile( group: 'org.apache.derby', name: 'derbyclient', version: derbyVersion ) 30 | compile( group: 'org.apache.derby', name: 'derbynet', version: derbyVersion ) 31 | 32 | testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath 33 | } 34 | 35 | task configInfo() << { 36 | ( configurations.compile.minus([configurations.provided])).each{ entry -> 37 | println entry 38 | } 39 | } 40 | 41 | 42 | task myJar( type: Jar ) { 43 | classifier = 'provider' 44 | dependsOn configurations.runtime 45 | from { 46 | ( configurations.compile.minus([configurations.provided])).collect{ 47 | it.isDirectory() ? it : zipTree(it) 48 | } 49 | } { 50 | exclude "META-INF/*.SF" 51 | exclude "META-INF/*.DSA" 52 | exclude "META-INF/*.RSA" 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /cascading-jdbc-derby/src/main/java/cascading/jdbc/DerbyFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import cascading.jdbc.db.DBInputFormat; 24 | import cascading.jdbc.db.DerbyDBInputFormat; 25 | 26 | /** 27 | * {@link JDBCFactory} subclass that plugs in the Derby-specific {@link DBInputFormat} implementation ({@link DerbyDBInputFormat}). 28 | */ 29 | public class DerbyFactory extends JDBCFactory 30 | { 31 | @Override 32 | protected Class getInputFormatClass() 33 | { 34 | return DerbyDBInputFormat.class; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /cascading-jdbc-derby/src/main/java/cascading/jdbc/db/DerbyDBInputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License.
19 | */ 20 | 21 | package cascading.jdbc.db; 22 | 23 | import java.io.IOException; 24 | import java.sql.SQLException; 25 | 26 | import org.apache.hadoop.io.LongWritable; 27 | import org.apache.hadoop.mapred.JobConf; 28 | import org.apache.hadoop.mapred.RecordReader; 29 | 30 | /** 31 | * Derby-specific {@link DBInputFormat} whose record reader builds a select query that pages each input split with Derby's OFFSET ... ROWS FETCH NEXT ... ROWS ONLY syntax. 32 | */ 33 | public class DerbyDBInputFormat extends DBInputFormat 34 | { 35 | 36 | @Override 37 | protected RecordReader getRecordReaderInternal( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException 38 | { 39 | return new DerbyDBRecordReader( split, inputClass, job ); 40 | } 41 | 42 | class DerbyDBRecordReader extends DBInputFormat.DBRecordReader 43 | { 44 | protected DerbyDBRecordReader( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException 45 | { 46 | super( split, inputClass, job ); 47 | } 48 | 49 | /** Returns the query for selecting the records from a Derby DB. */ 50 | protected String getSelectQuery() 51 | { 52 | StringBuilder query = new StringBuilder(); 53 | 54 | // derby-specific codepath 55 | if( dbConf.getInputQuery() == null ) 56 | { 57 | query.append( "SELECT " ); 58 | 59 | for( int i = 0; i < fieldNames.length; i++ ) 60 | { 61 | query.append( fieldNames[ i ] ); 62 | if( i != fieldNames.length - 1 ) 63 | { 64 | query.append( ", " ); 65 | } 66 | } 67 | 68 | query.append( " FROM " ).append( tableName ); 69 | if( conditions != null && conditions.length() > 0 ) 70 | query.append( " WHERE " ).append( conditions ); 71 | 72 | String orderBy = dbConf.getInputOrderBy(); 73 | if( orderBy != null && orderBy.length() > 0 ) 74 | query.append( " ORDER BY " ).append( orderBy ); 75 | } 76 | else 77 | { 78 | //PREBUILT QUERY 79 | query.append( dbConf.getInputQuery() ); 80 | } 81 | try 82 | { 83 | if( split.getLength() > 0 && split.getStart() >= 0 ) 84 | { 85 | query.append( " OFFSET " ).append( split.getStart() ) 86 | .append( " ROWS FETCH NEXT " ).append( 
split.getEnd() - split.getStart() ).append( " ROWS ONLY" ); 87 | } 88 | } 89 | catch( IOException ex ) 90 | { 91 | // ignore, will not throw. 92 | } 93 | 94 | return query.toString(); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /cascading-jdbc-derby/src/main/resources/cascading/bind/provider.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 
19 | # 20 | 21 | # default name of provider 22 | cascading.bind.provider.names=derby 23 | cascading.bind.provider.derby.platforms=hadoop,hadoop2-mr1 24 | 25 | # factory 26 | cascading.bind.provider.derby.factory.classname=cascading.jdbc.DerbyFactory 27 | 28 | # the protocol is jdbc 29 | cascading.bind.provider.derby.protocol.names=jdbc 30 | cascading.bind.provider.derby.protocol.jdbc.schemes=derby 31 | cascading.bind.provider.derby.protocol.jdbc.jdbcdriver=org.apache.derby.jdbc.ClientDriver 32 | cascading.bind.provider.derby.protocol.jdbc.tabledescseparator=: 33 | cascading.bind.provider.derby.protocol.jdbc.jdbcuser= 34 | cascading.bind.provider.derby.protocol.jdbc.jdbcpassword= 35 | cascading.bind.provider.derby.protocol.jdbc.tabledesc.tablename= 36 | cascading.bind.provider.derby.protocol.jdbc.tabledesc.columnnames= 37 | cascading.bind.provider.derby.protocol.jdbc.tabledesc.columndefs= 38 | cascading.bind.provider.derby.protocol.jdbc.tabledesc.primarykeys= 39 | cascading.bind.provider.derby.protocol.jdbc.sinkmode= 40 | 41 | # the format is derby 42 | cascading.bind.provider.derby.format.names=derby 43 | cascading.bind.provider.derby.format.derby.protocols=jdbc 44 | cascading.bind.provider.derby.format.derby.separator=: 45 | cascading.bind.provider.derby.format.derby.columnnames= 46 | cascading.bind.provider.derby.format.derby.orderBy= 47 | cascading.bind.provider.derby.format.derby.conditions= 48 | cascading.bind.provider.derby.format.derby.limit= 49 | cascading.bind.provider.derby.format.derby.updateBy= 50 | cascading.bind.provider.derby.format.derby.tableAlias= 51 | cascading.bind.provider.derby.format.derby.selectquery= 52 | cascading.bind.provider.derby.format.derby.countquery= 53 | -------------------------------------------------------------------------------- /cascading-jdbc-derby/src/test/java/cascading/jdbc/DerbyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, 
Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import java.io.PrintWriter; 24 | import java.net.InetAddress; 25 | 26 | import cascading.jdbc.db.DerbyDBInputFormat; 27 | import org.apache.derby.drda.NetworkServerControl; 28 | import org.junit.After; 29 | import org.junit.Before; 30 | 31 | /** 32 | * This class runs the tests against an in network instance of apache derby: 33 | * http://db.apache.org/derby/ 34 | * */ 35 | public class DerbyTest extends JDBCTestingBase 36 | { 37 | 38 | private final int PORT = 9006; 39 | private NetworkServerControl serverControl; 40 | 41 | 42 | 43 | @Before 44 | public void setUp() throws Exception 45 | { 46 | System.setProperty( "derby.storage.rowLocking", "true" ); 47 | System.setProperty( "derby.locks.monitor", "true" ); 48 | System.setProperty( "derby.locks.deadlockTrace", "true" ); 49 | System.setProperty( "derby.system.home", "build/derby" ); 50 | 51 | serverControl = new NetworkServerControl( InetAddress.getByName( "localhost" ), PORT ); 52 | serverControl.start( new PrintWriter(System.out,true ) ); 53 | 54 | setDriverName( "org.apache.derby.jdbc.ClientDriver" ); 55 | setJdbcurl( String.format("jdbc:derby://localhost:%s/testing;create=true", PORT) ); 56 | setInputFormatClass( 
DerbyDBInputFormat.class ); 57 | setFactory( new DerbyFactory() ); 58 | 59 | } 60 | 61 | @After 62 | public void tearDown() throws Exception 63 | { 64 | serverControl.shutdown(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /cascading-jdbc-h2/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | evaluationDependsOn( ":cascading-jdbc-core" ) 22 | 23 | dependencies{ 24 | compile project( ':cascading-jdbc-core' ) 25 | 26 | compile( group: 'com.h2database', name:'h2', version: '1.3.173' ) 27 | 28 | testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath 29 | } 30 | 31 | -------------------------------------------------------------------------------- /cascading-jdbc-h2/src/main/resources/cascading/bind/provider.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 
7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | # default name of provider 22 | cascading.bind.provider.names=h2 23 | cascading.bind.provider.h2.platforms=hadoop,hadoop2-mr1 24 | 25 | # factory 26 | cascading.bind.provider.h2.factory.classname=cascading.jdbc.JDBCFactory 27 | 28 | # the protocol is jdbc 29 | cascading.bind.provider.h2.protocol.names=jdbc 30 | cascading.bind.provider.h2.protocol.jdbc.schemes=h2 31 | cascading.bind.provider.h2.protocol.jdbc.jdbcdriver=org.h2.Driver 32 | cascading.bind.provider.h2.protocol.jdbc.tabledescseparator=: 33 | cascading.bind.provider.h2.protocol.jdbc.jdbcuser= 34 | cascading.bind.provider.h2.protocol.jdbc.jdbcpassword= 35 | cascading.bind.provider.h2.protocol.jdbc.tabledesc.tablename= 36 | cascading.bind.provider.h2.protocol.jdbc.tabledesc.columnnames= 37 | cascading.bind.provider.h2.protocol.jdbc.tabledesc.columndefs= 38 | cascading.bind.provider.h2.protocol.jdbc.tabledesc.primarykeys= 39 | cascading.bind.provider.h2.protocol.jdbc.sinkmode= 40 | 41 | # the format is h2 42 | cascading.bind.provider.h2.format.names=h2 43 | cascading.bind.provider.h2.format.h2.protocols=jdbc 44 | cascading.bind.provider.h2.format.h2.separator=: 45 | cascading.bind.provider.h2.format.h2.columnnames= 46 | cascading.bind.provider.h2.format.h2.orderBy= 47 | cascading.bind.provider.h2.format.h2.conditions= 48 | cascading.bind.provider.h2.format.h2.limit= 49 | 
cascading.bind.provider.h2.format.h2.updateBy= 50 | cascading.bind.provider.h2.format.h2.tableAlias= 51 | cascading.bind.provider.h2.format.h2.selectquery= 52 | cascading.bind.provider.h2.format.h2.countquery= 53 | 54 | -------------------------------------------------------------------------------- /cascading-jdbc-h2/src/test/java/cascading/jdbc/H2Test.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import org.junit.Before; 24 | 25 | /** 26 | * Runs the tests against an instance of h2: 27 | * http://www.h2database.com/html/main.html 28 | * */ 29 | public class H2Test extends JDBCTestingBase 30 | { 31 | 32 | @Before 33 | public void setUp() 34 | { 35 | setDriverName( "org.h2.Driver" ); 36 | setJdbcurl( "jdbc:h2:mem:testing;DB_CLOSE_DELAY=-1;MVCC=true" ); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /cascading-jdbc-mysql/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | evaluationDependsOn( ":cascading-jdbc-core" ) 22 | 23 | dependencies{ 24 | compile project( ':cascading-jdbc-core' ) 25 | 26 | compile( group: 'mysql', name: 'mysql-connector-java', version: '5.1.34' ) 27 | 28 | testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath 29 | 30 | testRuntime ( group: 'mysql', name: 'mysql-connector-java', version: '5.1.34' ) 31 | } 32 | 33 | test{ 34 | systemProperty( "cascading.jdbcurl", System.getProperty( "cascading.jdbc.url.mysql" ) ) 35 | } 36 | 37 | -------------------------------------------------------------------------------- /cascading-jdbc-mysql/src/main/java/cascading/jdbc/MySqlFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import java.util.Properties; 24 | 25 | import cascading.jdbc.db.DBOutputFormat; 26 | import cascading.jdbc.db.DBInputFormat; 27 | import cascading.jdbc.db.MySqlDBOutputFormat; 28 | import cascading.jdbc.db.MySqlDBInputFormat; 29 | import cascading.scheme.Scheme; 30 | import cascading.tuple.Fields; 31 | 32 | /** 33 | * Subclass of JDBCFactory with mysql specific behaviour. 34 | */ 35 | public class MySqlFactory extends JDBCFactory 36 | { 37 | public static final String PROTOCOL_REPLACE_ON_INSERT = "replaceoninsert"; 38 | 39 | @Override 40 | protected Class getOutputFormClass() 41 | { 42 | return MySqlDBOutputFormat.class; 43 | } 44 | 45 | @Override 46 | protected Class getInputFormatClass() 47 | { 48 | return MySqlDBInputFormat.class; 49 | } 50 | 51 | protected Scheme createUpdatableScheme( Fields fields, long limit, String[] columnNames, Boolean tableAlias, String conditions, 52 | String[] updateBy, Fields updateByFields, String[] orderBy, Properties properties ) 53 | { 54 | boolean replaceOnInsert = false; 55 | String replaceOnInsertProperty = properties.getProperty( PROTOCOL_REPLACE_ON_INSERT ); 56 | if( replaceOnInsertProperty != null && !replaceOnInsertProperty.isEmpty() ) 57 | replaceOnInsert = Boolean.parseBoolean( replaceOnInsertProperty ); 58 | 59 | return new MySqlScheme( getInputFormatClass(), getOutputFormClass(), fields, columnNames, orderBy, conditions, limit, updateByFields, 60 | updateBy, tableAlias, replaceOnInsert ); 61 | } 62 | 63 | protected Scheme 
createScheme( Fields fields, String selectQuery, String countQuery, long limit, String[] columnNames, Boolean tableAlias ) 64 | { 65 | return new MySqlScheme( getInputFormatClass(), fields, columnNames, selectQuery, countQuery, limit, tableAlias ); 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /cascading-jdbc-mysql/src/main/java/cascading/jdbc/MySqlScheme.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import cascading.jdbc.db.DBOutputFormat; 24 | import cascading.tuple.Fields; 25 | import org.apache.hadoop.conf.Configuration; 26 | import org.apache.hadoop.mapred.JobConf; 27 | import org.apache.hadoop.mapred.OutputCollector; 28 | import org.apache.hadoop.mapred.RecordReader; 29 | 30 | import cascading.flow.FlowProcess; 31 | import cascading.jdbc.db.DBInputFormat; 32 | import cascading.jdbc.db.MySqlDBOutputFormat; 33 | import cascading.jdbc.db.MySqlDBConfiguration; 34 | import cascading.tap.Tap; 35 | 36 | public class MySqlScheme extends JDBCScheme 37 | { 38 | 39 | /** 40 | * If true, will use mysql's 'ON DUPLICATE KEY UPDATE' to update existing rows with the same key 41 | * with the new data. See http://dev.mysql.com/doc/refman/5.0/en/insert-on-duplicate.html. 42 | */ 43 | private boolean replaceOnInsert = false; 44 | 45 | /** 46 | * Constructor MySqlScheme creates a new MySqlScheme instance. 47 | * 48 | * Specify replaceOnInsert if you want to change the default insert behavior. 49 | * 50 | * @param inputFormatClass of type Class 51 | * @param columns of type String[] 52 | * @param orderBy of type String[] 53 | * @param conditions of type String 54 | * @param updateBy of type String[] 55 | * @param replaceOnInsert of type boolean 56 | */ 57 | public MySqlScheme( Class inputFormatClass, String[] columns, String[] orderBy, 58 | String conditions, String[] updateBy, boolean replaceOnInsert ) 59 | { 60 | super( inputFormatClass, MySqlDBOutputFormat.class, columns, orderBy, conditions, -1, updateBy ); 61 | this.replaceOnInsert = replaceOnInsert; 62 | } 63 | 64 | 65 | /** 66 | * Constructor MysqlScheme creates a new MysqlScheme instance. 67 | * 68 | * Specify replaceOnInsert if you want to change the default insert behavior. 
69 | * 70 | * @param inputFormatClass of type Class 71 | * @param outputFormatClass of type Class 72 | * @param columnFields of type Fields 73 | * @param columnNames of type String[] 74 | * @param orderBy of type String[] 75 | * @param conditions of type String 76 | * @param limit of type long 77 | * @param updateByFields of type Fields 78 | * @param updateBy of type String[] 79 | * @param tableAlias of type boolean 80 | */ 81 | public MySqlScheme( Class inputFormatClass, Class outputFormatClass, 82 | Fields columnFields, String[] columnNames, String[] orderBy, String conditions, 83 | long limit, Fields updateByFields, String[] updateBy, boolean tableAlias, boolean replaceOnInsert ) 84 | { 85 | super( inputFormatClass, outputFormatClass, columnFields, columnNames, orderBy, conditions, limit, updateByFields, updateBy, tableAlias ); 86 | this.replaceOnInsert = replaceOnInsert; 87 | } 88 | 89 | public MySqlScheme( Class inputFormatClass, Fields fields, String[] columnNames, String selectQuery, 90 | String countQuery, long limit, boolean tableAlias ) 91 | { 92 | super( inputFormatClass, fields, columnNames, selectQuery, countQuery, limit, tableAlias ); 93 | } 94 | 95 | @Override 96 | public void sinkConfInit( FlowProcess process, Tap tap, Configuration configuration ) 97 | { 98 | MySqlDBConfiguration conf = new MySqlDBConfiguration( configuration ); 99 | conf.setReplaceOnInsert( replaceOnInsert ); 100 | 101 | super.sinkConfInit( process, tap, configuration ); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /cascading-jdbc-mysql/src/main/java/cascading/jdbc/db/MySqlDBConfiguration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 
7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc.db; 22 | 23 | import org.apache.hadoop.conf.Configuration; 24 | 25 | public class MySqlDBConfiguration 26 | { 27 | 28 | /** Boolean to use ON DUPLICATE KEY UPDATE for INSERTs when outputting tuples to MySQL. */ 29 | public static final String REPLACE_ON_INSERT = "mapred.jdbc.output.replace.on.insert"; 30 | 31 | private Configuration configuration; 32 | 33 | public MySqlDBConfiguration( Configuration configuration ) 34 | { 35 | this.configuration = configuration; 36 | } 37 | 38 | public boolean getReplaceOnInsert() 39 | { 40 | return configuration.getBoolean( MySqlDBConfiguration.REPLACE_ON_INSERT, false ); 41 | } 42 | 43 | public void setReplaceOnInsert( boolean replaceOnInsert ) 44 | { 45 | configuration.setBoolean( MySqlDBConfiguration.REPLACE_ON_INSERT, replaceOnInsert ); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /cascading-jdbc-mysql/src/main/java/cascading/jdbc/db/MySqlDBInputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 
7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc.db; 22 | 23 | import java.sql.ResultSet; 24 | import java.sql.SQLException; 25 | import java.sql.Statement; 26 | 27 | import org.apache.hadoop.mapred.JobConf; 28 | import org.apache.hadoop.mapred.InputFormat; 29 | import org.apache.hadoop.mapred.RecordReader; 30 | import org.apache.hadoop.io.LongWritable; 31 | import org.apache.hadoop.io.Writable; 32 | import org.apache.hadoop.mapred.*; 33 | 34 | import java.io.IOException; 35 | 36 | public class MySqlDBInputFormat extends DBInputFormat 37 | { 38 | 39 | protected class MySqlDBRecordReader extends DBRecordReader 40 | { 41 | protected MySqlDBRecordReader( DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException 42 | { 43 | super( split, inputClass, job ); 44 | } 45 | 46 | @Override 47 | protected Statement createStatement() throws SQLException 48 | { 49 | Statement statement = connection.createStatement( ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY ); 50 | statement.setFetchSize( Integer.MIN_VALUE ); 51 | return statement; 52 | } 53 | } 54 | 55 | @Override 56 | protected RecordReader getRecordReaderInternal( DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, 57 | IOException 58 | { 59 | return new MySqlDBRecordReader( split, inputClass, job ); 60 | } 61 | 62 | } 63 | 
-------------------------------------------------------------------------------- /cascading-jdbc-mysql/src/main/java/cascading/jdbc/db/MySqlDBOutputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.jdbc.db; 22 | 23 | import java.io.IOException; 24 | import java.util.Collections; 25 | 26 | import org.apache.hadoop.fs.FileSystem; 27 | import org.apache.hadoop.mapred.JobConf; 28 | import org.apache.hadoop.mapred.OutputFormat; 29 | import org.apache.hadoop.mapred.RecordWriter; 30 | import org.apache.hadoop.util.Progressable; 31 | 32 | public class MySqlDBOutputFormat extends DBOutputFormat 33 | { 34 | 35 | private boolean replaceOnInsert = false; 36 | 37 | /** {@inheritDoc} */ 38 | public RecordWriter getRecordWriter( FileSystem filesystem, JobConf job, String name, Progressable progress ) throws IOException 39 | { 40 | MySqlDBConfiguration dbConf = new MySqlDBConfiguration( job ); 41 | replaceOnInsert = dbConf.getReplaceOnInsert(); 42 | 43 | return super.getRecordWriter( filesystem, job, name, progress ); 44 | } 45 | 46 | /** {@inheritDoc} */ 47 | @Override 48 | protected String constructInsertQuery( String table, String[] fieldNames ) 49 | { 50 | StringBuilder query = new StringBuilder( super.constructInsertQuery( table, fieldNames ) ); 51 | if( replaceOnInsert ) 52 | { 53 | query.append( " ON DUPLICATE KEY UPDATE " ); 54 | for( int i = 0; i < fieldNames.length; i++ ) 55 | { 56 | query.append( String.format( "%s=VALUES(%s)", fieldNames[i], fieldNames[i] ) ); 57 | if( i != fieldNames.length - 1 ) 58 | { 59 | query.append( "," ); } 60 | } 61 | } 62 | return query.toString(); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /cascading-jdbc-mysql/src/main/resources/cascading/bind/provider.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 
7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | # default name of provider 22 | cascading.bind.provider.names=mysql 23 | cascading.bind.provider.mysql.platforms=hadoop,hadoop2-mr1 24 | 25 | # factory 26 | cascading.bind.provider.mysql.factory.classname=cascading.jdbc.MySqlFactory 27 | 28 | # protocol is jdbc 29 | cascading.bind.provider.mysql.protocol.names=jdbc 30 | cascading.bind.provider.mysql.protocol.jdbc.schemes=mysql 31 | cascading.bind.provider.mysql.protocol.jdbc.tableExistsQuery=SHOW TABLES LIKE '%s' 32 | cascading.bind.provider.mysql.protocol.jdbc.jdbcdriver=com.mysql.jdbc.Driver 33 | cascading.bind.provider.mysql.protocol.jdbc.tabledescseparator=: 34 | cascading.bind.provider.mysql.protocol.jdbc.jdbcuser= 35 | cascading.bind.provider.mysql.protocol.jdbc.jdbcpassword= 36 | cascading.bind.provider.mysql.protocol.jdbc.tabledesc.tablename= 37 | cascading.bind.provider.mysql.protocol.jdbc.tabledesc.columnnames= 38 | cascading.bind.provider.mysql.protocol.jdbc.tabledesc.columndefs= 39 | cascading.bind.provider.mysql.protocol.jdbc.tabledesc.primarykeys= 40 | cascading.bind.provider.mysql.protocol.jdbc.sinkmode= 41 | 42 | # format is mysql 43 | cascading.bind.provider.mysql.format.names=mysql 44 | cascading.bind.provider.mysql.format.mysql.protocols=jdbc 45 | cascading.bind.provider.mysql.format.mysql.separator=: 46 | cascading.bind.provider.mysql.format.mysql.columnnames= 47 | 
cascading.bind.provider.mysql.format.mysql.orderBy= 48 | cascading.bind.provider.mysql.format.mysql.conditions= 49 | cascading.bind.provider.mysql.format.mysql.limit= 50 | cascading.bind.provider.mysql.format.mysql.updateBy= 51 | cascading.bind.provider.mysql.format.mysql.tableAlias= 52 | cascading.bind.provider.mysql.format.mysql.selectquery= 53 | cascading.bind.provider.mysql.format.mysql.countquery= 54 | cascading.bind.provider.mysql.format.mysql.replaceoninsert=false 55 | 56 | -------------------------------------------------------------------------------- /cascading-jdbc-mysql/src/test/java/cascading/jdbc/MysqlTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
/**
 * Runs the shared {@link JDBCTestingBase} test suite against a MySQL instance.
 */
public class MysqlTest extends JDBCTestingBase
  {

  /**
   * Points the testing base at MySQL: driver class, the MySQL specific
   * factory and input format, and the connection URL, which is supplied
   * externally through the "cascading.jdbcurl" system property
   * (set by the gradle build).
   */
  @Before
  public void setUp()
    {
    setDriverName( "com.mysql.jdbc.Driver" );
    setJdbcurl( System.getProperty( "cascading.jdbcurl" ) );
    setFactory( new MySqlFactory() );
    setInputFormatClass( MySqlDBInputFormat.class );
    }

  }
19 | */ 20 | 21 | evaluationDependsOn( ":cascading-jdbc-core" ) 22 | 23 | dependencies{ 24 | compile project( ':cascading-jdbc-core' ) 25 | 26 | compile( group: 'com.oracle', name: 'ojdbc6', version: '11.2.0.4' ) 27 | 28 | testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath 29 | } 30 | 31 | test{ 32 | systemProperty( "cascading.jdbcurl", System.getProperty( "cascading.jdbc.url.oracle" ) ) 33 | } 34 | 35 | -------------------------------------------------------------------------------- /cascading-jdbc-oracle/src/main/java/cascading/jdbc/OracleJDBCFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package cascading.jdbc; 21 | 22 | import cascading.jdbc.db.DBInputFormat; 23 | import cascading.jdbc.db.OracleDBInputFormat; 24 | 25 | /** 26 | * Oracle specific subclass of {@link JDBCFactory}. 
27 | * */ 28 | public class OracleJDBCFactory extends JDBCFactory 29 | { 30 | @Override 31 | protected Class getInputFormatClass() 32 | { 33 | return OracleDBInputFormat.class; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /cascading-jdbc-oracle/src/main/java/cascading/jdbc/db/OracleDBInputFormat.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | /* 19 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 20 | * 21 | * Project and contact information: http://www.cascading.org/ 22 | * 23 | * This file is part of the Cascading project. 24 | * 25 | * Licensed under the Apache License, Version 2.0 (the "License"); 26 | * you may not use this file except in compliance with the License. 27 | * You may obtain a copy of the License at 28 | * 29 | * http://www.apache.org/licenses/LICENSE-2.0 30 | * 31 | * Unless required by applicable law or agreed to in writing, software 32 | * distributed under the License is distributed on an "AS IS" BASIS, 33 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@SuppressWarnings("rawtypes")
public class OracleDBInputFormat extends DBInputFormat
  {
  /**
   * Returns an Oracle specific record reader for the given split.
   */
  @Override
  protected RecordReader getRecordReaderInternal( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException
    {
    return new OracleDBRecordReader( split, inputClass, job );
    }

  /**
   * Record reader that restricts the select query to its split's row range
   * using Oracle's ROWNUM pseudocolumn, since Oracle has no LIMIT/OFFSET.
   */
  class OracleDBRecordReader extends DBInputFormat.DBRecordReader
    {
    protected OracleDBRecordReader( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException
      {
      super( split, inputClass, job );
      }

    /** Returns the query for selecting the records from an Oracle DB. */
    protected String getSelectQuery()
      {
      StringBuilder query = new StringBuilder();

      // Oracle-specific codepath to use rownum instead of LIMIT/OFFSET.
      if( dbConf.getInputQuery() == null )
        {
        // No prebuilt query configured: assemble
        // SELECT <fields> FROM <table> [WHERE <conditions>] [ORDER BY <orderBy>]
        query.append( "SELECT " );

        for( int i = 0; i < fieldNames.length; i++ )
          {
          query.append( fieldNames[ i ] );
          if( i != fieldNames.length - 1 )
            {
            query.append( ", " );
            }
          }

        query.append( " FROM " ).append( tableName );
        if( conditions != null && conditions.length() > 0 )
          query.append( " WHERE " ).append( conditions );

        String orderBy = dbConf.getInputOrderBy();
        if( orderBy != null && orderBy.length() > 0 )
          query.append( " ORDER BY " ).append( orderBy );

        }
      else
        {
        //PREBUILT QUERY
        query.append( dbConf.getInputQuery() );
        }

      try
        {
        // Wrap the base query so only rows [start + 1, start + length] are
        // returned; dbif_rno materializes the inner result's ROWNUM so the
        // outer WHERE can apply the lower bound.
        if( split.getLength() > 0 && split.getStart() >= 0 )
          {
          String querystring = query.toString();

          query = new StringBuilder();
          query.append( "SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( " );
          query.append( querystring );
          query.append( " ) a WHERE rownum <= " ).append( split.getStart() );
          query.append( " + " ).append( split.getLength() );
          query.append( " ) WHERE dbif_rno >= " ).append( split.getStart() + 1 );
          }
        }
      catch( IOException ex )
        {
        // ignore, will not throw.
        }

      return query.toString();
      }
    }
  }
10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | # default name of provider 22 | cascading.bind.provider.names=oracle 23 | cascading.bind.provider.oracle.platforms=hadoop,hadoop2-mr1 24 | 25 | # factory 26 | cascading.bind.provider.oracle.factory.classname=cascading.jdbc.OracleJDBCFactory 27 | 28 | # protocol is jdbc 29 | cascading.bind.provider.oracle.protocol.names=jdbc 30 | cascading.bind.provider.oracle.protocol.jdbc.schemes=oracle 31 | cascading.bind.provider.oracle.protocol.jdbc.jdbcdriver=oracle.jdbc.OracleDriver 32 | cascading.bind.provider.oracle.protocol.jdbc.tabledescseparator=: 33 | cascading.bind.provider.oracle.protocol.jdbc.jdbcuser= 34 | cascading.bind.provider.oracle.protocol.jdbc.jdbcpassword= 35 | cascading.bind.provider.oracle.protocol.jdbc.tabledesc.tablename= 36 | cascading.bind.provider.oracle.protocol.jdbc.tabledesc.columnnames= 37 | cascading.bind.provider.oracle.protocol.jdbc.tabledesc.columndefs= 38 | cascading.bind.provider.oracle.protocol.jdbc.tabledesc.primarykeys= 39 | cascading.bind.provider.oracle.protocol.jdbc.sinkmode= 40 | 41 | # format is oracle 42 | cascading.bind.provider.oracle.format.names=oracle 43 | cascading.bind.provider.oracle.format.oracle.protocols=jdbc 44 | cascading.bind.provider.oracle.format.oracle.separator=: 45 | cascading.bind.provider.oracle.format.oracle.columnnames= 46 | cascading.bind.provider.oracle.format.oracle.orderBy= 47 | cascading.bind.provider.oracle.format.oracle.conditions= 48 | cascading.bind.provider.oracle.format.oracle.limit= 49 | 
cascading.bind.provider.oracle.format.oracle.updateBy= 50 | cascading.bind.provider.oracle.format.oracle.tableAlias= 51 | cascading.bind.provider.oracle.format.oracle.selectquery= 52 | cascading.bind.provider.oracle.format.oracle.countquery= 53 | 54 | -------------------------------------------------------------------------------- /cascading-jdbc-oracle/src/test/java/cascading/jdbc/OracleJDBCFactoryTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package cascading.jdbc; 21 | 22 | import static org.junit.Assert.*; 23 | 24 | import org.junit.Test; 25 | 26 | import cascading.jdbc.db.OracleDBInputFormat; 27 | 28 | public class OracleJDBCFactoryTest 29 | { 30 | 31 | @Test 32 | public void testGetInputFormatClass() 33 | { 34 | assertEquals(OracleDBInputFormat.class, new OracleJDBCFactory().getInputFormatClass()); 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /cascading-jdbc-oracle/src/test/java/cascading/jdbc/OracleTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. 
All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | package cascading.jdbc; 21 | 22 | import org.junit.Before; 23 | 24 | import cascading.jdbc.db.OracleDBInputFormat; 25 | 26 | /** 27 | * Tests against an oracle database. 28 | * */ 29 | public class OracleTest extends JDBCTestingBase 30 | { 31 | 32 | @Before 33 | public void setUp() 34 | { 35 | setDriverName( "oracle.jdbc.OracleDriver" ); 36 | setJdbcurl( System.getProperty( "cascading.jdbcurl" ) ); 37 | setInputFormatClass( OracleDBInputFormat.class ); 38 | setFactory( new OracleJDBCFactory() ); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /cascading-jdbc-postgresql/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | evaluationDependsOn( ":cascading-jdbc-core" ) 22 | 23 | dependencies{ 24 | compile project( ':cascading-jdbc-core' ) 25 | 26 | compile( group: 'postgresql', name: 'postgresql', version: '9.1-901-1.jdbc4' ) 27 | 28 | testCompile ( group: 'postgresql', name: 'postgresql', version: '9.1-901-1.jdbc4' ) 29 | 30 | testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath 31 | } 32 | 33 | test{ 34 | systemProperty( "cascading.jdbcurl", System.getProperty( "cascading.jdbc.url.postgresql" ) ) 35 | } 36 | 37 | -------------------------------------------------------------------------------- /cascading-jdbc-postgresql/src/main/resources/cascading/bind/provider.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
#

# default name of provider
cascading.bind.provider.names=postgresql
cascading.bind.provider.postgresql.platforms=hadoop,hadoop2-mr1

# factory
cascading.bind.provider.postgresql.factory.classname=cascading.jdbc.JDBCFactory

# protocol is jdbc
cascading.bind.provider.postgresql.protocol.names=jdbc
cascading.bind.provider.postgresql.protocol.jdbc.schemes=postgresql
cascading.bind.provider.postgresql.protocol.jdbc.jdbcdriver=org.postgresql.Driver
cascading.bind.provider.postgresql.protocol.jdbc.tabledescseparator=:
cascading.bind.provider.postgresql.protocol.jdbc.jdbcuser=
cascading.bind.provider.postgresql.protocol.jdbc.jdbcpassword=
cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.tablename=
cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.columnnames=
cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.columndefs=
cascading.bind.provider.postgresql.protocol.jdbc.tabledesc.primarykeys=
cascading.bind.provider.postgresql.protocol.jdbc.sinkmode=

# format is postgresql
# NOTE(review): the declared format name is "postgresql", but these keys used
# the prefix "format.postgres." — inconsistent with the mysql and oracle
# provider files, where the format name and key prefix match. Normalized to
# "format.postgresql." so the format properties resolve under the declared
# name; confirm against the provider lookup code.
cascading.bind.provider.postgresql.format.names=postgresql
cascading.bind.provider.postgresql.format.postgresql.protocols=jdbc
cascading.bind.provider.postgresql.format.postgresql.separator=:
cascading.bind.provider.postgresql.format.postgresql.columnnames=
cascading.bind.provider.postgresql.format.postgresql.orderBy=
cascading.bind.provider.postgresql.format.postgresql.conditions=
cascading.bind.provider.postgresql.format.postgresql.limit=
cascading.bind.provider.postgresql.format.postgresql.updateBy=
cascading.bind.provider.postgresql.format.postgresql.tableAlias=
cascading.bind.provider.postgresql.format.postgresql.selectquery=
cascading.bind.provider.postgresql.format.postgresql.countquery=
/**
 * Runs the shared {@link JDBCTestingBase} test suite against postgres.
 */
public class PostgresTest extends JDBCTestingBase
  {

  /**
   * Configures only the driver class and the connection URL (supplied via the
   * "cascading.jdbcurl" system property). Unlike the mysql and oracle test
   * fixtures, no factory or input format is set here, so the testing base's
   * defaults apply.
   */
  @Before
  public void setUp()
    {
    setDriverName( "org.postgresql.Driver" );
    setJdbcurl( System.getProperty( "cascading.jdbcurl" ) );
    }
  }
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | evaluationDependsOn( ":cascading-jdbc-core" ) 22 | 23 | ext.redshiftPostgresVersion = '8.4-702.jdbc4' 24 | 25 | dependencies { 26 | 27 | compile project( ':cascading-jdbc-core' ) 28 | compile group: 'postgresql', name: 'postgresql', version: redshiftPostgresVersion 29 | 30 | testCompile ( group: 'postgresql', name: 'postgresql', version: redshiftPostgresVersion ) 31 | testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath 32 | } 33 | 34 | configurations { 35 | sampleCode { 36 | extendsFrom compile 37 | } 38 | } 39 | 40 | test{ 41 | systemProperty( "cascading.jdbcurl", System.getProperty( "cascading.jdbc.url.redshift" ) ) 42 | } 43 | 44 | -------------------------------------------------------------------------------- /cascading-jdbc-redshift/src/main/java/cascading/jdbc/AWSCredentials.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import java.io.Serializable; 24 | 25 | /** Holder for the AWS credentials. {@link Serializable} is required for EMR use. */ 26 | public class AWSCredentials implements Serializable 27 | { 28 | private String awsAccessKey; 29 | private String awsSecretKey; 30 | 31 | public final static AWSCredentials RUNTIME_DETERMINED = new AWSCredentials( AWSCredentials.class.getName(), AWSCredentials.class.getName() ); 32 | 33 | public AWSCredentials( String awsAccessKey, String awsSecretKey ) 34 | { 35 | this.awsAccessKey = awsAccessKey; 36 | this.awsSecretKey = awsSecretKey; 37 | } 38 | 39 | public String getAwsAccessKey() 40 | { 41 | return awsAccessKey; 42 | } 43 | 44 | public String getAwsSecretKey() 45 | { 46 | return awsSecretKey; 47 | } 48 | 49 | public boolean isBlank() { 50 | return awsAccessKey == null && awsSecretKey == null; 51 | } 52 | 53 | @Override 54 | public boolean equals( Object object ) 55 | { 56 | if( this == object ) 57 | return true; 58 | 59 | if( !( object instanceof AWSCredentials ) ) 60 | return false; 61 | 62 | AWSCredentials that = (AWSCredentials) object; 63 | 64 | if( awsAccessKey != null ? !awsAccessKey.equals( that.awsAccessKey ) : that.awsAccessKey != null ) 65 | return false; 66 | if( awsSecretKey != null ? !awsSecretKey.equals( that.awsSecretKey ) : that.awsSecretKey != null ) 67 | return false; 68 | 69 | return true; 70 | } 71 | 72 | @Override 73 | public int hashCode() 74 | { 75 | int result = awsAccessKey != null ? 
awsAccessKey.hashCode() : 0; 76 | result = 31 * result + ( awsSecretKey != null ? awsSecretKey.hashCode() : 0 ); 77 | return result; 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /cascading-jdbc-redshift/src/main/java/cascading/jdbc/InvalidCodepointForRedshiftException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | /** Indicates that a line had a codepoint that */ 24 | 25 | 26 | public class InvalidCodepointForRedshiftException extends RuntimeException 27 | { 28 | 29 | private final String originalString; 30 | 31 | public InvalidCodepointForRedshiftException( String originalString ) 32 | { 33 | this.originalString = originalString; 34 | } 35 | 36 | @Override 37 | public String getMessage() 38 | { 39 | return String.format( "The string contains characters not allowed in a Redshift DB. 
Original string: \"%s\"", originalString ); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /cascading-jdbc-redshift/src/main/java/cascading/jdbc/RedshiftFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import java.util.HashMap; 24 | import java.util.Map; 25 | import java.util.Properties; 26 | 27 | import cascading.scheme.Scheme; 28 | import cascading.tap.SinkMode; 29 | import cascading.tap.Tap; 30 | import cascading.tuple.Fields; 31 | import cascading.util.Util; 32 | import org.slf4j.Logger; 33 | import org.slf4j.LoggerFactory; 34 | 35 | /** 36 | * The {@link RedshiftFactory} is a factory class to create {@link RedshiftTap}s 37 | * and {@link RedshiftScheme}s. The class is meant to be used by lingual for dynamically creating 39 | * Taps and Schemes, so that redshift can be used as a provider within lingual. 
42 | */ 43 | public class RedshiftFactory extends JDBCFactory 44 | { 45 | 46 | private static final Logger LOG = LoggerFactory.getLogger( RedshiftFactory.class ); 47 | 48 | /** environment variable for the aws access key */ 49 | private static final String SYSTEM_AWS_ACCESS_KEY = "AWS_ACCESS_KEY"; 50 | 51 | /** environment variable for the aws secret key */ 52 | private static final String SYSTEM_AWS_SECRET_KEY = "AWS_SECRET_KEY"; 53 | 54 | public static final String PROTOCOL_S3_OUTPUT_PATH = "s3outputpath"; 55 | public static final String PROTOCOL_AWS_ACCESS_KEY = "awsacceskey"; 56 | public static final String PROTOCOL_AWS_SECRET_KEY = "awssecretkey"; 57 | ; 58 | public static final String PROTOCOL_KEEP_DEBUG_HFS_DATA = "keepdebughfsdata"; 59 | public static final String PROTOCOL_USE_DIRECT_INSERT = "usedirectinsert"; 60 | 61 | public static final String FORMAT_DISTRIBUTION_KEY = "distributionkey"; 62 | public static final String FORMAT_SORT_KEYS = "sortkeys"; 63 | public static final String FORMAT_COPY_OPTIONS_PREFIX = "copyoptions."; 64 | public static final String FORMAT_FIELD_DELIMITER = "fielddelimiter"; 65 | public static final String FORMAT_QUOTE_CHARACTER = "quotecharacter"; 66 | 67 | @SuppressWarnings("unused") 68 | public String getDescription() 69 | { 70 | return getClass().getSimpleName(); 71 | } 72 | 73 | @SuppressWarnings("rawtypes") 74 | public Scheme createScheme( String format, Fields fields, Properties formatProperties ) 75 | { 76 | LOG.info( "creating RedshiftScheme for format {} with fields {} and properties {}", format, fields, formatProperties ); 77 | 78 | String delimiter = formatProperties.getProperty( FORMAT_FIELD_DELIMITER, RedshiftScheme.DEFAULT_DELIMITER ); 79 | String quoteCharacter = formatProperties.getProperty( FORMAT_QUOTE_CHARACTER, RedshiftScheme.DEFAULT_QUOTE ); 80 | 81 | RedshiftTableDesc redshiftTableDesc = createTableDescFromProperties( fields, formatProperties, true ); 82 | 83 | Map copyOptions = extractCopyOptions( 
  /**
   * Creates a {@link RedshiftTap} from the given protocol properties.
   *
   * Resolves JDBC user/password, the S3/HFS staging directory, AWS
   * credentials, and debug/direct-insert flags from the properties; builds a
   * {@link RedshiftTableDesc}, falling back to the scheme's sink fields when
   * the table description from the properties is incomplete; and honors a
   * sink-mode override from the properties.
   *
   * @param protocol           the protocol name (unused here; part of the factory contract)
   * @param scheme             the scheme to wrap; expected to be a {@link RedshiftScheme}
   * @param identifier         the tap identifier (JDBC url)
   * @param sinkMode           default sink mode; may be overridden by the properties
   * @param protocolProperties properties carrying the PROTOCOL_* keys
   * @return a fully configured {@link RedshiftTap}
   */
  @SuppressWarnings("rawtypes")
  public Tap createTap( String protocol, Scheme scheme, String identifier, SinkMode sinkMode, Properties protocolProperties )
    {
    LOG.info( "creating RedshiftTap with properties {} in mode {}", protocolProperties, sinkMode );

    String jdbcUserProperty = protocolProperties.getProperty( PROTOCOL_JDBC_USER );
    String jdbcPasswordProperty = protocolProperties.getProperty( PROTOCOL_JDBC_PASSWORD );

    // empty strings are normalized to null so the tap treats them as "not set"
    String jdbcUser = null;
    if( !Util.isEmpty( jdbcUserProperty ) )
      jdbcUser = jdbcUserProperty;

    String jdbcPassword = null;
    if( !Util.isEmpty( jdbcPasswordProperty ) )
      jdbcPassword = jdbcPasswordProperty;

    String hfsStagingDir = protocolProperties.getProperty( PROTOCOL_S3_OUTPUT_PATH, "/tmp" );

    AWSCredentials credentials = determineAwsCredentials( protocolProperties );

    // keepDebugHdfsData defaults to false, useDirectInsert defaults to true
    boolean keepDebugHdfsData = Boolean.parseBoolean( protocolProperties.getProperty( PROTOCOL_KEEP_DEBUG_HFS_DATA ) );
    boolean useDirectInsert = Boolean.parseBoolean( protocolProperties.getProperty( PROTOCOL_USE_DIRECT_INSERT, "true" ) );

    // source fields will be the JDBC-typed fields so use them as defaults.
    RedshiftTableDesc redshiftTableDesc = createTableDescFromProperties( scheme.getSourceFields(), protocolProperties, false );

    Fields sinkFields = scheme.getSinkFields();
    if( !redshiftTableDesc.hasRequiredTableInformation() && sinkFields != Fields.UNKNOWN && sinkFields != Fields.ALL && sinkFields != null
        && sinkFields.getTypes() != null )
      {
      LOG.debug( "tabledesc information incomplete, falling back to sink-fields {}", scheme.getSinkFields() );
      redshiftTableDesc.completeFromFields( scheme.getSinkFields() );
      ( (JDBCScheme) scheme ).setColumns( redshiftTableDesc.getColumnNames() );
      }

    // users can overwrite the sink mode.
    String sinkModeProperty = protocolProperties.getProperty( PROTOCOL_SINK_MODE );
    if( !Util.isEmpty( sinkModeProperty ) )
      sinkMode = SinkMode.valueOf( sinkModeProperty );

    return new RedshiftTap( identifier, jdbcUser, jdbcPassword, hfsStagingDir, credentials, redshiftTableDesc, (RedshiftScheme) scheme, sinkMode, keepDebugHdfsData, useDirectInsert );
    }
properties.containsKey( FORMAT_SORT_KEYS ) ) 154 | sortKeys = properties.getProperty( FORMAT_SORT_KEYS ).split( DEFAULT_SEPARATOR ); 155 | 156 | RedshiftTableDesc desc = new RedshiftTableDesc( tableName, columnNames, columnDefs, distributionKey, sortKeys ); 157 | return desc; 158 | } 159 | 160 | /** 161 | * Helper method that tries to determine the AWS credentials. It first tries 162 | * the {@link Properties} passed in, next it checks for the environment 163 | * variables AWS_ACCESS_KEY and AWS_SECRET_KEY. If 164 | * none of the above contains the credentials, the method returns 165 | * {@link AWSCredentials}. 166 | * 167 | * @param properties a {@link Properties} object, which can contain the AWS 168 | * credentials. 169 | * @return an {@link AWSCredentials} installed. 170 | */ 171 | private AWSCredentials determineAwsCredentials( Properties properties ) 172 | { 173 | // try to determine the aws credentials starting with the assumption 174 | // that they are available from the AWS environment 175 | AWSCredentials awsCredentials = AWSCredentials.RUNTIME_DETERMINED; 176 | 177 | // first try the properties 178 | String awsAccessKey = properties.getProperty( PROTOCOL_AWS_ACCESS_KEY ); 179 | String awsSecretKey = properties.getProperty( PROTOCOL_AWS_SECRET_KEY ); 180 | 181 | if( !Util.isEmpty( awsAccessKey ) && !Util.isEmpty( awsSecretKey ) ) 182 | awsCredentials = new AWSCredentials( awsAccessKey, awsSecretKey ); 183 | 184 | // next try environment variables 185 | if( awsCredentials == AWSCredentials.RUNTIME_DETERMINED ) 186 | { 187 | awsAccessKey = System.getenv( SYSTEM_AWS_ACCESS_KEY ); 188 | awsSecretKey = System.getenv( SYSTEM_AWS_SECRET_KEY ); 189 | if( !Util.isEmpty( awsAccessKey ) && !Util.isEmpty( awsSecretKey ) ) 190 | awsCredentials = new AWSCredentials( awsAccessKey, awsSecretKey ); 191 | } 192 | return awsCredentials; 193 | } 194 | 195 | public static Map extractCopyOptions( Properties properties, String copyOptionsPrefix ) 196 | { 197 | Map copyOptions = 
new HashMap(); 198 | for( CopyOption curOption : CopyOption.values() ) 199 | { 200 | String propConfName = copyOptionsPrefix + curOption.toString(); 201 | if( properties.containsKey( propConfName ) ) 202 | { 203 | String propValue = properties.get( propConfName ) != null ? properties.get( propConfName ).toString() : null; 204 | copyOptions.put( curOption, propValue ); 205 | } 206 | } 207 | return copyOptions; 208 | } 209 | 210 | /** Enum of all the COPY options supported by the Redshift load command and information about how to covert them to SQL commands. */ 211 | public static enum CopyOption 212 | { 213 | FIXEDWIDTH( "\'%s\'" ), 214 | DELIMITER( "\'%s\'" ), 215 | CSV( " QUOTE \'%s\' " ), 216 | ENCRYPTED, 217 | GZIP, 218 | LZOP, 219 | REMOVEQUOTES, 220 | EXPLICIT_IDS, 221 | ACCEPTINVCHARS( "\'%s\'" ), 222 | MAXERROR( "%s" ), 223 | DATEFORMAT( "\'%s\'" ), 224 | TIMEFORMAT( "\'%s\'" ), 225 | IGNOREHEADER( "%s" ), 226 | ACCEPTANYDATE, 227 | IGNOREBLANKLINES, 228 | TRUNCATECOLUMNS, 229 | FILLRECORD, 230 | TRIMBLANKS, 231 | NOLOAD, 232 | NULL( "\'%s\'" ), 233 | EMPTYASNULL, 234 | BLANKSASNULL, 235 | COMPROWS( "%s" ), 236 | COMPUPDATE( "%s" ), 237 | STATUPDATE( "%s" ), 238 | ESCAPE, 239 | ROUNDEC; 240 | 241 | private String formattableCommandString; 242 | 243 | CopyOption( String formattableCommandString ) 244 | { 245 | this.formattableCommandString = formattableCommandString; 246 | } 247 | 248 | CopyOption() 249 | { 250 | this.formattableCommandString = ""; 251 | } 252 | 253 | public String getArguments( String argument ) 254 | { 255 | if( this.equals( CSV ) && argument == null ) 256 | return " CSV "; 257 | 258 | if( formattableCommandString.length() == 0 || argument == null ) 259 | return formattableCommandString; 260 | 261 | return String.format( formattableCommandString + " ", argument ); 262 | } 263 | 264 | } 265 | 266 | 267 | } 268 | -------------------------------------------------------------------------------- 
/cascading-jdbc-redshift/src/main/java/cascading/jdbc/RedshiftSafeDelimitedParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import java.io.IOException; 24 | 25 | import cascading.scheme.util.DelimitedParser; 26 | import cascading.tap.TapException; 27 | import cascading.tuple.Fields; 28 | import org.apache.hadoop.util.StringUtils; 29 | 30 | /** {@link DelimitedParser} that treats the presence of characters that Redshift can't handle as an error in that line. 
*/ 31 | 32 | public class RedshiftSafeDelimitedParser extends DelimitedParser 33 | { 34 | private static final char BACKSLASH = 0x5c; 35 | 36 | public RedshiftSafeDelimitedParser( String delimiter, String quote, Class[] types, boolean strict, boolean safe, Fields sourceFields, Fields sinkFields ) 37 | { 38 | super( delimiter, quote, types, strict, safe, sourceFields, sinkFields ); 39 | } 40 | 41 | public RedshiftSafeDelimitedParser( String delimiter, String quote ) 42 | { 43 | this( delimiter, quote, null, true, true, null, null ); 44 | } 45 | 46 | @Override 47 | public Appendable joinLine( Iterable iterable, Appendable buffer ) 48 | { 49 | try 50 | { 51 | return joinWithQuote( iterable, buffer ); 52 | } 53 | catch( IOException e ) 54 | { 55 | throw new TapException( "unable to append data", e ); 56 | } 57 | } 58 | 59 | protected Appendable joinWithQuote( Iterable tuple, Appendable buffer ) throws IOException 60 | { 61 | int count = 0; 62 | 63 | for( Object value : tuple ) 64 | { 65 | if( count != 0 ) 66 | buffer.append( delimiter ); 67 | 68 | if( value != null ) 69 | { 70 | if( value instanceof String ) 71 | { 72 | String valueString = value.toString(); 73 | 74 | if( containsAnyInvalidCodepoints( valueString ) ) 75 | { 76 | throw new InvalidCodepointForRedshiftException( valueString ); 77 | } 78 | 79 | String escaped = StringUtils.escapeString( valueString, BACKSLASH, new char[]{'"', '\''} ); 80 | buffer.append( quote ).append( escaped ).append( quote ); 81 | } 82 | else 83 | { 84 | buffer.append( value.toString() ); 85 | } 86 | } 87 | count++; 88 | } 89 | 90 | return buffer; 91 | } 92 | 93 | private boolean containsAnyInvalidCodepoints( String s ) 94 | { 95 | for( int i = 0; i < s.length(); i++ ) 96 | { 97 | if( isExcludedCodepoint( s.codePointAt( i ) ) ) 98 | { 99 | return true; 100 | } 101 | } 102 | return false; 103 | } 104 | 105 | private boolean isExcludedCodepoint( int codepoint ) 106 | { 107 | if( codepoint >= 0xD800 && codepoint <= 0xDFFF ) 108 | { 109 | 
return true; 110 | } 111 | if( codepoint >= 0xFDD0 && codepoint <= 0xFDEF ) 112 | { 113 | return true; 114 | } 115 | if( codepoint >= 0xFFFE && codepoint <= 0xFFFF ) 116 | { 117 | return true; 118 | } 119 | return false; 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /cascading-jdbc-redshift/src/main/java/cascading/jdbc/RedshiftScheme.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
/**
 * This class and {@link RedshiftTap} manage the ability to read and write data to Amazon's Redshift via EMR.
 * Redshift data is loaded either via files staged on S3 (followed by a COPY command) or directly via JDBC,
 * but it is always read back via JDBC, so these classes wrap the pairing of an HFS {@link Tap} and a
 * JDBC {@link Tap} for reading behind one {@link Scheme} object.
 */
public class RedshiftScheme extends JDBCScheme
  {

  public static final String DEFAULT_DELIMITER = ",";
  public static final String DEFAULT_QUOTE = "\"";

  private static final Logger LOG = LoggerFactory.getLogger( RedshiftScheme.class );

  // scheme for the delimited staging files written to HFS/S3 before a COPY
  private TextDelimited textDelimited;
  // scheme actually used for sinking: this (direct JDBC insert) or textDelimited
  // (staged copy); chosen per tap in sinkConfInit()
  private Scheme sinkScheme;
  private RedshiftTableDesc redshiftTableDesc;
  // COPY options keyed by RedshiftFactory.CopyOption; a null value denotes a flag-style option
  private Map copyOptions = new HashMap();

  /**
   * The primary constructor. Any temporary scratch files will be created with default values for field delimiters. This
   * will work fine for csv, tab delimited and so on but may lead to errors if there is binary data stored in the files.
   *
   * @param redshiftTableDesc description of the table structure.
   */
  public RedshiftScheme( Fields fields, RedshiftTableDesc redshiftTableDesc )
    {
    this( fields, redshiftTableDesc, DEFAULT_DELIMITER, DEFAULT_QUOTE, null, false );
    }

  /**
   * Like primary constructor, but also takes a String conditions allowing the query to be restricted to a subset of the table.
   *
   * @param redshiftTableDesc description of the table structure.
   * @param conditions        where clause to restrict the query.
   */
  public RedshiftScheme( Fields fields, RedshiftTableDesc redshiftTableDesc, String conditions )
    {
    this( fields, redshiftTableDesc, DEFAULT_DELIMITER, DEFAULT_QUOTE, null, conditions, false );
    }

  /**
   * Use this constructor if you need fine-grained control over the temporary file used to stage data for uploading. You
   * almost certainly don't want to do this unless you know for a fact that your data contains, ex. binary data that might
   * cause issues with default column detection (ex. if you use the \001 character).
   *
   * @param redshiftTableDesc description of the table structure.
   * @param delimiter         single character indicating the separator between fields in a file to load
   * @param quoteCharacter    single character to enclose data within a field in cases where the field contains a delimiter
   * @param copyOptions       custom arguments passed to the COPY command for processing. In most cases, proper cleaning of the data
   *                          before sending it to this Tap is a better alternative.
   */
  public RedshiftScheme( Fields fields, RedshiftTableDesc redshiftTableDesc, String delimiter, String quoteCharacter, Map copyOptions, Boolean tableAlias )
    {
    this( fields, redshiftTableDesc, delimiter, quoteCharacter, copyOptions, null, tableAlias );
    }

  public RedshiftScheme( Fields fields, RedshiftTableDesc redshiftTableDesc, String delimiter, String quoteCharacter, Map copyOptions, String conditions, Boolean tableAlias )
    {
    super( fields, redshiftTableDesc.getColumnNames(), conditions );
    super.tableAlias = tableAlias;
    // from the perspective of the JDBC-based parent class flag all fields as JDBC types.
    // for the internally managed S3 sink, use HFS tables (where Date is a String) so that the Tap doesn't
    // write out the integer representation.
    this.redshiftTableDesc = redshiftTableDesc;
    this.textDelimited = new TextDelimited( redshiftTableDesc.getHFSFields(), false, new RedshiftSafeDelimitedParser( delimiter, quoteCharacter ) );
    textDelimited.setSinkFields( getSinkFields() );
    this.sinkScheme = this;

    if( copyOptions != null )
      this.copyOptions.putAll( copyOptions );

    // COPY must know the delimiter of the staged files; fall back to the default when not set explicitly
    if( !this.copyOptions.containsKey( RedshiftFactory.CopyOption.DELIMITER ) )
      this.copyOptions.put( RedshiftFactory.CopyOption.DELIMITER, DEFAULT_DELIMITER );

    // staged string fields are quoted by RedshiftSafeDelimitedParser, so COPY must strip the quotes again
    this.copyOptions.put( RedshiftFactory.CopyOption.REMOVEQUOTES, null );
    }

  public RedshiftScheme( String[] columns, String[] orderBy, String[] updateBy )
    {
    super( columns, orderBy, updateBy );
    }

  public RedshiftScheme( Class inputFormat, Fields fields, String[] columns )
    {
    super( inputFormat, fields, columns );
    }

  public RedshiftScheme( String[] columnsNames, String contentsQuery, String countStarQuery )
    {
    super( columnsNames, contentsQuery, countStarQuery );
    }

  // scheme used for the HFS/S3 staging files; also consumed by RedshiftTap
  public TextDelimited getTextDelimited()
    {
    return textDelimited;
    }

  public TableDesc getRedshiftTableDesc()
    {
    return redshiftTableDesc;
    }

  // options appended to the COPY command by RedshiftTap.buildCopyOptions()
  public Map getCopyOptions()
    {
    return copyOptions;
    }

  /**
   * Routes sink initialization to either the JDBC parent (direct insert) or the
   * delimited staging scheme, depending on how the owning {@link RedshiftTap}
   * is configured.
   */
  @Override
  public void sinkConfInit( FlowProcess flowProcess, Tap tap, Configuration jobConf )
    {
    if( ( (RedshiftTap) tap ).isUseDirectInsert() )
      {
      sinkScheme = this;
      super.sinkConfInit( flowProcess, tap, jobConf );
      }
    else
      {
      sinkScheme = textDelimited;
      sinkScheme.sinkConfInit( flowProcess, tap, jobConf );
      }
    }

  @Override
  public String toString()
    {
    if( getSinkFields().equals( getSourceFields() ) )
      return getClass().getSimpleName() + "[" + getSourceFields().print() + "]";
    else
      return getClass().getSimpleName() + "[" + getSourceFields().print() + "->" + getSinkFields().print() + "]";
    }

  }
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import java.lang.reflect.Type; 24 | import java.sql.Time; 25 | import java.util.ArrayList; 26 | import java.util.List; 27 | 28 | import cascading.tuple.Fields; 29 | import cascading.util.Util; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 | 33 | /** 34 | * Adds in the Distribution Key and Sort Keys columns that are specific to Redshift. See AWS's docs for info. Note that 35 | * these columns must exist as defined column; they can't be keys that aren't in the columnNames list. 36 | */ 37 | public class RedshiftTableDesc extends TableDesc 38 | { 39 | 40 | private static final Logger LOG = LoggerFactory.getLogger( RedshiftTap.class ); 41 | 42 | private String distributionkey; 43 | private String[] sortKeys; 44 | 45 | 46 | public RedshiftTableDesc( String tableName, String[] columnNames, String[] columnDefs, String distributionkey, String[] sortKeys ) 47 | { 48 | super( tableName, columnNames, columnDefs, null ); 49 | this.distributionkey = distributionkey; 50 | this.sortKeys = sortKeys; 51 | } 52 | 53 | @Override 54 | public String getCreateTableStatement() 55 | { 56 | List createTableStatement = new ArrayList(); 57 | 58 | createTableStatement = addCreateTableBodyTo( createTableStatement ); 59 | String createTableCommand = String.format( getCreateTableFormat(), getTableName(), Util.join( createTableStatement, ", " ), getRedshiftTableKeys() ); 60 | LOG.info( "Creating table: " + createTableCommand ); 61 | return createTableCommand; 62 | } 63 | 64 | @Override 65 | public String[] getPrimaryKeys() 66 | { 67 | return null; 68 | } 69 | 70 | @Override 71 | protected List addCreateTableBodyTo( List createTableStatement ) 72 | { 73 | createTableStatement = addDefinitionsTo( createTableStatement ); 74 | 75 | return createTableStatement; 76 | } 77 | 78 | public Fields getHFSFields() 79 | { 80 
| String[] columnDefs = getColumnDefs(); 81 | if (columnDefs == null) 82 | return Fields.ALL; 83 | 84 | Type[] types = new Type[ columnDefs.length ]; 85 | 86 | for( int i = 0; i < columnDefs.length; i++ ) 87 | try 88 | { 89 | types[ i ] = findHFSTypeFor( columnDefs[ i ] ); 90 | } 91 | catch( ClassNotFoundException exception ) 92 | { 93 | LOG.error( "unable to find HFS type for: {}. defaulting to string", columnDefs[ i ] ); 94 | types[ i ] = String.class; 95 | } 96 | 97 | return new Fields( getColumnNames(), types ); 98 | } 99 | 100 | public static Type findHFSTypeFor( String fieldName ) throws ClassNotFoundException 101 | { 102 | if( "int".equals( fieldName ) ) 103 | return int.class; 104 | else if( "int not null".equalsIgnoreCase( fieldName ) ) 105 | return Integer.class; 106 | else if( fieldName != null && fieldName.startsWith( "varchar" ) ) 107 | return String.class; 108 | else if( "time".equalsIgnoreCase( fieldName ) ) 109 | return Time.class; 110 | else if( "date".equalsIgnoreCase( fieldName ) ) 111 | return String.class; 112 | else if( "timestamp".equalsIgnoreCase( fieldName ) ) 113 | return String.class; 114 | else 115 | return String.class; 116 | } 117 | 118 | protected String getCreateTableFormat() 119 | { 120 | return "CREATE TABLE %s ( %s ) %s"; 121 | } 122 | 123 | private String getRedshiftTableKeys() 124 | { 125 | StringBuilder sb = new StringBuilder().append( "" ); 126 | 127 | if( distributionkey != null ) 128 | sb.append( " DISTKEY (" ).append( distributionkey ).append( ") " ); 129 | 130 | if( sortKeys != null && sortKeys.length > 0 ) 131 | sb.append( " SORTKEY (" ).append( Util.join( sortKeys, "," ) ).append( ") " ); 132 | 133 | return sb.toString(); 134 | } 135 | 136 | 137 | } 138 | -------------------------------------------------------------------------------- /cascading-jdbc-redshift/src/main/java/cascading/jdbc/RedshiftTap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 
Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import java.io.IOException; 24 | import java.util.Map; 25 | import java.util.UUID; 26 | 27 | import cascading.flow.FlowProcess; 28 | import cascading.jdbc.db.DBConfiguration; 29 | import cascading.tap.SinkMode; 30 | import cascading.tap.Tap; 31 | import cascading.tap.hadoop.Hfs; 32 | import cascading.tuple.TupleEntryCollector; 33 | import org.apache.hadoop.conf.Configuration; 34 | import org.apache.hadoop.mapred.OutputCollector; 35 | import org.slf4j.Logger; 36 | import org.slf4j.LoggerFactory; 37 | 38 | /** 39 | * This class and {@link RedshiftScheme} manage the ability to read and write data to Amazon's Redshift via EMR. 40 | * Because Redshift data is loaded into Redshift via S3 but read from it via JDBC both these classes wrap the 41 | * pairing of an HFS {@link Tap} (for writing) and a JDBC {@link Tap} for reading behind one {@link cascading.scheme.Scheme} 42 | * object. 
43 | */ 44 | public class RedshiftTap extends JDBCTap 45 | { 46 | 47 | private static final Logger LOG = LoggerFactory.getLogger( RedshiftTap.class ); 48 | 49 | public static final String DB_DRIVER = "org.postgresql.Driver"; 50 | 51 | private AWSCredentials awsCredentials; 52 | private RedshiftScheme redshiftScheme; 53 | private Hfs hfsStagingDir; 54 | private String s3WorkingDir; 55 | private boolean keepDebugHfsData; 56 | private boolean useDirectInsert; 57 | 58 | 59 | /** 60 | * Redshift tap to stage data to S3 and then issue a JDBC COPY command to specified Redshift table 61 | * 62 | * @param sinkMode use {@link SinkMode#REPLACE} to drop Redshift table before loading; 63 | * {@link SinkMode#UPDATE} to not drop table for incremental loading 64 | */ 65 | public RedshiftTap( String connectionUrl, String username, String password, String hfsStagingDir, AWSCredentials awsCredentials, RedshiftTableDesc redshiftTableDesc, RedshiftScheme redshiftScheme, SinkMode sinkMode, boolean keepDebugHfsData, boolean useDirectInsert ) 66 | { 67 | super( connectionUrl, username, password, DB_DRIVER, redshiftTableDesc, redshiftScheme, sinkMode ); 68 | this.redshiftScheme = redshiftScheme; 69 | String workingDirPath = hfsStagingDir + "/" + UUID.randomUUID(); 70 | this.s3WorkingDir = workingDirPath.replaceAll( "s3n://", "s3://" ); 71 | this.hfsStagingDir = new Hfs( redshiftScheme.getTextDelimited(), workingDirPath ); 72 | this.awsCredentials = awsCredentials; 73 | this.keepDebugHfsData = keepDebugHfsData; 74 | this.useDirectInsert = useDirectInsert; 75 | LOG.info( "created {} ", toString() ); 76 | } 77 | 78 | /** 79 | * Redshift tap to stage data to S3 and then issue a JDBC COPY command to specified Redshift table 80 | * 81 | * @param sinkMode use {@link SinkMode#REPLACE} to drop Redshift table before loading; 82 | * {@link SinkMode#UPDATE} to not drop table for incremental loading 83 | */ 84 | public RedshiftTap( String connectionUrl, String username, String password, String 
hfsStagingDir, AWSCredentials awsCredentials, RedshiftTableDesc redshiftTableDesc, RedshiftScheme redshiftScheme, SinkMode sinkMode ) 85 | { 86 | this( connectionUrl, username, password, hfsStagingDir, awsCredentials, redshiftTableDesc, redshiftScheme, sinkMode, false, true ); 87 | } 88 | 89 | /** 90 | * Simplified constructor for testing 91 | */ 92 | protected RedshiftTap( String connectionUrl, RedshiftTableDesc redshiftTableDesc, RedshiftScheme redshiftScheme, SinkMode sinkMode ) 93 | { 94 | this( connectionUrl, null, null, null, null, redshiftTableDesc, redshiftScheme, sinkMode, false, true ); 95 | } 96 | 97 | /** 98 | * Simplified constructor for testing 99 | */ 100 | protected RedshiftTap( String connectionUrl, RedshiftScheme redshiftScheme ) 101 | { 102 | this( connectionUrl, null, null, null, null, null, redshiftScheme, null, false, true ); 103 | } 104 | 105 | @Override 106 | public void sourceConfInit( FlowProcess process, Configuration configuration ) 107 | { 108 | if( username == null ) 109 | DBConfiguration.configureDB( configuration, driverClassName, connectionUrl ); 110 | else 111 | DBConfiguration.configureDB( configuration, driverClassName, connectionUrl, username, password ); 112 | 113 | super.sourceConfInit( process, configuration ); 114 | } 115 | 116 | @Override 117 | public void sinkConfInit( FlowProcess process, Configuration conf ) 118 | { 119 | if (!useDirectInsert) { 120 | // if we haven't set the credentials beforehand try to set them from the job conf 121 | if( awsCredentials.equals( AWSCredentials.RUNTIME_DETERMINED ) ) 122 | { 123 | String accessKey = conf.get( "fs.s3n.awsAccessKeyId", null ); 124 | String secretKey = conf.get( "fs.s3n.awsSecretAccessKey", null ); 125 | awsCredentials = new AWSCredentials( accessKey, secretKey ); 126 | } 127 | // make the credentials to be used available to the JobConf if they were set differently 128 | conf.set( "fs.s3n.awsAccessKeyId", awsCredentials.getAwsAccessKey() ); 129 | conf.set( 
"fs.s3n.awsSecretAccessKey", awsCredentials.getAwsSecretKey() ); 130 | } 131 | super.sinkConfInit( process, conf ); 132 | } 133 | 134 | @Override 135 | public TupleEntryCollector openForWrite( FlowProcess flowProcess, OutputCollector outputCollector ) throws IOException 136 | { 137 | // force a table creation if one does not exist 138 | LOG.info( "creating db table: " + getTableName() ); 139 | super.createResource( flowProcess ); 140 | if( useDirectInsert ) 141 | { 142 | return super.openForWrite( flowProcess, outputCollector ); 143 | } 144 | else 145 | { 146 | LOG.info( "Creating scratch dir: " + hfsStagingDir.getIdentifier() ); 147 | hfsStagingDir.createResource( flowProcess ); 148 | return hfsStagingDir.openForWrite( flowProcess ); 149 | } 150 | } 151 | 152 | @Override 153 | public boolean createResource( Configuration configuration ) throws IOException 154 | { 155 | LOG.info( "creating resources" ); 156 | boolean createSuccess = true; 157 | if( !useDirectInsert ) 158 | { 159 | LOG.info( "creating hfs scratch space: {}", hfsStagingDir.getIdentifier() ); 160 | createSuccess = hfsStagingDir.createResource( configuration ); 161 | } 162 | if( createSuccess ) 163 | { 164 | LOG.info( "creating DB table: {}", super.getIdentifier() ); 165 | createSuccess = super.createResource( configuration ); 166 | } 167 | return createSuccess; 168 | } 169 | 170 | @Override 171 | public boolean deleteResource( Configuration configuration ) throws IOException 172 | { 173 | LOG.info( "deleting resources" ); 174 | boolean deleteSuccsess; 175 | LOG.info( "deleting DB table: {}", super.getIdentifier() ); 176 | deleteSuccsess = super.deleteResource( configuration ); 177 | if( deleteSuccsess && hfsStagingDir.resourceExists( configuration ) ) 178 | { 179 | LOG.info( "deleting hfs scratch space: {}", hfsStagingDir.getIdentifier() ); 180 | deleteSuccsess = hfsStagingDir.deleteResource( configuration ); 181 | } 182 | return deleteSuccsess; 183 | } 184 | 185 | @Override 186 | public boolean 
commitResource( Configuration configuration ) throws IOException 187 | { 188 | if( !useDirectInsert ) 189 | { 190 | String copyCommand = buildCopyFromS3Command(); 191 | try 192 | { 193 | int results = super.executeUpdate( copyCommand ); 194 | if( results != 0 ) 195 | LOG.info( "Copy return code: {} ( expected: 0 )", results ); 196 | } 197 | finally 198 | { 199 | // clean scratch resources even if load failed. 200 | if( !keepDebugHfsData && hfsStagingDir.resourceExists( configuration ) ) 201 | hfsStagingDir.deleteResource( configuration ); 202 | } 203 | } 204 | return true; 205 | } 206 | 207 | @Override 208 | public long getModifiedTime( Configuration configuration ) throws IOException 209 | { 210 | if( hfsStagingDir.resourceExists( configuration ) ) 211 | return hfsStagingDir.getModifiedTime( configuration ); 212 | return super.getModifiedTime( configuration ); 213 | } 214 | 215 | public boolean isUseDirectInsert() 216 | { 217 | return useDirectInsert; 218 | } 219 | 220 | public String buildCopyFromS3Command() 221 | { 222 | return String.format( "COPY %s from '%s' %s %s ;", 223 | redshiftScheme.getRedshiftTableDesc().getTableName(), 224 | s3WorkingDir, 225 | buildAuthenticationOptions(), 226 | buildCopyOptions() ); 227 | } 228 | 229 | protected String buildAuthenticationOptions() 230 | { 231 | return String.format( " CREDENTIALS 'aws_access_key_id=%s;aws_secret_access_key=%s' ", 232 | awsCredentials.getAwsAccessKey(), 233 | awsCredentials.getAwsSecretKey() ); 234 | } 235 | 236 | private String buildCopyOptions() 237 | { 238 | StringBuilder builder = new StringBuilder(); 239 | for( Map.Entry copyOption : redshiftScheme.getCopyOptions().entrySet() ) 240 | { 241 | builder.append( " " ); 242 | if( copyOption.getValue() == null ) 243 | builder.append( copyOption.getKey().toString() ); 244 | else 245 | builder.append( copyOption.getKey().toString() ).append( " " ).append( copyOption.getKey().getArguments( copyOption.getValue() ) ); 246 | } 247 | return 
builder.toString(); 248 | } 249 | 250 | @Override 251 | public String toString() 252 | { 253 | if( getIdentifier() != null ) 254 | return getClass().getSimpleName() + "[\"" + getScheme() + "\"]" + "[->\"" + hfsStagingDir.getIdentifier() + "\"->\"" + super.getIdentifier() + "\"]"; // sanitize 255 | else 256 | return getClass().getSimpleName() + "[\"" + getScheme() + "\"]" + "[no more info]"; 257 | } 258 | 259 | } 260 | -------------------------------------------------------------------------------- /cascading-jdbc-redshift/src/main/resources/cascading/bind/provider.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 
19 | # 20 | 21 | # default name of provider 22 | 23 | cascading.bind.provider.names=redshift 24 | cascading.bind.provider.redshift.platforms=hadoop,hadoop2-mr1 25 | 26 | # one or the other 27 | cascading.bind.provider.redshift.factory.classname=cascading.jdbc.RedshiftFactory 28 | 29 | 30 | # define protocols differentiated by properties 31 | cascading.bind.provider.redshift.protocol.names=jdbc 32 | cascading.bind.provider.redshift.protocol.jdbc.schemes=postgresql 33 | cascading.bind.provider.redshift.protocol.jdbc.tabledescseparator=: 34 | cascading.bind.provider.redshift.protocol.jdbc.tabledesc.tablename= 35 | cascading.bind.provider.redshift.protocol.jdbc.tabledesc.columnnames= 36 | cascading.bind.provider.redshift.protocol.jdbc.tabledesc.columndefs= 37 | cascading.bind.provider.redshift.protocol.jdbc.s3outputpath= 38 | cascading.bind.provider.redshift.protocol.jdbc.awsaccesskey= 39 | cascading.bind.provider.redshift.protocol.jdbc.awssecretkey= 40 | cascading.bind.provider.redshift.protocol.jdbc.jdbcuser= 41 | cascading.bind.provider.redshift.protocol.jdbc.jdbcpassword= 42 | cascading.bind.provider.redshift.protocol.jdbc.keepdebughfsdata= 43 | 44 | # define formats differentiated by properties 45 | cascading.bind.provider.redshift.format.names=postgresql 46 | cascading.bind.provider.redshift.format.postgresql.protocols=jdbc 47 | cascading.bind.provider.redshift.format.postgresql.separator=: 48 | cascading.bind.provider.redshift.format.postgresql.columnnames= 49 | cascading.bind.provider.redshift.format.postgresql.orderBy= 50 | cascading.bind.provider.redshift.format.postgresql.limit= 51 | cascading.bind.provider.redshift.format.postgresql.updateBy= 52 | cascading.bind.provider.redshift.format.postgresql.tableAlias= 53 | cascading.bind.provider.redshift.format.postgresql.selectquery= 54 | cascading.bind.provider.redshift.format.postgresql.countquery= 55 | --------------------------------------------------------------------------------
/cascading-jdbc-redshift/src/test/java/cascading/jdbc/RedshiftSafeDelimitedParserTest.java: -------------------------------------------------------------------------------- 1 | package cascading.jdbc; 2 | 3 | import java.io.UnsupportedEncodingException; 4 | 5 | import cascading.tuple.Tuple; 6 | import org.junit.Test; 7 | 8 | import static org.junit.Assert.assertEquals; 9 | 10 | public class RedshiftSafeDelimitedParserTest { 11 | @Test 12 | public void shouldJoinValuesAndQuoteStringField() { 13 | RedshiftSafeDelimitedParser parser = new RedshiftSafeDelimitedParser(",", "\""); 14 | StringBuffer buf = new StringBuffer(); 15 | 16 | parser.joinLine(new Tuple("Hello", "world"), buf); 17 | 18 | assertEquals("\"Hello\",\"world\"", buf.toString()); 19 | } 20 | 21 | @Test 22 | public void shouldJoinValuesWithoutQuotingNumeric() { 23 | RedshiftSafeDelimitedParser parser = new RedshiftSafeDelimitedParser(",", "\""); 24 | StringBuffer buf = new StringBuffer(); 25 | 26 | parser.joinLine(new Tuple("Hello", 102), buf); 27 | 28 | assertEquals("\"Hello\",102", buf.toString()); 29 | } 30 | 31 | @Test 32 | public void shouldEscapeSingleQuotes() { 33 | RedshiftSafeDelimitedParser parser = new RedshiftSafeDelimitedParser(",", "\""); 34 | StringBuffer buf = new StringBuffer(); 35 | 36 | parser.joinLine(new Tuple("Some", "'name"), buf); 37 | 38 | assertEquals("\"Some\",\"\\'name\"", buf.toString()); 39 | } 40 | 41 | @Test(expected=InvalidCodepointForRedshiftException.class) 42 | public void shouldThrowErrorWithInvalidCodepointCharacter() throws UnsupportedEncodingException { 43 | RedshiftSafeDelimitedParser parser = new RedshiftSafeDelimitedParser(",", "\""); 44 | StringBuffer buf = new StringBuffer(); 45 | 46 | byte[] characterBytes = new byte[] {(byte) 0xED, (byte) 0xA0, (byte) 0x80}; 47 | 48 | parser.joinLine(new Tuple(new String(characterBytes, "UTF-8")), buf); 49 | 50 | } 51 | } 52 | -------------------------------------------------------------------------------- 
/cascading-jdbc-redshift/src/test/java/cascading/jdbc/RedshiftTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | /** 24 | * Tests against Postgres database since that's the Redshift API . 
25 | **/ 26 | 27 | import java.util.Properties; 28 | 29 | import cascading.tap.SinkMode; 30 | import cascading.tuple.Fields; 31 | import org.junit.Before; 32 | 33 | public class RedshiftTest extends JDBCTestingBase 34 | { 35 | 36 | @Before 37 | public void setUp() 38 | { 39 | setDriverName( RedshiftTap.DB_DRIVER ); 40 | setJdbcurl( System.getProperty( "cascading.jdbcurl" ) ); 41 | setJDBCFactory( new RedshiftFactory() ); 42 | } 43 | 44 | @Override 45 | protected RedshiftScheme getNewJDBCScheme( Fields fields, String[] columnNames ) 46 | { 47 | return new RedshiftScheme( inputFormatClass, fields, columnNames ); 48 | } 49 | 50 | @Override 51 | protected RedshiftScheme getNewJDBCScheme( String[] columns, String[] orderBy, String[] updateBy ) 52 | { 53 | return new RedshiftScheme( columns, orderBy, updateBy ); 54 | } 55 | 56 | @Override 57 | protected RedshiftScheme getNewJDBCScheme( String[] columnsNames, String contentsQuery, String countStarQuery ) 58 | { 59 | return new RedshiftScheme( columnsNames, contentsQuery, countStarQuery ); 60 | } 61 | 62 | @Override 63 | protected RedshiftTableDesc getNewTableDesc( String tableName, String[] columnNames, String[] columnDefs, String[] primaryKeys ) 64 | { 65 | return new RedshiftTableDesc( tableName, columnNames, columnDefs, null, null ); 66 | } 67 | 68 | @Override 69 | protected RedshiftTap getNewJDBCTap( TableDesc tableDesc, JDBCScheme jdbcScheme, SinkMode sinkMode ) 70 | { 71 | return new RedshiftTap( jdbcurl, (RedshiftTableDesc) tableDesc, (RedshiftScheme) jdbcScheme, sinkMode ); 72 | } 73 | 74 | @Override 75 | protected RedshiftTap getNewJDBCTap( JDBCScheme jdbcScheme ) 76 | { 77 | return new RedshiftTap( jdbcurl, (RedshiftScheme) jdbcScheme ); 78 | } 79 | 80 | @Override 81 | protected SinkMode getSinkModeForReset() 82 | { 83 | return SinkMode.REPLACE; 84 | } 85 | 86 | @Override 87 | protected Properties createProperties() 88 | { 89 | Properties properties = super.createProperties(); 90 | properties.put( 
RedshiftFactory.PROTOCOL_USE_DIRECT_INSERT, "true" ); 91 | return properties; 92 | } 93 | } 94 | 95 | 96 | -------------------------------------------------------------------------------- /cascading-jdbc-teradata/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | evaluationDependsOn( ":cascading-jdbc-core" ) 22 | 23 | ext.teradataVersion = "14.10.00.39" 24 | 25 | dependencies { 26 | compile project( ':cascading-jdbc-core' ) 27 | 28 | compile( group: 'com.teradata', name: 'terajdbc4', version: teradataVersion ) 29 | compile( group: 'com.teradata', name: 'tdgssconfig', version: teradataVersion ) 30 | 31 | testCompile project( ':cascading-jdbc-core' ).sourceSets.test.runtimeClasspath 32 | } 33 | 34 | test { 35 | systemProperty( "cascading.jdbcurl", System.getProperty( "cascading.jdbc.url.teradata" ) ) 36 | } 37 | 38 | -------------------------------------------------------------------------------- /cascading-jdbc-teradata/src/main/java/cascading/jdbc/TeradataJDBCFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. 
All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import cascading.jdbc.db.DBInputFormat; 24 | import cascading.jdbc.db.TeradataDBInputFormat; 25 | 26 | /** 27 | * Teradata specific subclass of {@link cascading.jdbc.JDBCFactory} 28 | * */ 29 | public class TeradataJDBCFactory extends JDBCFactory 30 | { 31 | @Override 32 | protected Class getInputFormatClass() 33 | { 34 | return TeradataDBInputFormat.class; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /cascading-jdbc-teradata/src/main/java/cascading/jdbc/TeradataTableDesc.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2009 Concurrent, Inc. 3 | * 4 | * This work has been released into the public domain 5 | * by the copyright holder. This applies worldwide. 6 | * 7 | * In case this is not legally possible: 8 | * The copyright holder grants any entity the right 9 | * to use this work for any purpose, without any 10 | * conditions, unless such conditions are required by law. 
11 | */ 12 | 13 | package cascading.jdbc; 14 | 15 | import java.io.Serializable; 16 | import java.lang.reflect.Type; 17 | import java.util.ArrayList; 18 | import java.util.List; 19 | import java.util.Arrays; 20 | 21 | import cascading.tuple.Fields; 22 | 23 | /** 24 | * Class TeradataTableDesc extends TableDesc which describes a SQL based table, 25 | * this description is used by the 26 | * {@link JDBCTap} when creating a missing table and by the JDBCScheme, for the 27 | * correct type coercion. 28 | *

29 | * This class is used to override completeFromFields to use TeradataInternalMapping.java 30 | * 31 | * @see JDBCTap 32 | * @see JDBCScheme 33 | */ 34 | public class TeradataTableDesc extends TableDesc implements Serializable 35 | { 36 | private static final long serialVersionUID = 5009899098019404131L; 37 | 38 | /** 39 | * Field columnNames 40 | */ 41 | String[] columnNames; 42 | /** 43 | * Field primaryKeys 44 | */ 45 | String[] primaryKeys; 46 | 47 | /** 48 | * Constructor TeradataTableDesc creates a new TeradataTableDesc instance. 49 | * 50 | * @param tableName of type String 51 | * @param columnNames of type String[] 52 | * @param columnDefs of type String[] 53 | * @param primaryKeys of type String 54 | */ 55 | public TeradataTableDesc( String tableName, String[] columnNames, String[] columnDefs, String[] primaryKeys ) 56 | { 57 | super( tableName, columnNames, columnDefs, primaryKeys ); 58 | this.columnNames = columnNames; 59 | this.primaryKeys = primaryKeys; 60 | } 61 | 62 | /** 63 | * {@inheritDoc} 64 | */ 65 | @Override 66 | public void completeFromFields( Fields fields ) 67 | { 68 | if( !hasRequiredTableInformation() ) 69 | { 70 | List names = new ArrayList(); 71 | List defs = new ArrayList(); 72 | 73 | for( int i = 0; i < fields.size(); i++ ) 74 | { 75 | Comparable cmp = fields.get( i ); 76 | names.add( cmp.toString() ); 77 | Type internalType = InternalTypeMapping.findInternalType( fields.getType( i ) ); 78 | String type = InternalTypeMapping.sqltypeForClass( internalType ); 79 | defs.add( type ); 80 | } 81 | if( columnNames == null || columnNames.length == 0 ) 82 | columnNames = names.toArray( new String[ names.size() ] ); 83 | if( columnDefs == null || columnDefs.length == 0 ) 84 | columnDefs = defs.toArray( new String[ defs.size() ] ); 85 | 86 | for( int i = 0; i < columnNames.length; i++ ) 87 | { 88 | if( Arrays.asList( primaryKeys ).contains( columnNames[ i ] ) ) 89 | { 90 | if( columnDefs[ i ].equalsIgnoreCase( "varchar(256)" ) ) 91 | 
columnDefs[ i ] = "varchar(256) not null"; 92 | } 93 | } 94 | 95 | // now it has to be complete and usable, if not bail out. 96 | if( !hasRequiredTableInformation() ) 97 | throw new IllegalStateException( "could not derive TableDesc from given fields." ); 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /cascading-jdbc-teradata/src/main/java/cascading/jdbc/db/TeradataDBInputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.jdbc.db; 22 | 23 | import java.io.IOException; 24 | import java.sql.Connection; 25 | import java.sql.SQLException; 26 | 27 | import cascading.CascadingException; 28 | import org.apache.hadoop.io.LongWritable; 29 | import org.apache.hadoop.mapred.JobConf; 30 | import org.apache.hadoop.mapred.RecordReader; 31 | 32 | /** 33 | * Teradata specific sub-class of DBInputFormat that provides a special select query for getting the data from a 34 | * Teradata instance. 
35 | */ 36 | @SuppressWarnings("rawtypes") 37 | public class TeradataDBInputFormat extends DBInputFormat 38 | { 39 | @Override 40 | protected RecordReader getRecordReaderInternal( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException 41 | { 42 | return new TeradataDBRecordReader( split, inputClass, job ); 43 | } 44 | 45 | class TeradataDBRecordReader extends DBInputFormat.DBRecordReader 46 | { 47 | protected TeradataDBRecordReader( cascading.jdbc.db.DBInputFormat.DBInputSplit split, Class inputClass, JobConf job ) throws SQLException, IOException 48 | { 49 | super( new cascading.jdbc.db.DBInputFormat.DBInputSplit(), inputClass, job ); 50 | } 51 | 52 | /** Returns the query for selecting the records from an Teradata DB. 53 | * omits the LIMIT and OFFSET for FASTEXPORT 54 | */ 55 | public String getSelectQuery() 56 | { 57 | StringBuilder query = new StringBuilder(); 58 | 59 | if( dbConf.getInputQuery() == null ) 60 | { 61 | query.append( "SELECT " ); 62 | 63 | for( int i = 0; i < fieldNames.length; i++ ) 64 | { 65 | query.append( fieldNames[ i ] ); 66 | 67 | if( i != fieldNames.length - 1 ) 68 | query.append( ", " ); 69 | } 70 | query.append( " FROM " ).append( tableName ); 71 | 72 | if( conditions != null && conditions.length() > 0 ) 73 | query.append( " WHERE (" ).append( conditions ).append( ")" ); 74 | 75 | String orderBy = dbConf.getInputOrderBy(); 76 | 77 | if( orderBy != null && orderBy.length() > 0 ) 78 | query.append( " ORDER BY " ).append( orderBy ); 79 | } 80 | else 81 | query.append( dbConf.getInputQuery() ); 82 | 83 | return query.toString(); 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /cascading-jdbc-teradata/src/main/resources/cascading/bind/provider.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 
3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | # default name of provider 22 | cascading.bind.provider.names=teradata 23 | cascading.bind.provider.teradata.platforms=hadoop,hadoop2-mr1 24 | 25 | # factory 26 | cascading.bind.provider.teradata.factory.classname=cascading.jdbc.TeradataJDBCFactory 27 | 28 | # protocol is jdbc 29 | cascading.bind.provider.teradata.protocol.names=jdbc 30 | cascading.bind.provider.teradata.protocol.jdbc.schemes=teradata 31 | cascading.bind.provider.teradata.protocol.jdbc.jdbcdriver=com.teradata.jdbc.TeraDriver 32 | cascading.bind.provider.teradata.protocol.jdbc.tabledescseparator=: 33 | cascading.bind.provider.teradata.protocol.jdbc.jdbcuser= 34 | cascading.bind.provider.teradata.protocol.jdbc.jdbcpassword= 35 | cascading.bind.provider.teradata.protocol.jdbc.tabledesc.tablename= 36 | cascading.bind.provider.teradata.protocol.jdbc.tabledesc.columnnames= 37 | cascading.bind.provider.teradata.protocol.jdbc.tabledesc.columndefs= 38 | cascading.bind.provider.teradata.protocol.jdbc.tabledesc.primarykeys= 39 | cascading.bind.provider.teradata.protocol.jdbc.sinkmode= 40 | 41 | # format is teradata 42 | cascading.bind.provider.teradata.format.names=teradata 43 | cascading.bind.provider.teradata.format.teradata.protocols=jdbc 44 | 
cascading.bind.provider.teradata.format.teradata.separator=: 45 | cascading.bind.provider.teradata.format.teradata.columnnames= 46 | cascading.bind.provider.teradata.format.teradata.orderBy= 47 | cascading.bind.provider.teradata.format.teradata.conditions= 48 | cascading.bind.provider.teradata.format.teradata.limit= 49 | cascading.bind.provider.teradata.format.teradata.updateBy= 50 | cascading.bind.provider.teradata.format.teradata.tableAlias= 51 | cascading.bind.provider.teradata.format.teradata.selectquery= 52 | cascading.bind.provider.teradata.format.teradata.countquery= 53 | cascading.bind.provider.teradata.format.teradata.replaceoninsert=false 54 | 55 | -------------------------------------------------------------------------------- /cascading-jdbc-teradata/src/test/java/cascading/jdbc/TeradataJDBCFactoryTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import cascading.jdbc.db.TeradataDBInputFormat; 24 | import org.junit.Test; 25 | 26 | import static org.junit.Assert.*; 27 | 28 | public class TeradataJDBCFactoryTest 29 | { 30 | 31 | @Test 32 | public void testGetInputFormatClass() 33 | { 34 | assertEquals( TeradataDBInputFormat.class, new TeradataJDBCFactory().getInputFormatClass() ); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /cascading-jdbc-teradata/src/test/java/cascading/jdbc/TeradataTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.jdbc; 22 | 23 | import cascading.jdbc.TeradataTableDesc; 24 | import cascading.jdbc.db.TeradataDBInputFormat; 25 | import org.junit.Before; 26 | 27 | public class TeradataTest extends JDBCTestingBase 28 | { 29 | @Before 30 | public void setUp() 31 | { 32 | setDriverName( "com.teradata.jdbc.TeraDriver" ); 33 | setJdbcurl( System.getProperty( "cascading.jdbcurl" ) ); 34 | setInputFormatClass( TeradataDBInputFormat.class ); 35 | setFactory( new TeradataJDBCFactory() ); 36 | } 37 | 38 | @Override 39 | public TeradataTableDesc getNewTableDesc( String tableName, String[] columnNames, String[] columnDefs, String[] primaryKeys ) 40 | { 41 | return new TeradataTableDesc( tableName, columnNames, columnDefs, primaryKeys ); 42 | } 43 | } -------------------------------------------------------------------------------- /etc/properties.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | if( project.properties[ 'teamcity' ] ) // make them system properties 22 | System.properties.putAll( project.properties[ 'teamcity' ] ) 23 | 24 | if( System.properties[ 'aws.properties' ] ) 25 | { 26 | file( System.properties[ 'aws.properties' ] ).withReader { reader -> 27 | def awsProperties = new Properties() 28 | awsProperties.load( reader ) 29 | System.properties.putAll( awsProperties ) 30 | } 31 | } 32 | 33 | ext.repoUrl = 'http://conjars.org/repo/' 34 | ext.repoUserName = System.properties[ 'publish.repo.userName' ] 35 | ext.repoPassword = System.properties[ 'publish.repo.password' ] 36 | 37 | -------------------------------------------------------------------------------- /etc/s3Upload.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | import com.monochromeroad.gradle.plugin.aws.s3.S3Sync 22 | import com.monochromeroad.gradle.plugin.aws.s3.ACL 23 | 24 | buildscript { 25 | repositories { 26 | mavenLocal() 27 | mavenCentral() 28 | maven { url 'http://repository-monochromeroad.forge.cloudbees.com/release/' } 29 | maven { url 'http://conjars.org/repo/' } // use conjars if cloudbees is down 30 | } 31 | dependencies { 32 | classpath 'com.monochromeroad.gradle:gradle-aws-s3-sync:0.5' 33 | } 34 | } 35 | 36 | task s3Upload( type: S3Sync ) { 37 | 38 | accessKey = rootProject.awsAccessId 39 | secretKey = rootProject.awsSecretKey 40 | 41 | keepFiles = true // prevents deletion from bucket 42 | 43 | acl ACL.PublicRead 44 | 45 | configFile "${rootProject.projectDir}/etc/synchronizer.properties" 46 | 47 | ext.source = "${buildDir}/publish" 48 | 49 | ext.destination = "${project.s3Bucket}/${project.name}/${majorVersion}/" 50 | 51 | from source 52 | into destination 53 | } 54 | -------------------------------------------------------------------------------- /etc/synchronizer.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 
19 | # 20 | 21 | # see http://jets3t.s3.amazonaws.com/toolkit/configuration.html 22 | 23 | #s3service.default-bucket-location=Tokyo 24 | # httpclient.max-connections=2 25 | # threaded-service.admin-max-thread-count=5 26 | 27 | ### 28 | # File/Object comparison properties 29 | ### 30 | 31 | filecomparer.skip-symlinks=true 32 | #filecomparer.use-md5-files=true 33 | #filecomparer.generate-md5-files=true 34 | #filecomparer.md5-files-root-dir=.cache 35 | filecomparer.skip-upload-of-md5-files=true 36 | filecomparer.assume-local-latest-in-mismatch=false 37 | 38 | # Page Caching - none 39 | upload.metadata.Cache-Control=no-cache 40 | 41 | upload.transformed-files-batch-size=1000 42 | -------------------------------------------------------------------------------- /etc/version.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | apply from: './etc/properties.gradle' 22 | 23 | project.ext.currentCommit = System.properties[ 'build.vcs.number' ]; 24 | 25 | if( !currentCommit ) 26 | { 27 | def commitPath = File.createTempFile( "commit", "tmp" ) 28 | 29 | ant.exec( dir: '.', executable: "git", output: commitPath ) { 30 | arg( line: 'rev-parse HEAD' ) 31 | } 32 | 33 | currentCommit = commitPath.readLines().get( 0 ) 34 | 35 | commitPath.delete() 36 | } 37 | 38 | def versionProperties = new Properties() 39 | file( 'version.properties' ).withInputStream { versionProperties.load( it ) } 40 | 41 | ext.majorVersion = versionProperties[ 'cascading-jdbc.release.major' ] 42 | ext.minorVersion = versionProperties[ 'cascading-jdbc.release.minor' ] 43 | 44 | ext.buildNumber = System.getProperty( 'build.number', 'dev' ) 45 | 46 | if( System.properties[ 'cascading-jdbc.release.private' ] ) 47 | buildNumber = "priv-${buildNumber}" 48 | else if( !System.properties[ 'cascading-jdbc.release.final' ] ) 49 | buildNumber = "wip-${buildNumber}" 50 | 51 | ext.releaseTag = "${majorVersion}-${buildNumber}" 52 | 53 | if( !System.properties[ 'build.number' ] ) 54 | releaseTag = "wip-${majorVersion}" 55 | 56 | ext.releaseVersion = majorVersion 57 | 58 | if( minorVersion ) 59 | releaseVersion = "${releaseVersion}.${minorVersion}" 60 | 61 | if( !System.properties[ 'cascading-jdbc.release.final' ] ) 62 | releaseVersion = "${releaseVersion}-${buildNumber}" 63 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | include 'cascading-jdbc-core' 22 | include 'cascading-jdbc-derby' 23 | include 'cascading-jdbc-h2' 24 | 25 | def optional = ["mysql", "postgresql", "oracle", "redshift", "teradata"] 26 | 27 | for ( dbsystem in optional ) { 28 | if ( System.getProperty( "cascading.jdbc.url.${dbsystem}" ) || System.getProperty( "dev" ) ) 29 | include "cascading-jdbc-${dbsystem}" 30 | else 31 | logger.warn("excluding cascading-jdbc-${dbsystem} due to missing cascading.jdbc.url.${dbsystem} property") 32 | 33 | } 34 | -------------------------------------------------------------------------------- /version.properties: -------------------------------------------------------------------------------- 1 | cascading-jdbc.release.major=3.0 2 | cascading-jdbc.release.minor=0 3 | --------------------------------------------------------------------------------