├── .gitignore ├── data-generator ├── pom.xml └── src │ └── main │ ├── java │ └── kafka │ │ ├── AvroSchemaRegistryTest.java │ │ ├── JsonCurrencySender.java │ │ ├── JsonOrderSender.java │ │ └── KafkaGenerator.java │ └── resources │ ├── avro │ └── UserAvro.avsc │ ├── dynamic_index.csv │ ├── hive_read.csv │ ├── 1 │ ├── 2 │ ├── 3 │ └── 4 │ ├── src.csv │ ├── test.csv │ ├── test.json │ ├── test1.csv │ ├── test15.csv │ ├── test_csv.csv │ ├── testdata.avro │ ├── user.avro │ └── part-6be7eb15-4ec0-4ff8-aa29-59d5ec37dfae-0-0 │ ├── user.csv │ ├── user19.json │ ├── user2.csv │ ├── user3.csv │ ├── user4.json │ └── user_part.csv ├── etl-job ├── pom.xml └── src │ └── main │ ├── java │ ├── Test.java │ ├── TestGen.java │ ├── constants │ │ ├── FlinkSqlConstants.java │ │ └── UnboundedFlinkSqlConstants.java │ ├── kafka │ │ └── UserAvro.java │ ├── kafka2es │ │ ├── Kafak2DynamicIndexEs.java │ │ ├── Kafka2AppendEs.java │ │ ├── Kafka2UpsertEs.java │ │ └── Kafka2dynamicEsSQL.java │ ├── kafka2file │ │ ├── EventTimeBucketAssigner.java │ │ ├── ReadHiveDataETL.java │ │ ├── StreamETLKafka2Hdfs.java │ │ ├── StreamETLKafka2HdfsSQL.java │ │ ├── TestCsv2Csv.java │ │ ├── TestCsv2Csv1.java │ │ ├── TestCsvError.java │ │ ├── TestFileSink.scala │ │ └── Write2Kafka.java │ ├── kafka2hbase │ │ ├── KafkaJoinHbaseJoinMysql2Hbase.java │ │ ├── TestHbase.java │ │ └── UnboundedKafkaJoinHbase2Hbase.java │ ├── kafka2jdbc │ │ ├── KafkaJoinJdbc2Jdbc.java │ │ ├── KafkaJoinJdbc2JdbcProc.java │ │ ├── TestJdbc.java │ │ ├── UnboundedKafkaJoinJdbc2Jdbc.java │ │ ├── retract2Mysql.java │ │ └── testNonExistedTable.java │ ├── kafka2kafka │ │ ├── ConsumeConfluentAvroTest.java │ │ ├── KafkaAvro2Kafka.java │ │ ├── KafkaCsv2Kafka.java │ │ ├── KafkaJoinJdbc2Kafka.java │ │ ├── KafkaJoinKafka2Kafka.java │ │ └── KafkaJson2Kafka.java │ ├── pge2e │ │ └── PgCatalogTest.java │ └── usercase │ │ ├── TestUserIssue.java │ │ ├── TestUserIssue10.java │ │ ├── TestUserIssue11.java │ │ ├── TestUserIssue12.java │ │ ├── TestUserIssue13.java │ │ ├── TestUserIssue14.java │ │ ├── TestUserIssue15.java │ │ ├── TestUserIssue16.java │ │ ├── TestUserIssue17.java │ │ ├── TestUserIssue18.java │ │ ├── TestUserIssue19.java │ │ ├── TestUserIssue2.java │ │ ├── TestUserIssue20.java │ │ ├── TestUserIssue21.java │ │ ├── TestUserIssue22.java │ │ ├── TestUserIssue3.java │ │ ├── TestUserIssue4.java │ │ ├── TestUserIssue5.java │ │ ├── TestUserIssue6.java │ │ ├── TestUserIssue7.java │ │ ├── TestUserIssue8.java │ │ └── TestUserIssue9.java │ └── resources │ ├── job-scripts │ └── kafak2kafka_etl_run.sh │ ├── job-sql-1.10 │ ├── kafka2es │ │ ├── Kafka2AppendEs.sql │ │ ├── Kafka2DynamicIndexEs.sql │ │ └── Kafka2UpsertEs.sql │ ├── kafka2filesystemandhive │ │ ├── Csv2HivePartition.sql │ │ ├── Csv2HiveSink.sql │ │ ├── FileSystem2FileSystem.sql │ │ └── Kafka2HiveSink.sql │ ├── kafka2hbase │ │ ├── KafkaJoinHbaseJoinMysql2Hbase.sql │ │ └── UnboundedKafkaJoinHbase2Hbase.sql │ ├── kafka2jdbc │ │ ├── KafkaJoinJdbc2Jdbc.sql │ │ └── UnboundedKafkaJoinJdbc2Jdbc.sql │ └── kafka2kafka │ │ ├── KafkaAvro2Kafka.sql │ │ ├── KafkaCsv2Kafka.sql │ │ ├── KafkaJoinJdbc2Kafka.sql │ │ └── kafkaJson2kafka.sql │ ├── job-sql-1.11 │ ├── catalog │ │ └── PgcatalogE2eTest.sql │ ├── jdbc │ │ ├── kafka2mysql.sql │ │ └── kafkajoinmysql.sql │ ├── kafka2es │ │ ├── Kafka2AppendEs.sql │ │ ├── Kafka2DynamicIndexEs.sql │ │ └── Kafka2UpsertEs.sql │ └── kafka2hbase │ │ ├── KafkaJoinHbaseJoinMysql2Hbase.sql │ │ ├── UnboundedKafkaJoinHbase2Hbase.sql │ │ └── hbase_cdc │ ├── job-sql-1.12 │ ├── hbase2_test.sql │ ├── hive_latest_dim.sql │ └── 
upsert-kafka.sql │ ├── log4j.properties │ ├── pictures │ └── CURRRENT_TIMESTAMP.png │ └── readme.md ├── flink-demo ├── flink-jdbc-demo │ ├── README.md │ ├── docker-compose-flink-demo.yaml │ ├── flink-demo-udf.jar │ └── sql-client-defaults.yaml ├── flink-temporal-join-demo │ ├── README.md │ ├── register-mysql.json │ ├── sql-client-defaults.yaml │ └── temporal-join-versioned-table.yaml ├── pom.xml └── src │ └── main │ └── java │ └── udf │ └── Int2DateUDF.java ├── pom.xml ├── sql-avro ├── pom.xml └── src │ └── main │ └── java │ └── TestUserIssue12.java └── state-process ├── pom.xml └── src └── main └── java ├── org └── apache │ └── flink │ └── state │ └── api │ └── runtime │ └── metadata │ └── SavepointMetadata.java └── state └── CdcSourceStateAnalysis.java /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | scalastyle-output.xml 3 | .classpath 4 | .idea 5 | .metadata 6 | .settings 7 | .project 8 | .version.properties 9 | filter.properties 10 | logs.zip 11 | target 12 | tmp 13 | *.class 14 | *.iml 15 | *.swp 16 | *.jar 17 | !flink-demo-udf.jar 18 | *.zip 19 | *.log 20 | *.pyc 21 | .DS_Store 22 | build-target 23 | flink-end-to-end-tests/flink-datastream-allround-test/src/main/java/org/apache/flink/streaming/tests/avro/ 24 | flink-formats/flink-avro/src/test/java/org/apache/flink/formats/avro/generated/ 25 | flink-formats/flink-parquet/src/test/java/org/apache/flink/formats/parquet/generated/ 26 | flink-runtime-web/web-dashboard/node/ 27 | flink-runtime-web/web-dashboard/node_modules/ 28 | flink-runtime-web/web-dashboard/web/ 29 | flink-python/dist/ 30 | flink-python/build/ 31 | flink-python/pyflink.egg-info/ 32 | flink-python/apache_flink.egg-info/ 33 | flink-python/docs/_build 34 | flink-python/.tox/ 35 | flink-python/dev/download 36 | flink-python/dev/.conda/ 37 | flink-python/dev/log/ 38 | flink-python/dev/.stage.txt 39 | flink-python/.eggs/ 40 | atlassian-ide-plugin.xml 41 | out/ 42 | /docs/api 43 | /docs/content 44 | /docs/.bundle 45 | /docs/.rubydeps 46 | /docs/ruby2/.bundle 47 | /docs/ruby2/.rubydeps 48 | /docs/.jekyll-metadata 49 | *.ipr 50 | *.iws 51 | tools/flink 52 | tools/flink-* 53 | tools/releasing/release 54 | tools/japicmp-output 55 | -------------------------------------------------------------------------------- /data-generator/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | flink-sql-etl 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | data-generator 13 | 14 | 15 | org.apache.kafka 16 | kafka_2.11 17 | 1.0.0 18 | 19 | 20 | io.confluent 21 | kafka-avro-serializer 22 | 5.3.0 23 | 24 | 25 | org.apache.avro 26 | avro 27 | 1.8.2 28 | 29 | 30 | org.apache.avro 31 | avro-maven-plugin 32 | 1.8.2 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | org.apache.avro 41 | avro-maven-plugin 42 | 1.8.2 43 | 44 | 45 | generate-sources 46 | 47 | schema 48 | 49 | 50 | src/main/resources/avro 51 | ${project.build.directory}/generated-sources 52 | String 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /data-generator/src/main/java/kafka/AvroSchemaRegistryTest.java: -------------------------------------------------------------------------------- 1 | //package kafka; 2 | // 3 | //import io.confluent.kafka.serializers.KafkaAvroDeserializer; 4 | //import io.confluent.kafka.serializers.KafkaAvroSerializer; 5 | //import org.apache.kafka.clients.consumer.ConsumerConfig; 6 | //import 
org.apache.kafka.clients.consumer.ConsumerRecord; 7 | //import org.apache.kafka.clients.consumer.ConsumerRecords; 8 | //import org.apache.kafka.clients.consumer.KafkaConsumer; 9 | //import org.apache.kafka.clients.producer.KafkaProducer; 10 | //import org.apache.kafka.clients.producer.ProducerConfig; 11 | //import org.apache.kafka.clients.producer.ProducerRecord; 12 | //import org.apache.kafka.common.serialization.StringDeserializer; 13 | //import org.apache.kafka.common.serialization.StringSerializer; 14 | // 15 | //import java.io.IOException; 16 | //import java.text.DateFormat; 17 | //import java.text.SimpleDateFormat; 18 | //import java.util.Collections; 19 | //import java.util.Date; 20 | //import java.util.Properties; 21 | //import java.util.Random; 22 | //import java.util.stream.IntStream; 23 | // 24 | //public class AvroSchemaRegistryTest { 25 | // public static final String WIKIPEDIA_FEED = "WikipediaFeed2_filtered"; 26 | // 27 | // public static void main(final String[] args) throws IOException { 28 | // produceInputs(); 29 | // consumeOutput(); 30 | // } 31 | // 32 | // private static void produceInputs() throws IOException { 33 | // final String[] users = {"leonard", "bob", "joe", "damian", "tania", "phil", "sam", "lauren", "joseph"}; 34 | // final Properties props = new Properties(); 35 | // props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); 36 | // props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); 37 | // props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class); 38 | // props.put("schema.registry.url", "http://localhost:8081"); 39 | // final KafkaProducer producer = new KafkaProducer<>(props); 40 | // final Random random = new Random(); 41 | // 42 | // IntStream.range(0, 10) 43 | // .mapToObj(value -> new UserAvro(users[random.nextInt(users.length)], true, "content")) 44 | // .forEach( 45 | // record -> { 46 | // System.out.println(record.toString()) ; 47 | // producer.send(new ProducerRecord<>(WIKIPEDIA_FEED, record.getUserName(), record)); 48 | // }); 49 | // 50 | // producer.flush(); 51 | // } 52 | // 53 | // private static void consumeOutput() { 54 | // final Properties consumerProperties = new Properties(); 55 | // consumerProperties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); 56 | // consumerProperties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); 57 | // consumerProperties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaAvroDeserializer.class); 58 | // consumerProperties.put("schema.registry.url", "http://localhost:8081"); 59 | // consumerProperties.put(ConsumerConfig.GROUP_ID_CONFIG, "wikipedia-feed-example-consumer3"); 60 | // consumerProperties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 61 | // final KafkaConsumer consumer = new KafkaConsumer<>(consumerProperties); 62 | // consumer.subscribe(Collections.singleton(WIKIPEDIA_FEED)); 63 | // while (true) { 64 | // final ConsumerRecords consumerRecords = consumer.poll(Long.MAX_VALUE); 65 | // for (final ConsumerRecord consumerRecord : consumerRecords) { 66 | // 67 | // System.out.println(consumerRecord.key() + "=" + consumerRecord.value()); 68 | // } 69 | // } 70 | // } 71 | // 72 | //} 73 | -------------------------------------------------------------------------------- /data-generator/src/main/java/kafka/JsonCurrencySender.java: -------------------------------------------------------------------------------- 1 | package kafka; 2 | 3 | import 
com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import org.apache.kafka.clients.producer.Callback; 6 | import org.apache.kafka.clients.producer.KafkaProducer; 7 | import org.apache.kafka.clients.producer.ProducerRecord; 8 | import org.apache.kafka.clients.producer.RecordMetadata; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.text.DateFormat; 13 | import java.text.SimpleDateFormat; 14 | import java.util.Date; 15 | import java.util.HashMap; 16 | import java.util.Map; 17 | import java.util.Properties; 18 | 19 | public class JsonCurrencySender { 20 | private static final Logger logger = LoggerFactory.getLogger(JsonCurrencySender.class); 21 | private static final ObjectMapper objectMapper = new ObjectMapper(); 22 | private static final SendCallBack sendCallBack = new SendCallBack(); 23 | private static final String topicName = "flink_currency1"; 24 | private static final Map currency2rates = initCurrency2rates(); 25 | private static final Map country2currency = initCountry2Currency(); 26 | 27 | public static void sendMessage(Properties kafkaProperties, int continueMinutes) throws InterruptedException, JsonProcessingException { 28 | KafkaProducer producer = new KafkaProducer<>(kafkaProperties); 29 | //update currency per 30 seconds 30 | for (int i = 0; i < (continueMinutes * 60 / 30); i++) { 31 | long timestart = System.currentTimeMillis(); 32 | for (Map.Entry entry : country2currency.entrySet()) { 33 | Map map = new HashMap<>(); 34 | map.put("country", entry.getKey()); 35 | map.put("currency", entry.getValue()); 36 | map.put("rate", currency2rates.get(entry.getValue()) + 1); 37 | DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); 38 | Long time = System.currentTimeMillis(); 39 | Date date = new Date(time); 40 | String jsonSchemaDate = dateFormat.format(date); 41 | map.put("currency_time", jsonSchemaDate); 42 | producer.send( 43 | new ProducerRecord<>( 44 | topicName, 45 | String.valueOf(time), 46 | objectMapper.writeValueAsString(map) 47 | ), sendCallBack 48 | 49 | ); 50 | } 51 | long timecast = System.currentTimeMillis() - timestart; 52 | System.out.println((i + 1) * currency2rates.size() + " has sent to topic:[" + topicName + "] in " + timecast + "ms"); 53 | if (timecast < 30 * 1000) { 54 | Thread.sleep(30 * 1000 - timecast); 55 | } 56 | } 57 | } 58 | 59 | static class SendCallBack implements Callback { 60 | 61 | @Override 62 | public void onCompletion(RecordMetadata recordMetadata, Exception e) { 63 | if (e != null) { 64 | logger.error(e.getMessage(), e); 65 | } 66 | } 67 | } 68 | 69 | private static Map initCurrency2rates() { 70 | final Map map = new HashMap<>(); 71 | map.put("US Dollar", 102); 72 | map.put("Euro", 114); 73 | map.put("Yen", 1); 74 | map.put("RMB", 16); 75 | return map; 76 | } 77 | 78 | private static Map initCountry2Currency() { 79 | final Map map = new HashMap<>(); 80 | map.put("America", "US Dollar"); 81 | map.put("German", "Euro"); 82 | map.put("Japan", "Yen"); 83 | map.put("China", "RMB"); 84 | return map; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /data-generator/src/main/java/kafka/JsonOrderSender.java: -------------------------------------------------------------------------------- 1 | package kafka; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import org.apache.kafka.clients.producer.Callback; 6 | 
import org.apache.kafka.clients.producer.KafkaProducer; 7 | import org.apache.kafka.clients.producer.ProducerRecord; 8 | import org.apache.kafka.clients.producer.RecordMetadata; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.text.DateFormat; 13 | import java.text.SimpleDateFormat; 14 | import java.util.ArrayList; 15 | import java.util.Date; 16 | import java.util.HashMap; 17 | import java.util.List; 18 | import java.util.Map; 19 | import java.util.Properties; 20 | import java.util.Random; 21 | 22 | public class JsonOrderSender { 23 | private static final Logger logger = LoggerFactory.getLogger(JsonOrderSender.class); 24 | private static final ObjectMapper objectMapper = new ObjectMapper(); 25 | private static final Random random = new Random(); 26 | private static final SendCallBack sendCallBack = new SendCallBack(); 27 | private static final String topicName = "flink_orders3"; 28 | private static final List currencies = initCurrencies(); 29 | private static final List itemNames = initItemNames(); 30 | 31 | public static synchronized void sendMessage(Properties kafkaProperties, int continueMinutes) throws InterruptedException, JsonProcessingException { 32 | KafkaProducer producer = new KafkaProducer<>(kafkaProperties); 33 | // order stream 34 | for (int i = 0; i < continueMinutes * 60; i++) { 35 | long timestart = System.currentTimeMillis(); 36 | for (int j = 0; j < currencies.size(); j++) { 37 | Map map = new HashMap<>(); 38 | map.put("order_id", System.currentTimeMillis() + "_" + random.nextInt()); 39 | map.put("item", itemNames.get(random.nextInt(itemNames.size()) % itemNames.size())); 40 | map.put("currency", currencies.get(j % currencies.size())); 41 | map.put("amount", j % 100 / 100.0); 42 | Long time = System.currentTimeMillis(); 43 | DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 44 | Date date = new Date(time); 45 | String jsonSchemaDate = dateFormat.format(date); 46 | map.put("order_time", jsonSchemaDate); 47 | producer.send( 48 | new ProducerRecord<>( 49 | topicName, 50 | String.valueOf(time), 51 | objectMapper.writeValueAsString(map) 52 | ), sendCallBack 53 | 54 | ); 55 | Thread.sleep(5); 56 | 57 | } 58 | long timecast = System.currentTimeMillis() - timestart; 59 | System.out.println((i + 1) * currencies.size() + " has sent to topic:[" + topicName + "] in " + timecast + "ms"); 60 | if (timecast < 2000) { 61 | System.out.println("begin sleep...." + System.currentTimeMillis()); 62 | Thread.sleep(2000); 63 | System.out.println("end sleep...." 
+ System.currentTimeMillis()); 64 | 65 | } 66 | } 67 | } 68 | 69 | static class SendCallBack implements Callback { 70 | 71 | @Override 72 | public void onCompletion(RecordMetadata recordMetadata, Exception e) { 73 | if (e != null) { 74 | logger.error(e.getMessage(), e); 75 | } 76 | } 77 | } 78 | 79 | private static List initCurrencies() { 80 | final List currencies = new ArrayList<>(); 81 | currencies.add("US Dollar"); 82 | currencies.add("Euro"); 83 | currencies.add("Yen"); 84 | currencies.add("人民币"); 85 | return currencies; 86 | } 87 | 88 | private static List initItemNames() { 89 | final List itermNames = new ArrayList<>(); 90 | itermNames.add("Apple"); 91 | itermNames.add("橘子"); 92 | itermNames.add("Paper"); 93 | itermNames.add("牛奶"); 94 | itermNames.add("酸奶"); 95 | itermNames.add("豆腐"); 96 | return itermNames; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /data-generator/src/main/java/kafka/KafkaGenerator.java: -------------------------------------------------------------------------------- 1 | package kafka; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import org.apache.kafka.clients.producer.ProducerConfig; 5 | import org.apache.kafka.common.serialization.StringSerializer; 6 | 7 | import java.util.Properties; 8 | 9 | public class KafkaGenerator { 10 | public static void main(String[] args) throws JsonProcessingException, InterruptedException { 11 | Properties kafkaProperties = new Properties(); 12 | kafkaProperties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); 13 | kafkaProperties.put(ProducerConfig.ACKS_CONFIG, "all"); 14 | kafkaProperties.put(ProducerConfig.RETRIES_CONFIG, "0"); 15 | kafkaProperties.put(ProducerConfig.LINGER_MS_CONFIG, "1"); 16 | kafkaProperties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432"); 17 | kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 18 | kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 19 | kafkaProperties.put(ProducerConfig.BATCH_SIZE_CONFIG, "163840"); 20 | kafkaProperties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, "100000"); 21 | 22 | // Thread thread1 = new Thread(new Runnable() { 23 | // @Override 24 | // public void run() { 25 | // try { 26 | // JsonCurrencySender.sendMessage(kafkaProperties, 1); 27 | // } catch (Exception e) { 28 | // e.printStackTrace(); 29 | // } 30 | // } 31 | // }); 32 | // thread1.start(); 33 | 34 | Thread thread2 = new Thread(new Runnable() { 35 | @Override 36 | public void run() { 37 | try { 38 | JsonOrderSender.sendMessage(kafkaProperties, 3); 39 | } catch (Exception e) { 40 | e.printStackTrace(); 41 | } 42 | } 43 | }); 44 | thread2.start(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/avro/UserAvro.avsc: -------------------------------------------------------------------------------- 1 | {"namespace": "kafka", 2 | "type": "record", 3 | "name": "UserAvro", 4 | "fields": [ 5 | {"name": "user_name", "type": "string"}, 6 | {"name": "is_new", "type": "boolean"}, 7 | {"name": "content", "type": "string"} 8 | ] 9 | } -------------------------------------------------------------------------------- /data-generator/src/main/resources/dynamic_index.csv: -------------------------------------------------------------------------------- 1 | 1,apple,1584504734000,2020-03-18,2020-03-18 12:12:14.0,test1 2 | 
2,peanut,1584591134000,2020-03-19,2020-03-19 12:22:21.0,test2 3 | 3,apple,1584504736000,2020-03-20,2020-03-18 12:12:14.0,test3 4 | 4,peanut,1584591138000,2020-03-19,2020-03-19 12:22:21.0,test4 -------------------------------------------------------------------------------- /data-generator/src/main/resources/hive_read.csv/1: -------------------------------------------------------------------------------- 1 | sam,true,content 2 | leonard,false,content 3 | 雪尽,true,content 4 | leonard2,true,contentsamd 5 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/hive_read.csv/2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/data-generator/src/main/resources/hive_read.csv/2 -------------------------------------------------------------------------------- /data-generator/src/main/resources/hive_read.csv/3: -------------------------------------------------------------------------------- 1 | sam,true,content 2 | leonard,false,content 3 | 雪尽,true,content 4 | leonard2,true,content 5 | samd,false,content_test 6 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/hive_read.csv/4: -------------------------------------------------------------------------------- 1 | sam,true,content 2 | leonard,false,content 3 | 雪尽,true,content 4 | leonard2,true,content 5 | sam,true,content 6 | leonard,false,content 7 | 雪尽,true,content 8 | leonard2,true,contentsamd 9 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/src.csv: -------------------------------------------------------------------------------- 1 | 1|aavb 2 | 2|dadsaf -------------------------------------------------------------------------------- /data-generator/src/main/resources/test.csv: -------------------------------------------------------------------------------- 1 | 1,abc,China,2019-12-20 12:22:00.1234,1234.23 2 | 2,,America,2019-12-20 12:22:00.1234,1234.23 3 | ,,Japan,2019-12-20 12:22:00.1234,1234.23 -------------------------------------------------------------------------------- /data-generator/src/main/resources/test.json: -------------------------------------------------------------------------------- 1 | //{"w_es":1589870637000,"w_type":"INSERT","w_isDdl":false,"w_data":[{"pay_info":"channelId=82&onlineFee=89.0&outTradeNo=0&payId=0&payType=02&rechargeId=4&totalFee=89.0&tradeStatus=success&userId=32590183789575&sign=00","online_fee":"89.0","sign":"00","account_pay_fee":"0.0"}],"w_ts":"2020-05-20T13:58:37.131Z","w_table":"cccc111"} 2 | {"w_es":1589870637000,"w_type":"INSERT","w_isDdl":false,"w_data":[{"pay_info":"channelId=82&onlineFee=89.0&outTradeNo=0&payId=0&payType=02&rechargeId=4&totalFee=89.0&tradeStatus=success&userId=32590183789575&sign=00","online_fee":"89.0","sign":"00","account_pay_fee":"0.1"},{"pay_info":"channelId=82&onlineFee=89.0&outTradeNo=0&payId=0&payType=02&rechargeId=4&totalFee=89.0&tradeStatus=success&userId=32590183789575&sign=00","online_fee":"89.0","sign":"00","account_pay_fee":"0.0"}],"w_ts":"2020-05-20T13:58:37.131Z","w_table":"cccc111"} -------------------------------------------------------------------------------- /data-generator/src/main/resources/test1.csv: -------------------------------------------------------------------------------- 1 | 1,10,Hello-1,100,1.01,false,Welt-1,2019-08-18 
19:00:00.0,2019-08-18,19:00:00,2019-08-18 19:00:00.000000001,123456.0001 2 | 2,20,Hello-2,200,2.02,true,Welt-2,2019-08-18 19:01:00.0,2019-08-18,19:01:00,2019-08-18 19:00:00.000000002,123456.1234 3 | 3,30,Hello-3,300,3.03,false,Welt-3,2019-08-18 19:02:00.0,2019-08-18,19:02:00,2019-08-18 19:00:00.000000003,123456.1000 4 | 4,40,,400,4.04,true,Welt-4,2019-08-18 19:03:00.0,2019-08-18,19:03:00,2019-08-18 19:00:00.400000000,123456.2345 -------------------------------------------------------------------------------- /data-generator/src/main/resources/test15.csv: -------------------------------------------------------------------------------- 1 | 0|HeadQuarters|0|HQ|0|1 Alameda Way|Alameda|CA|55555|USA||||||||||false|false|false|false|false 2 | 1|Supermarket|28|Store 1|1|2853 Bailey Rd|Acapulco|Guerrero|55555|Mexico|Jones|262-555-5124|262-555-5121|1982-01-09 00:00:00|1990-12-05 00:00:00|23593|17475|3671|2447|false|false|false|false|false 3 | 10|Supermarket|24|Store 10|10|7894 Rotherham Dr|Orizaba|Veracruz|55555|Mexico|Merz|212-555-4774|212-555-4771|1979-04-13 00:00:00|1982-01-30 00:00:00|34791|26354|5062|3375|false|false|true|true|false 4 | 11|Supermarket|22|Store 11|11|5371 Holland Circle|Portland|OR|55555|USA|Erickson|685-555-8995|685-555-8991|1976-09-17 00:00:00|1982-05-15 00:00:00|20319|16232|2452|1635|false|false|false|false|false 5 | 12|Deluxe Supermarket|25|Store 12|12|1120 Westchester Pl|Hidalgo|Zacatecas|55555|Mexico|Kalman|151-555-1702|151-555-1701|1968-03-25 00:00:00|1993-12-18 00:00:00|30584|21938|5188|3458|true|true|true|true|true 6 | 13|Deluxe Supermarket|23|Store 13|13|5179 Valley Ave|Salem|OR|55555|USA|Inmon|977-555-2724|977-555-2721|1957-04-13 00:00:00|1997-11-10 00:00:00|27694|18670|5415|3610|true|true|true|true|true 7 | 14|Small Grocery|1|Store 14|14|4365 Indigo Ct|San Francisco|CA|55555|USA|Strehlo|135-555-4888|135-555-4881|1957-11-24 00:00:00|1958-01-07 00:00:00|22478|15321|4294|2863|true|false|false|false|false 8 | 15|Supermarket|18|Store 15|15|5006 Highland Drive|Seattle|WA|55555|USA|Ollom|893-555-1024|893-555-1021|1969-07-24 00:00:00|1973-10-19 00:00:00|21215|13305|4746|3164|true|false|false|false|false 9 | 16|Supermarket|87|Store 16|16|5922 La Salle Ct|Spokane|WA|55555|USA|Mantle|643-555-3645|643-555-3641|1974-08-23 00:00:00|1977-07-13 00:00:00|30268|22063|4923|3282|false|false|false|false|false 10 | 17|Deluxe Supermarket|84|Store 17|17|490 Risdon Road|Tacoma|WA|55555|USA|Mays|855-555-5581|855-555-5581|1970-05-30 00:00:00|1976-06-23 00:00:00|33858|22123|7041|4694|true|false|true|true|true 11 | 18|Mid-Size Grocery|25|Store 18|18|6764 Glen Road|Hidalgo|Zacatecas|55555|Mexico|Brown|528-555-8317|528-555-8311|1969-06-28 00:00:00|1975-08-30 00:00:00|38382|30351|4819|3213|false|false|false|false|false 12 | 19|Deluxe Supermarket|5|Store 19|19|6644 Sudance Drive|Vancouver|BC|55555|Canada|Ruth|862-555-7395|862-555-7391|1977-03-27 00:00:00|1990-10-25 00:00:00|23112|16418|4016|2678|true|true|true|true|true 13 | 2|Small Grocery|78|Store 2|2|5203 Catanzaro Way|Bellingham|WA|55555|USA|Smith|605-555-8203|605-555-8201|1970-04-02 00:00:00|1973-06-04 00:00:00|28206|22271|3561|2374|true|false|false|false|false 14 | 20|Mid-Size Grocery|6|Store 20|20|3706 Marvelle Ln|Victoria|BC|55555|Canada|Cobb|897-555-1931|897-555-1931|1980-02-06 00:00:00|1987-04-09 00:00:00|34452|27463|4193|2795|true|false|false|false|true 15 | 21|Deluxe Supermarket|106|Store 21|21|4093 Steven Circle|San Andres|DF|55555|Mexico|Jones|493-555-4781|493-555-4781|1986-02-07 00:00:00|1990-04-16 
00:00:00|||||true|false|true|true|true 16 | 22|Small Grocery|88|Store 22|22|9606 Julpum Loop|Walla Walla|WA|55555|USA|Byrg|881-555-5117|881-555-5111|1951-01-24 00:00:00|1969-10-17 00:00:00|||||false|false|false|false|false 17 | 23|Mid-Size Grocery|89|Store 23|23|3920 Noah Court|Yakima|WA|55555|USA|Johnson|170-555-8424|170-555-8421|1977-07-16 00:00:00|1987-07-24 00:00:00|||||false|false|false|false|false 18 | 24|Supermarket|7|Store 24|24|2342 Waltham St.|San Diego|CA|55555|USA|Byrd|111-555-0303|111-555-0304|1979-05-22 00:00:00|1986-04-20 00:00:00|||||true|false|true|false|true 19 | 3|Supermarket|76|Store 3|3|1501 Ramsey Circle|Bremerton|WA|55555|USA|Davis|509-555-1596|509-555-1591|1959-06-14 00:00:00|1967-11-19 00:00:00|39696|24390|9184|6122|false|false|true|true|false 20 | 4|Gourmet Supermarket|27|Store 4|4|433 St George Dr|Camacho|Zacatecas|55555|Mexico|Johnson|304-555-1474|304-555-1471|1994-09-27 00:00:00|1995-12-01 00:00:00|23759|16844|4149|2766|true|false|true|true|true 21 | 5|Small Grocery|4|Store 5|5|1250 Coggins Drive|Guadalajara|Jalisco|55555|Mexico|Green|801-555-4324|801-555-4321|1978-09-18 00:00:00|1991-06-29 00:00:00|24597|15012|5751|3834|true|false|false|false|false 22 | 6|Gourmet Supermarket|47|Store 6|6|5495 Mitchell Canyon Road|Beverly Hills|CA|55555|USA|Maris|958-555-5002|958-555-5001|1981-01-03 00:00:00|1991-03-13 00:00:00|23688|15337|5011|3340|true|true|true|true|true 23 | 7|Supermarket|3|Store 7|7|1077 Wharf Drive|Los Angeles|CA|55555|USA|White|477-555-7967|477-555-7961|1971-05-21 00:00:00|1981-10-20 00:00:00|23598|14210|5633|3755|false|false|false|false|true 24 | 8|Deluxe Supermarket|26|Store 8|8|3173 Buena Vista Ave|Merida|Yucatan|55555|Mexico|Williams|797-555-3417|797-555-3411|1958-09-23 00:00:00|1967-11-18 00:00:00|30797|20141|6393|4262|true|true|true|true|true 25 | 9|Mid-Size Grocery|2|Store 9|9|1872 El Pintado Road|Mexico City|DF|55555|Mexico|Stuber|439-555-3524|439-555-3521|1955-03-18 00:00:00|1959-06-07 00:00:00|36509|22450|8435|5624|false|false|false|false|false -------------------------------------------------------------------------------- /data-generator/src/main/resources/test_csv.csv: -------------------------------------------------------------------------------- 1 | sam,true,content 2 | leonard,false,content 3 | leonard1,true,content 4 | leonard2,true,content 5 | sam,true,content 6 | leonard,false,content 7 | 雪尽,true,content 8 | leonard2,true,content 9 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/testdata.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/data-generator/src/main/resources/testdata.avro -------------------------------------------------------------------------------- /data-generator/src/main/resources/user.avro/part-6be7eb15-4ec0-4ff8-aa29-59d5ec37dfae-0-0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/data-generator/src/main/resources/user.avro/part-6be7eb15-4ec0-4ff8-aa29-59d5ec37dfae-0-0 -------------------------------------------------------------------------------- /data-generator/src/main/resources/user.csv: -------------------------------------------------------------------------------- 1 | sam,true,content 2 | leonard,false,content 3 | 雪尽,true,content 4 | leonard2,true,content 5 | 
超级帅气的人,false,content_test 6 | 超级帅气的人,false,这是一个很长的中文字符串中文字符串中文字符串 7 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/user19.json: -------------------------------------------------------------------------------- 1 | {"monitorId": 789, "deviceId": "ab;cd", "data": 144.0, "state": 2} 2 | {"monitorId": 788, "deviceId": "a;bcd", "data": 144.0, "state": 2} 3 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/user2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/data-generator/src/main/resources/user2.csv -------------------------------------------------------------------------------- /data-generator/src/main/resources/user3.csv: -------------------------------------------------------------------------------- 1 | 1,click,2019-08-19 19:30:01 2 | 2,exposure,2019-08-19 20:30:02 3 | 3,click,2019-08-19 21:31:03 4 | 4,exposure,2019-08-19 19:31:04 5 | 5,click,2019-08-19 20:31:05 6 | 6,click,2019-08-19 21:32:06 -------------------------------------------------------------------------------- /data-generator/src/main/resources/user4.json: -------------------------------------------------------------------------------- 1 | {"monitorId": 789, "deviceId": "abcd", "data": 144.0, "state": 2, "time_st": "2020-07-14T15:15:19.600000"} 2 | {"monitorId": 788, "deviceId": "abcd", "data": 144.0, "state": 2, "time_st": "2020-07-14T15:15:11.600000"} 3 | {"monitorId": 7887, "deviceId": "양현마을", "data": 144.0, "state": 2, "time_st": "2020-07-14T15:15:11.600000"} -------------------------------------------------------------------------------- /data-generator/src/main/resources/user_part.csv: -------------------------------------------------------------------------------- 1 | sam,true,content,2020-03-01 2 | leonard,false,content,2020-03-02 3 | 雪尽,true,content,2020-03-03 4 | leonard2,true,content,2020-03-01 5 | samd,false,content_test,2020-03-02 -------------------------------------------------------------------------------- /etl-job/src/main/java/Test.java: -------------------------------------------------------------------------------- 1 | import java.text.DateFormat; 2 | import java.text.SimpleDateFormat; 3 | import java.util.Date; 4 | 5 | public class Test { 6 | public static void main(String[] args) { 7 | Long time = System.currentTimeMillis(); 8 | DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); 9 | Date date = new Date(time); 10 | String jsonSchemaDate = dateFormat.format(date); 11 | System.out.println(jsonSchemaDate); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /etl-job/src/main/java/TestGen.java: -------------------------------------------------------------------------------- 1 | // 2 | //public final class WatermarkGenerator$0 3 | // extends org.apache.flink.table.runtime.generated.WatermarkGenerator { 4 | // 5 | // private transient org.apache.flink.table.planner.runtime.utils.JavaUserDefinedScalarFunctions$JavaFunc5 function_org$apache$flink$table$planner$runtime$utils$JavaUserDefinedScalarFunctions$JavaFunc5$ac4516f46aafeff3fbc8ae56b8d9fd58; 6 | // private transient org.apache.flink.table.dataformat.DataFormatConverters.TimestampConverter converter$5; 7 | // 8 | // public WatermarkGenerator$0(Object[] references) throws Exception { 9 | // 
function_org$apache$flink$table$planner$runtime$utils$JavaUserDefinedScalarFunctions$JavaFunc5$ac4516f46aafeff3fbc8ae56b8d9fd58 = (((org.apache.flink.table.planner.runtime.utils.JavaUserDefinedScalarFunctions$JavaFunc5) references[0])); 10 | // converter$5 = (((org.apache.flink.table.dataformat.DataFormatConverters.TimestampConverter) references[1])); 11 | // } 12 | // 13 | // @Override 14 | // public void open(org.apache.flink.configuration.Configuration parameters) throws Exception { 15 | // 16 | // function_org$apache$flink$table$planner$runtime$utils$JavaUserDefinedScalarFunctions$JavaFunc5$ac4516f46aafeff3fbc8ae56b8d9fd58.open(new org.apache.flink.table.functions.FunctionContext(getRuntimeContext())); 17 | // 18 | // } 19 | // 20 | // @Override 21 | // public Long currentWatermark(org.apache.flink.table.dataformat.BaseRow row) throws Exception { 22 | // 23 | // org.apache.flink.table.dataformat.SqlTimestamp field$1; 24 | // boolean isNull$1; 25 | // int field$2; 26 | // boolean isNull$2; 27 | // org.apache.flink.table.dataformat.SqlTimestamp result$3; 28 | // org.apache.flink.table.dataformat.SqlTimestamp result$6; 29 | // boolean isNull$6; 30 | // isNull$1 = row.isNullAt(0); 31 | // field$1 = null; 32 | // if (!isNull$1) { 33 | // field$1 = row.getTimestamp(0, 3); 34 | // } 35 | // isNull$2 = row.isNullAt(1); 36 | // field$2 = -1; 37 | // if (!isNull$2) { 38 | // field$2 = row.getInt(1); 39 | // } 40 | // 41 | // 42 | // 43 | // 44 | // 45 | // java.sql.Timestamp javaResult$4 = (java.sql.Timestamp) function_org$apache$flink$table$planner$runtime$utils$JavaUserDefinedScalarFunctions$JavaFunc5$ac4516f46aafeff3fbc8ae56b8d9fd58.eval(isNull$1 ? null : ((org.apache.flink.table.dataformat.SqlTimestamp) field$1), isNull$2 ? null : ((java.lang.Integer) field$2)); 46 | // result$3 = javaResult$4 == null ? 
null : ((org.apache.flink.table.dataformat.SqlTimestamp) converter$5.toInternal((java.sql.Timestamp) javaResult$4)); 47 | // 48 | // 49 | // isNull$6 = result$3 == null; 50 | // result$6 = null; 51 | // if (!isNull$6) { 52 | // result$6 = result$3; 53 | // } 54 | // 55 | // if (isNull$6) { 56 | // return null; 57 | // } else { 58 | // return result$6.getMillisecond(); 59 | // } 60 | // } 61 | // 62 | // @Override 63 | // public void close() throws Exception { 64 | // 65 | // function_org$apache$flink$table$planner$runtime$utils$JavaUserDefinedScalarFunctions$JavaFunc5$ac4516f46aafeff3fbc8ae56b8d9fd58.close(); 66 | // 67 | // } 68 | //} 69 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2es/Kafka2AppendEs.java: -------------------------------------------------------------------------------- 1 | package kafka2es; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.api.common.typeinfo.Types; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.table.api.EnvironmentSettings; 7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 8 | import org.apache.flink.table.functions.ScalarFunction; 9 | 10 | import java.sql.Timestamp; 11 | 12 | public class Kafka2AppendEs { 13 | private static String csvSourceDDL = "create table csv(" + 14 | " pageId VARCHAR," + 15 | " eventId VARCHAR," + 16 | " recvTime VARCHAR" + 17 | ") with (" + 18 | " 'connector.type' = 'filesystem',\n" + 19 | " 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv',\n" + 20 | " 'format.type' = 'csv',\n" + 21 | " 'format.fields.0.name' = 'pageId',\n" + 22 | " 'format.fields.0.data-type' = 'STRING',\n" + 23 | " 'format.fields.1.name' = 'eventId',\n" + 24 | " 'format.fields.1.data-type' = 'STRING',\n" + 25 | " 'format.fields.2.name' = 'recvTime',\n" + 26 | " 'format.fields.2.data-type' = 'STRING')"; 27 | private static String sinkDDL = "CREATE TABLE append_test (\n" + 28 | " aggId varchar ,\n" + 29 | " pageId varchar ,\n" + 30 | " ts varchar ,\n" + 31 | " expoCnt int ,\n" + 32 | " clkCnt int\n" + 33 | ") WITH (\n" + 34 | "'connector.type' = 'elasticsearch',\n" + 35 | "'connector.version' = '6',\n" + 36 | "'connector.hosts' = 'http://localhost:9200',\n" + 37 | "'connector.index' = 'append_test7',\n" + 38 | "'connector.document-type' = '_doc',\n" + 39 | "'update-mode' = 'upsert',\n" + 40 | "'connector.key-delimiter' = '$',\n" + 41 | "'connector.key-null-literal' = 'n/a',\n" + 42 | "'connector.bulk-flush.interval' = '1000',\n" + 43 | "'format.type' = 'json'\n" + 44 | ")\n"; 45 | private static String query = "INSERT INTO append_test\n" + 46 | " SELECT pageId,eventId,ts2Date(recvTime) as ts, 1, 1 from csv"; 47 | 48 | 49 | public static void main(String[] args) throws Exception { 50 | System.out.println(csvSourceDDL); 51 | System.out.print(sinkDDL); 52 | System.out.print(query); 53 | 54 | // legacy planner test passed 55 | // testLegacyPlanner(); 56 | 57 | // blink planner test passed 58 | testBlinkPlanner(); 59 | // System.out.println(sinkDDL); 60 | } 61 | 62 | public static void testLegacyPlanner() throws Exception { 63 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 64 | env.setParallelism(1); 65 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 66 | .useOldPlanner() 67 | .inStreamingMode() 68 | .build(); 69 | StreamTableEnvironment 
tableEnvironment = StreamTableEnvironment.create(env, envSettings); 70 | tableEnvironment.registerFunction("ts2Date", new ts2Date()); 71 | 72 | tableEnvironment.sqlUpdate(csvSourceDDL); 73 | tableEnvironment.sqlUpdate(sinkDDL); 74 | tableEnvironment.sqlUpdate(query); 75 | 76 | tableEnvironment.execute("Kafka2Es"); 77 | } 78 | 79 | public static void testBlinkPlanner() throws Exception { 80 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 81 | env.setParallelism(2); 82 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 83 | .useBlinkPlanner() 84 | .inStreamingMode() 85 | .build(); 86 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 87 | tableEnvironment.registerFunction("ts2Date", new ts2Date()); 88 | tableEnvironment.sqlUpdate(csvSourceDDL); 89 | tableEnvironment.sqlUpdate(sinkDDL); 90 | tableEnvironment.sqlUpdate(query); 91 | 92 | tableEnvironment.execute("Kafka2Es"); 93 | } 94 | 95 | public static class ts2Date extends ScalarFunction { 96 | public String eval(String timeStr) { 97 | Timestamp t = Timestamp.valueOf(timeStr); 98 | return t.getDate() + " " + t.getHours() + ":" + t.getMinutes(); 99 | } 100 | 101 | public TypeInformation getResultType(Class[] signature) { 102 | return Types.STRING; 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/EventTimeBucketAssigner.java: -------------------------------------------------------------------------------- 1 | package kafka2file; 2 | 3 | import org.apache.flink.core.io.SimpleVersionedSerializer; 4 | import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner; 5 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.SimpleVersionedStringSerializer; 6 | 7 | import com.fasterxml.jackson.databind.JsonNode; 8 | import com.fasterxml.jackson.databind.ObjectMapper; 9 | 10 | import java.text.SimpleDateFormat; 11 | import java.util.Date; 12 | 13 | public class EventTimeBucketAssigner implements BucketAssigner { 14 | private ObjectMapper mapper = new ObjectMapper(); 15 | @Override 16 | public String getBucketId(String element, Context context) { 17 | String partitionValue; 18 | try { 19 | JsonNode node = mapper.readTree(element); 20 | long date = (long) (node.path("order_time").floatValue() * 1000); 21 | partitionValue = new SimpleDateFormat("yyyyMMdd").format(new Date(date)); 22 | } catch (Exception e){ 23 | partitionValue = "00000000"; 24 | } 25 | return "dt=" + partitionValue; 26 | } 27 | 28 | @Override 29 | public SimpleVersionedSerializer getSerializer() { 30 | return SimpleVersionedStringSerializer.INSTANCE; 31 | } 32 | } -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/ReadHiveDataETL.java: -------------------------------------------------------------------------------- 1 | //package kafka2file; 2 | // 3 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | //import org.apache.flink.table.api.EnvironmentSettings; 5 | //import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | //import org.apache.flink.table.catalog.hive.HiveCatalog; 7 | //import org.apache.flink.types.Row; 8 | // 9 | //public class ReadHiveDataETL { 10 | // public static void main(String[] args) throws Exception{ 11 | // StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); 12 | 
// EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance() 13 | // .useBlinkPlanner() 14 | // .inStreamingMode() 15 | // .build(); 16 | // executionEnvironment.setParallelism(1); 17 | // StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(executionEnvironment, environmentSettings); 18 | // testReadHive(tableEnvironment); 19 | // } 20 | // 21 | // private static void testReadHive(StreamTableEnvironment tableEnvironment) throws Exception { 22 | // HiveCatalog hiveCatalog = new HiveCatalog("myhive", "default", "/Users/bang/hive-3.1.2/conf", "3.1.2"); 23 | // tableEnvironment.registerCatalog("myhive", hiveCatalog); 24 | // tableEnvironment.useCatalog("myhive"); 25 | // tableEnvironment.useDatabase("default"); 26 | // tableEnvironment.toAppendStream(tableEnvironment.sqlQuery("select * from user_info"), Row.class).print(); 27 | // tableEnvironment.execute("readHive"); 28 | // } 29 | // 30 | //} 31 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/StreamETLKafka2Hdfs.java: -------------------------------------------------------------------------------- 1 | //package kafka2file; 2 | // 3 | //import org.apache.flink.api.common.serialization.SimpleStringEncoder; 4 | //import org.apache.flink.api.common.serialization.SimpleStringSchema; 5 | //import org.apache.flink.core.fs.Path; 6 | //import org.apache.flink.runtime.state.StateBackend; 7 | //import org.apache.flink.runtime.state.filesystem.FsStateBackend; 8 | //import org.apache.flink.streaming.api.datastream.DataStream; 9 | //import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | //import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink; 12 | //import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy; 13 | //import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; 14 | // 15 | //import java.util.Properties; 16 | // 17 | //public class StreamETLKafka2Hdfs { 18 | // public static void main(String[] args) throws Exception { 19 | // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | // env.setParallelism(1); 21 | // //checkpoint 22 | // env.enableCheckpointing(60_000); 23 | // env.setStateBackend((StateBackend) new FsStateBackend("file:///tmp/flink/checkpoints")); 24 | // env.getCheckpointConfig().enableExternalizedCheckpoints( 25 | // CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION); 26 | // 27 | // //source 28 | // Properties props = new Properties(); 29 | // props.setProperty("bootstrap.servers", "localhost:9092"); 30 | // FlinkKafkaConsumer010 consumer = new FlinkKafkaConsumer010<>( 31 | // "flink_orders", new SimpleStringSchema(), props); 32 | // 33 | // //transformation 34 | // DataStream stream = env.addSource(consumer) 35 | // .map(r -> r); 36 | // 37 | // //sink 38 | // StreamingFileSink sink = StreamingFileSink 39 | // .forRowFormat(new Path("/tmp/kafka-loader"), new SimpleStringEncoder()) 40 | // .withRollingPolicy(DefaultRollingPolicy.create().build()) 41 | // .withBucketAssigner(new EventTimeBucketAssigner()) 42 | // .build(); 43 | // stream.addSink(sink); 44 | // 45 | // env.execute(); 46 | // } 47 | //} 48 | // 49 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/TestCsv2Csv.java: 
-------------------------------------------------------------------------------- 1 | package kafka2file; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestCsv2Csv { 9 | public static void main(String[] args) throws Exception { 10 | EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance() 11 | .useBlinkPlanner() 12 | .inStreamingMode() 13 | .build(); 14 | StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | executionEnvironment.setParallelism(1); 16 | 17 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(executionEnvironment, environmentSettings); 18 | 19 | String csvSourceDDL = "create table csv(" + 20 | " id INT," + 21 | " note STRING," + 22 | " country STRING," + 23 | " record_time TIMESTAMP(4)," + 24 | " doub_val DECIMAL(6, 2)" + 25 | ") with (" + 26 | " 'connector.type' = 'filesystem',\n" + 27 | " 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test.csv',\n" + 28 | " 'format.type' = 'csv'" + 29 | ")"; 30 | String csvSink = "create table csvSink(" + 31 | " jnlno STRING,\n" + 32 | " taskid char(4),\n" + 33 | " hit VARCHAR " + 34 | ") with (" + 35 | " 'connector.type' = 'filesystem',\n" + 36 | " 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test12312.csv',\n" + 37 | " 'format.type' = 'csv'" + 38 | ")"; 39 | tableEnvironment.sqlUpdate(csvSourceDDL); 40 | tableEnvironment.sqlUpdate(csvSink); 41 | tableEnvironment.sqlUpdate("insert into csvSink select a.country,'111111qeq','false' from csv a"); 42 | System.out.println(csvSourceDDL); 43 | System.out.println(csvSink); 44 | System.out.println("insert into csvSink select a.country,'111111qeq','false' from csv a"); 45 | 46 | // 47 | // tableEnvironment.toAppendStream( 48 | // tableEnvironment.sqlQuery("insert into target select a.country,'111111qeq','false' from csv a"), 49 | // Row.class).print(); 50 | tableEnvironment.execute("csvTest"); 51 | 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/TestCsv2Csv1.java: -------------------------------------------------------------------------------- 1 | package kafka2file; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestCsv2Csv1 { 9 | public static void main(String[] args) throws Exception { 10 | EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance() 11 | .useBlinkPlanner() 12 | .inStreamingMode() 13 | .build(); 14 | StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | executionEnvironment.setParallelism(1); 16 | 17 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(executionEnvironment, environmentSettings); 18 | 19 | String csvSourceDDL = "create table csv(" + 20 | "rowkey INT,\n" + 21 | "f1c1 INT,\n" + 22 | "f2c1 STRING,\n" + 23 | "f2c2 BIGINT,\n" + 24 | "f3c1 DOUBLE,\n" + 25 | "f3c2 BOOLEAN,\n" + 26 | "f3c3 STRING,\n" + 27 | "f4c1 TIMESTAMP(3),\n" + 28 | "f4c2 DATE,\n" + 29 | 
"f4c3 TIME(3),\n" + 30 | "f5c1 TIMESTAMP(4),\n" + 31 | "f5c2 DECIMAL(10, 4)" + 32 | ") with (" + 33 | " 'connector.type' = 'filesystem',\n" + 34 | " 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test1.csv',\n" + 35 | " 'format.type' = 'csv'" + 36 | ")"; 37 | 38 | tableEnvironment.sqlUpdate(csvSourceDDL); 39 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery("select f5c1, f5c2 from csv"), Row.class).print(); 40 | 41 | executionEnvironment.execute("csvTest"); 42 | 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/TestCsvError.java: -------------------------------------------------------------------------------- 1 | package kafka2file; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestCsvError { 9 | public static void main(String[] args) throws Exception { 10 | EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance() 11 | .useBlinkPlanner() 12 | .inStreamingMode() 13 | .build(); 14 | StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | executionEnvironment.setParallelism(1); 16 | 17 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(executionEnvironment, environmentSettings); 18 | 19 | String csvSourceDDL = "CREATE TABLE `src` (\n" + 20 | "key bigint,\n" + 21 | "v varchar\n" + 22 | ") WITH (\n" + 23 | "'connector'='filesystem',\n" + 24 | "'csv.field-delimiter'='|',\n" + 25 | "'path'='file:///Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/src.csv',\n" + 26 | "'csv.null-literal'='',\n" + 27 | "'format'='csv'\n" + 28 | ")"; 29 | String csvSinkDDL = "CREATE TABLE `sink` (\n" + 30 | "c1 decimal(10, 2),\n" + 31 | "c2 varchar,\n" + 32 | "c3 varchar" + 33 | ") WITH (\n" + 34 | "'connector'='filesystem',\n" + 35 | "'csv.field-delimiter'='|',\n" + 36 | "'path'='/Users/bang/sink.csv',\n" + 37 | "'csv.null-literal'='',\n" + 38 | "'format'='csv'\n" + 39 | ")"; 40 | 41 | tableEnvironment.sqlUpdate(csvSourceDDL); 42 | tableEnvironment.executeSql(csvSinkDDL); 43 | // tableEnvironment.executeSql("insert into sink select\n" + 44 | // " cast(key as decimal(10,2)) as c1,\n" + 45 | // " cast(key as char(10)) as c2,\n" + 46 | // " cast(key as varchar(10)) as c3\n" + 47 | // " from src\n").collect(); 48 | 49 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery("select\n" + 50 | " cast(key as decimal(10,2)) as c1,\n" + 51 | " cast(key as char(10)) as c2,\n" + 52 | " cast(key as varchar(10)) as c3\n" + 53 | " from src\n"), Row.class).print(); 54 | executionEnvironment.execute("csvTest"); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/TestFileSink.scala: -------------------------------------------------------------------------------- 1 | //package kafka2file 2 | // 3 | //import java.util.Properties 4 | // 5 | //import org.apache.flink.api.common.serialization.SimpleStringEncoder 6 | //import org.apache.flink.core.fs.Path 7 | //import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink 8 | //import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner 9 | //import 
org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy 10 | //import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 11 | // 12 | ////import org.apache.flink.api.common.functions.MapFunction 13 | ////import org.apache.flink.api.common.serialization.{SimpleStringEncoder, SimpleStringSchema} 14 | ////import org.apache.flink.core.fs.Path 15 | ////import org.apache.flink.runtime.state.StateBackend 16 | ////import org.apache.flink.runtime.state.filesystem.FsStateBackend 17 | ////import org.apache.flink.streaming.api.environment.{CheckpointConfig, StreamExecutionEnvironment} 18 | ////import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink 19 | ////import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner 20 | ////import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy 21 | ////import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 22 | // 23 | //object TestFileSink { 24 | // def main(args: Array[String]): Unit = { 25 | // val env = StreamExecutionEnvironment.getExecutionEnvironment 26 | // env.setParallelism(1) 27 | // //checkpoint 28 | // //source 29 | // val props = new Properties 30 | // props.setProperty("bootstrap.servers", "localhost:9092") 31 | //// val consumer = new FlinkKafkaConsumer010[String]("flink_orders", new SimpleStringSchema, props) 32 | // 33 | // //transformation 34 | //// val stream = env.addSource(consumer).map() 35 | // 36 | // // //sink 37 | // // Encoder myEncoder = new SimpleStringEncoder<>(); 38 | // // BucketAssigner myBucketAssigner = new EventTimeBucketAssigner(); 39 | // // BucketAssigner myBucketAssigner = new DateTimeBucketAssigner(); 40 | // 41 | // val sink = StreamingFileSink 42 | // .forRowFormat(new Path("/tmp/kafka-loader"), new SimpleStringEncoder[String]) 43 | // .withRollingPolicy(DefaultRollingPolicy.builder().build()) 44 | // .withBucketAssigner(new DateTimeBucketAssigner[String, String]) 45 | // .build() 46 | // 47 | // val sink1 = StreamingFileSink 48 | // .forRowFormat(new Path("/tmp/kafka-loader"), new SimpleStringEncoder) 49 | // .withRollingPolicy(DefaultRollingPolicy.builder().build()) 50 | // .withBucketAssigner(new DateTimeBucketAssigner) 51 | // .build() 52 | // 53 | //// stream.addSink(sink) 54 | // 55 | // env.execute 56 | // } 57 | //} 58 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/Write2Kafka.java: -------------------------------------------------------------------------------- 1 | package kafka2file; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | 7 | public class Write2Kafka { 8 | public static void main(String[] args) throws Exception { 9 | EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance() 10 | .useBlinkPlanner() 11 | .inStreamingMode() 12 | .build(); 13 | StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); 14 | executionEnvironment.setParallelism(1); 15 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(executionEnvironment, environmentSettings); 16 | constructKafkaData(tableEnvironment); 17 | 18 | } 19 | 20 | private static void constructKafkaData(StreamTableEnvironment tableEnvironment) throws Exception { 
21 | String csvSourceDDL = "create table csv( " + 22 | "user_name VARCHAR, " + 23 | "is_new BOOLEAN, " + 24 | "content VARCHAR, " + 25 | "date_col VARCHAR) with ( " + 26 | " 'connector.type' = 'filesystem',\n" + 27 | " 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user_part.csv',\n" + 28 | " 'format.type' = 'csv',\n" + 29 | " 'format.fields.0.name' = 'user_name',\n" + 30 | " 'format.fields.0.data-type' = 'STRING',\n" + 31 | " 'format.fields.1.name' = 'is_new',\n" + 32 | " 'format.fields.1.data-type' = 'BOOLEAN',\n" + 33 | " 'format.fields.2.name' = 'content',\n" + 34 | " 'format.fields.2.data-type' = 'STRING',\n" + 35 | " 'format.fields.3.name' = 'date_col',\n" + 36 | " 'format.fields.3.data-type' = 'STRING')"; 37 | tableEnvironment.sqlUpdate(csvSourceDDL); 38 | 39 | String sinkTableDDL = "CREATE TABLE csvData (\n" + 40 | " user_name STRING,\n" + 41 | " is_new BOOLEAN,\n" + 42 | " content STRING,\n" + 43 | " date_col STRING" + 44 | ") WITH (\n" + 45 | " 'connector.type' = 'kafka',\n" + 46 | " 'connector.version' = '0.10',\n" + 47 | " 'connector.topic' = 'csv_data',\n" + 48 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 49 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 50 | " 'connector.properties.group.id' = 'testGroup3',\n" + 51 | " 'connector.startup-mode' = 'earliest-offset',\n" + 52 | " 'format.type' = 'csv')"; 53 | tableEnvironment.sqlUpdate(sinkTableDDL); 54 | 55 | String querySql = "insert into csvData \n" + 56 | "select user_name, is_new, content, date_col from\n" + 57 | "csv"; 58 | tableEnvironment.sqlUpdate(querySql); 59 | tableEnvironment.execute("flinkFileCsv2KafkaCsv"); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2hbase/UnboundedKafkaJoinHbase2Hbase.java: -------------------------------------------------------------------------------- 1 | package kafka2hbase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | import constants.FlinkSqlConstants; 9 | 10 | public class UnboundedKafkaJoinHbase2Hbase { 11 | public static void main(String[] args) throws Exception { 12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 13 | env.setParallelism(1); 14 | 15 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 16 | .useBlinkPlanner() 17 | .inStreamingMode() 18 | .build(); 19 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 20 | testJoinDDLHbaseWithFunction(env, tableEnvironment); 21 | } 22 | 23 | 24 | private static void testJoinDDLHbaseWithFunction(StreamExecutionEnvironment env, StreamTableEnvironment tableEnvironment) throws Exception { 25 | tableEnvironment.sqlUpdate(FlinkSqlConstants.ordersTableDDL11); 26 | tableEnvironment.sqlUpdate(FlinkSqlConstants.mysqlCurrencyDDL11); 27 | tableEnvironment.sqlUpdate(FlinkSqlConstants.hbaseCountryDDLWithPrecison11); 28 | 29 | String sinkTableDDL = "CREATE TABLE gmv (\n" + 30 | " rowkey VARCHAR,\n" + 31 | " f1 ROW," + 32 | " f2 ROW" + 33 | ") WITH (\n" + 34 | " 'connector' = 'hbase-1.4',\n" + 35 | " 'table-name' = 'gmv',\n" + 36 | " 'zookeeper.quorum' = 'localhost:2182',\n" + 37 | " 'zookeeper.znode.parent' = '/hbase',\n" + 38 | " 'sink.buffer-flush.max-size' 
= '10mb', \n" + 39 | " 'sink.buffer-flush.max-rows' = '1000', \n" + 40 | " 'sink.buffer-flush.interval' = '2s' " + 41 | ")"; 42 | tableEnvironment.sqlUpdate(sinkTableDDL); 43 | 44 | //test lookup 45 | String querySQL = 46 | " select rowkey, ROW(max(ts), max(item), max(country_name)) as f1, max(gdp), max(record_timestamp3)\n" + 47 | " from (" + 48 | "select concat(cast(o.ts as VARCHAR), '_', item, '_', co.f1.country_name) as rowkey,\n" + 49 | " cast(o.ts as VARCHAR) as ts, o.item as item, co.f1.country_name as country_name," + 50 | "co.gdp as gdp, co.record_timestamp3 as record_timestamp3\n" + 51 | " from orders as o \n" + 52 | " left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c\n" + 53 | " on o.currency = c.currency_name\n" + 54 | " left outer join country FOR SYSTEM_TIME AS OF o.proc_time co\n" + 55 | " on c.country = co.rowkey \n" + 56 | ") a group by rowkey\n" ; 57 | 58 | tableEnvironment.toRetractStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 59 | env.execute(); 60 | 61 | 62 | // test source 63 | // tableEnvironment.toRetractStream(tableEnvironment.sqlQuery("select * from (select rowkey, f1.country_name,f1.country_name_cn, f2.record_timestamp3,f2.record_timestamp9, f2.gdp from country) a "), Row.class) 64 | // .print(); 65 | // env.execute(); 66 | 67 | // tableEnvironment.execute("KafkaJoinHbase2Hbase"); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2jdbc/KafkaJoinJdbc2JdbcProc.java: -------------------------------------------------------------------------------- 1 | package kafka2jdbc; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | 7 | import constants.FlinkSqlConstants; 8 | 9 | public class KafkaJoinJdbc2JdbcProc { 10 | public static void main(String[] args) throws Exception { 11 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 12 | env.setParallelism(1); 13 | 14 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 15 | .useBlinkPlanner() 16 | .inStreamingMode() 17 | .build(); 18 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 19 | 20 | tableEnvironment.sqlUpdate(FlinkSqlConstants.ordersTableDDL); 21 | tableEnvironment.sqlUpdate(FlinkSqlConstants.mysqlCurrencyDDL); 22 | 23 | String sinkTableDDL = "CREATE TABLE gmv (\n" + 24 | " log_per_min STRING,\n" + 25 | " item STRING,\n" + 26 | " order_cnt BIGINT,\n" + 27 | " currency_time TIMESTAMP(3),\n" + 28 | " gmv DECIMAL(38, 18)," + 29 | " timestamp9 TIMESTAMP(3),\n" + 30 | " time9 TIME(3),\n" + 31 | " gdp DECIMAL(38, 18)\n" + 32 | ") WITH (\n" + 33 | " 'connector.type' = 'jdbc',\n" + 34 | " 'connector.url' = 'jdbc:mysql://localhost:3306/test',\n" + 35 | " 'connector.username' = 'root'," + 36 | " 'connector.table' = 'gmv',\n" + 37 | " 'connector.driver' = 'com.mysql.jdbc.Driver',\n" + 38 | " 'connector.write.flush.max-rows' = '5000', \n" + 39 | " 'connector.write.flush.interval' = '2s', \n" + 40 | " 'connector.write.max-retries' = '3'" + 41 | ")"; 42 | tableEnvironment.sqlUpdate(sinkTableDDL); 43 | 44 | String querySQL = "insert into gmv \n" + 45 | "select cast(TUMBLE_END(o.proc_time, INTERVAL '10' SECOND) as VARCHAR) as log_ts,\n" + 46 | " o.item, COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DECIMAL(38, 4)) as gmv,\n" + 
47 | " c.timestamp9, c.time9, c.gdp\n" + 48 | "from orders as o \n" + 49 | "join currency FOR SYSTEM_TIME AS OF o.proc_time c\n" + 50 | "on o.currency = c.currency_name\n" + 51 | "group by o.item, c.currency_time, c.rate, c.timestamp9, c.time9, c.gdp, TUMBLE(o.proc_time, INTERVAL '10' SECOND)\n" ; 52 | tableEnvironment.sqlUpdate(querySQL); 53 | 54 | tableEnvironment.execute("KafkaJoinJdbc2Jdbc"); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2jdbc/TestJdbc.java: -------------------------------------------------------------------------------- 1 | package kafka2jdbc; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestJdbc { 9 | public static void main(String[] args) throws Exception { 10 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 11 | env.setParallelism(1); 12 | 13 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 14 | .useBlinkPlanner() 15 | .inStreamingMode() 16 | .build(); 17 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 18 | String mysqlCurrencyDDL = "CREATE TABLE currency (\n" + 19 | " currency_id BIGINT,\n" + 20 | " currency_name STRING,\n" + 21 | " rate DOUBLE,\n" + 22 | " currency_time TIMESTAMP(3),\n" + 23 | " country STRING,\n" + 24 | " timestamp9 TIMESTAMP(6),\n" + 25 | " time9 TIME(3),\n" + 26 | " gdp DECIMAL(10, 6)\n" + 27 | ") WITH (\n" + 28 | " 'connector' = 'jdbc',\n" + 29 | " 'url' = 'jdbc:mysql://localhost:3306/test',\n" + 30 | " 'username' = 'root'," + 31 | " 'password' = ''," + 32 | " 'table-name' = 'currency',\n" + 33 | " 'driver' = 'com.mysql.jdbc.Driver',\n" + 34 | " 'lookup.cache.max-rows' = '500', \n" + 35 | " 'lookup.cache.ttl' = '10s',\n" + 36 | " 'lookup.max-retries' = '3'" + 37 | ")"; 38 | System.out.println(mysqlCurrencyDDL); 39 | 40 | tableEnvironment.sqlUpdate(mysqlCurrencyDDL); 41 | 42 | 43 | String querySQL = "select * from currency" ; 44 | 45 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 46 | env.execute(); 47 | // tableEnvironment.execute("KafkaJoinJdbc2Jdbc"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2jdbc/UnboundedKafkaJoinJdbc2Jdbc.java: -------------------------------------------------------------------------------- 1 | package kafka2jdbc; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.api.common.typeinfo.Types; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.table.api.EnvironmentSettings; 7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 8 | import org.apache.flink.table.functions.ScalarFunction; 9 | import org.apache.flink.types.Row; 10 | 11 | import constants.FlinkSqlConstants; 12 | 13 | public class UnboundedKafkaJoinJdbc2Jdbc { 14 | public static void main(String[] args) throws Exception { 15 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 16 | env.setParallelism(1); 17 | 18 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 19 | .useBlinkPlanner() 20 | .inStreamingMode() 21 | .build(); 22 | 
StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 23 | 24 | tableEnvironment.registerFunction("add_one_fun", new AddOneFunc()); 25 | 26 | tableEnvironment.sqlUpdate(FlinkSqlConstants.ordersTableDDL); 27 | tableEnvironment.sqlUpdate(FlinkSqlConstants.mysqlCurrencyDDL); 28 | 29 | String sinkTableDDL = "CREATE TABLE gmv (\n" + 30 | " log_per_min STRING,\n" + 31 | " item STRING,\n" + 32 | " order_cnt BIGINT,\n" + 33 | " currency_time TIMESTAMP(3),\n" + 34 | " gmv DECIMAL(38, 18)," + 35 | " timestamp9 TIMESTAMP(3),\n" + 36 | " time9 TIME(3),\n" + 37 | " gdp DECIMAL(38, 18)\n" + 38 | ") WITH (\n" + 39 | " 'connector.type' = 'jdbc',\n" + 40 | " 'connector.url' = 'jdbc:mysql://localhost:3306/test',\n" + 41 | " 'connector.username' = 'root'," + 42 | " 'connector.table' = 'gmv',\n" + 43 | " 'connector.driver' = 'com.mysql.jdbc.Driver',\n" + 44 | " 'connector.write.flush.max-rows' = '5000', \n" + 45 | " 'connector.write.flush.interval' = '2s', \n" + 46 | " 'connector.write.max-retries' = '3'" + 47 | ")"; 48 | tableEnvironment.sqlUpdate(sinkTableDDL); 49 | 50 | String querySQL = "insert into gmv \n" + 51 | "select max(log_ts),\n" + 52 | " item, COUNT(order_id) as order_cnt, max(currency_time), cast(sum(amount_kg) * max(rate) as DOUBLE) as gmv,\n" + 53 | " max(timestamp9), max(time9), max(gdp) \n" + 54 | " from ( \n" + 55 | " select cast(o.ts as VARCHAR) as log_ts, o.item as item, o.order_id as order_id, c.currency_time as currency_time,\n" + 56 | " o.amount_kg as amount_kg, c.rate as rate, c.timestamp9 as timestamp9, c.time9 as time9, c.gdp as gdp \n" + 57 | " from orders as o \n" + 58 | " join currency FOR SYSTEM_TIME AS OF o.proc_time c \n" + 59 | " on o.currency = c.currency_name \n" + 60 | " ) a group by item\n" ; 61 | 62 | System.out.println(FlinkSqlConstants.ordersTableDDL); 63 | System.out.println(FlinkSqlConstants.mysqlCurrencyDDL); 64 | System.out.println(sinkTableDDL); 65 | System.out.println(querySQL); 66 | // tableEnvironment.toRetractStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 67 | tableEnvironment.sqlUpdate(querySQL); 68 | tableEnvironment.execute("KafkaJoinJdbc2Jdbc"); 69 | } 70 | 71 | public static class AddOneFunc extends ScalarFunction { 72 | public Long eval(long t) { 73 | return t + 1; 74 | } 75 | 76 | public TypeInformation getResultType(Class[] signature) { 77 | return Types.LONG; 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2jdbc/testNonExistedTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package kafka2jdbc; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.streaming.api.transformations.ShuffleMode; 23 | import org.apache.flink.table.api.EnvironmentSettings; 24 | import org.apache.flink.table.api.TableEnvironment; 25 | import org.apache.flink.table.api.config.ExecutionConfigOptions; 26 | import org.apache.flink.table.api.config.OptimizerConfigOptions; 27 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 28 | 29 | public class testNonExistedTable { 30 | public static void main(String[] args) throws Exception { 31 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | env.setParallelism(4); 33 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 34 | .useBlinkPlanner() 35 | .inStreamingMode() 36 | .build(); 37 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 38 | 39 | String csvSourceDDL = "create table csv(" + 40 | " id INT," + 41 | " note VARCHAR," + 42 | " country VARCHAR," + 43 | " record_time TIMESTAMP(3)," + 44 | " doub_val DECIMAL(6, 2)," + 45 | " date_val DATE," + 46 | " time_val TIME" + 47 | ") with (" + 48 | " 'connector.type' = 'filesystem',\n" + 49 | " 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/test_nonexistedTable.csv',\n" + 50 | " 'format.type' = 'csv'" + 51 | ")"; 52 | String mysqlSinkDDL = "CREATE TABLE nonExisted (\n" + 53 | " c0 BOOLEAN," + 54 | " c1 INTEGER," + 55 | " c2 BIGINT," + 56 | " c3 FLOAT," + 57 | " c4 DOUBLE," + 58 | " c5 DECIMAL(38, 18)," + 59 | " c6 VARCHAR," + 60 | " c7 DATE," + 61 | " c8 TIME," + 62 | " c9 TIMESTAMP(3)" + 63 | ") WITH (\n" + 64 | " 'connector.type' = 'jdbc',\n" + 65 | " 'connector.url' = 'jdbc:mysql://localhost:3306/test',\n" + 66 | " 'connector.username' = 'root'," + 67 | " 'connector.table' = 'nonExisted3',\n" + 68 | " 'connector.driver' = 'com.mysql.jdbc.Driver',\n" + 69 | " 'connector.write.auto-create-table' = 'true' " + 70 | ")"; 71 | String query = "insert into nonExisted " + 72 | "select max(c0),c1,c2,c3,c4,max(c5),max(c6),max(c7),max(c8),max(c9) from " + 73 | " (select true as c0, id as c1, cast(id as bigint) as c2,cast(doub_val as float)as c3,cast(doub_val as double) as c4," + 74 | " doub_val as c5, country as c6, date_val as c7, time_val as c8, record_time as c9 from csv)" + 75 | " a group by c1, c2, c3, c4"; 76 | // String query = "insert into nonExisted select true as c0, id as c1, cast(id as bigint) as c2,cast(doub_val as float)as c3,cast(doub_val as double) as c4," + 77 | // " doub_val as c5, country as c6, date_val as c7, time_val as c8, record_time as c9 from csv"; 78 | tableEnvironment.sqlUpdate(csvSourceDDL); 79 | tableEnvironment.sqlUpdate(mysqlSinkDDL); 80 | tableEnvironment.sqlUpdate(query); 81 | tableEnvironment.execute("csvTest"); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2kafka/ConsumeConfluentAvroTest.java: -------------------------------------------------------------------------------- 1 | package kafka2kafka; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | import io.confluent.kafka.serializers.KafkaAvroSerializer; 9 | import kafka.UserAvro; 10 
| import org.apache.kafka.clients.producer.KafkaProducer; 11 | import org.apache.kafka.clients.producer.ProducerConfig; 12 | import org.apache.kafka.clients.producer.ProducerRecord; 13 | import org.apache.kafka.common.serialization.StringSerializer; 14 | 15 | import java.io.IOException; 16 | import java.util.Properties; 17 | import java.util.Random; 18 | import java.util.stream.IntStream; 19 | 20 | public class ConsumeConfluentAvroTest { 21 | 22 | public static void main(String[] args) throws Exception { 23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | env.setParallelism(1); 25 | 26 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 27 | .useBlinkPlanner() 28 | .inStreamingMode() 29 | .build(); 30 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 31 | 32 | String tableDDL = "CREATE TABLE WikipediaFeed (\n" + 33 | " user_name STRING,\n" + 34 | " is_new BOOLEAN,\n" + 35 | " content STRING" + 36 | ") WITH (\n" + 37 | " 'connector.type' = 'kafka',\n" + 38 | " 'connector.version' = '0.10',\n" + 39 | " 'connector.topic' = 'WikipediaFeed',\n" + 40 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 41 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 42 | " 'connector.properties.group.id' = 'testGroup3',\n" + 43 | " 'connector.startup-mode' = 'earliest-offset',\n" + 44 | " 'format.type' = 'avro',\n" + 45 | " 'format.avro-schema' =\n" + 46 | " '{ \n" + 47 | " \"type\": \"record\",\n" + 48 | " \"name\": \"UserAvro\",\n" + 49 | " \"fields\": [\n" + 50 | " {\"name\": \"user_name\", \"type\": \"string\"},\n" + 51 | " {\"name\": \"is_new\", \"type\": \"boolean\"},\n" + 52 | " {\"name\": \"content\", \"type\": \"string\"}\n" + 53 | " ]\n" + 54 | " }'" + 55 | ")\n"; 56 | tableEnvironment.sqlUpdate(tableDDL); 57 | 58 | String querySQL = "select user_name, is_new, content \n" + 59 | "from WikipediaFeed\n" ; 60 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 61 | tableEnvironment.execute("KafkaAvro2Kafka"); 62 | } 63 | 64 | // prepare confluent avro foramt data 65 | private static void produceInputs() throws IOException { 66 | final String[] users = {"leonard", "bob", "joe", "damian", "tania", "phil", "sam", "lauren", "joseph"}; 67 | final Properties props = new Properties(); 68 | props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); 69 | props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); 70 | props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class); 71 | props.put("schema.registry.url", "http://localhost:8081"); 72 | final KafkaProducer producer = new KafkaProducer<>(props); 73 | final Random random = new Random(); 74 | 75 | IntStream.range(0, 10) 76 | .mapToObj(value -> new UserAvro(users[random.nextInt(users.length)], true, "content")) 77 | .forEach( 78 | record -> { 79 | System.out.println(record.toString()) ; 80 | producer.send(new ProducerRecord<>("WikipediaFeed", record.getUserName(), record)); 81 | }); 82 | 83 | producer.flush(); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2kafka/KafkaJoinJdbc2Kafka.java: -------------------------------------------------------------------------------- 1 | package kafka2kafka; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 
import org.apache.flink.table.api.EnvironmentSettings; 6 | import org.apache.flink.table.api.Types; 7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 8 | import org.apache.flink.table.functions.ScalarFunction; 9 | import org.apache.flink.types.Row; 10 | 11 | import constants.FlinkSqlConstants; 12 | 13 | import java.math.BigDecimal; 14 | 15 | public class KafkaJoinJdbc2Kafka { 16 | public static void main(String[] args) throws Exception { 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | env.setParallelism(1); 19 | 20 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 21 | .useBlinkPlanner() 22 | .inStreamingMode() 23 | .build(); 24 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 25 | 26 | tableEnvironment.registerFunction("func", new Func()); 27 | tableEnvironment.sqlUpdate(FlinkSqlConstants.ordersTableDDL); 28 | tableEnvironment.sqlUpdate(FlinkSqlConstants.mysqlCurrencyDDL); 29 | 30 | String sinkTableDDL = "CREATE TABLE gmv (\n" + 31 | " log_per_min STRING,\n" + 32 | " item STRING,\n" + 33 | " order_cnt BIGINT,\n" + 34 | " currency_time TIMESTAMP(3),\n" + 35 | " gmv DECIMAL(38, 18)" + 36 | ") WITH (\n" + 37 | " 'connector.type' = 'kafka',\n" + 38 | " 'connector.version' = '0.10',\n" + 39 | " 'connector.topic' = 'gmv',\n" + 40 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 41 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 42 | " 'format.type' = 'json',\n" + 43 | " 'format.derive-schema' = 'true'\n" + 44 | ")"; 45 | tableEnvironment.sqlUpdate(sinkTableDDL); 46 | 47 | String querySQL = 48 | "insert into gmv \n" + 49 | "select cast(TUMBLE_END(o.order_time, INTERVAL '10' SECOND) as VARCHAR) as log_per_min,\n" + 50 | " o.item, COUNT(o.order_id) as order_cnt, c.currency_time, " + 51 | " cast(sum(o.amount_kg) * c.rate as DECIMAL(38, 18)) as gmv \n" + 52 | " from orders as o \n" + 53 | " join currency FOR SYSTEM_TIME AS OF o.proc_time c\n" + 54 | " on o.currency = c.currency_name\n" + 55 | " group by o.item, c.currency_time,c.rate,TUMBLE(o.order_time, INTERVAL '10' SECOND)\n"; 56 | 57 | tableEnvironment.sqlUpdate(querySQL); 58 | System.out.println(FlinkSqlConstants.ordersTableDDL); 59 | System.out.println(FlinkSqlConstants.mysqlCurrencyDDL); 60 | System.out.println(sinkTableDDL); 61 | System.out.println(querySQL); 62 | 63 | tableEnvironment.execute("KafkaJoinJdbc2Kafka.sql"); 64 | } 65 | 66 | public static class Func extends ScalarFunction { 67 | public BigDecimal eval(BigDecimal amount) { 68 | return amount.multiply(new BigDecimal("100.0")); 69 | } 70 | 71 | @Override 72 | public TypeInformation getResultType(Class[] signature) { 73 | return Types.DECIMAL(); 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2kafka/KafkaJoinKafka2Kafka.java: -------------------------------------------------------------------------------- 1 | package kafka2kafka; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.EnvironmentSettings; 6 | import org.apache.flink.table.api.Types; 7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 8 | import org.apache.flink.table.functions.ScalarFunction; 9 | import org.apache.flink.types.Row; 10 | 11 | import constants.FlinkSqlConstants; 12 | 13 | import 
java.math.BigDecimal; 14 | 15 | public class KafkaJoinKafka2Kafka { 16 | public static String ordersTableDDL = "CREATE TABLE orders (\n" + 17 | " order_id STRING,\n" + 18 | " item STRING,\n" + 19 | " currency STRING,\n" + 20 | " amount INT,\n" + 21 | " order_time TIMESTAMP(3),\n" + 22 | " proc_time as PROCTIME()" + 23 | // ", WATERMARK FOR order_time AS order_time\n" + 24 | ") WITH (\n" + 25 | " 'connector' = 'kafka-0.10',\n" + 26 | " 'topic' = 'flink_orders',\n" + 27 | " 'properties.zookeeper.connect' = 'localhost:2181',\n" + 28 | " 'properties.bootstrap.servers' = 'localhost:9092',\n" + 29 | " 'properties.group.id' = 'testGroup',\n" + 30 | " 'scan.startup.mode' = 'earliest-offset',\n" + 31 | " 'format' = 'json'\n" + 32 | ")\n"; 33 | 34 | public static final String currencyTableDDL = "CREATE TABLE currency (\n" + 35 | " country STRING,\n" + 36 | " currency STRING,\n" + 37 | " rate INT,\n" + 38 | " rowtime TIMESTAMP(3)" + 39 | // ",WATERMARK FOR currency_time AS currency_time\n" + 40 | ") WITH (\n" + 41 | " 'connector' = 'kafka-0.10',\n" + 42 | " 'topic' = 'flink_currency',\n" + 43 | " 'properties.zookeeper.connect' = 'localhost:2181',\n" + 44 | " 'properties.bootstrap.servers' = 'localhost:9092',\n" + 45 | " 'properties.group.id' = 'testGroup',\n" + 46 | " 'scan.startup.mode' = 'earliest-offset',\n" + 47 | " 'format' = 'json'\n" + 48 | ")"; 49 | 50 | public static void main(String[] args) throws Exception { 51 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 52 | env.setParallelism(1); 53 | 54 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 55 | .useBlinkPlanner() 56 | .inStreamingMode() 57 | .build(); 58 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 59 | 60 | tableEnvironment.executeSql(ordersTableDDL); 61 | tableEnvironment.executeSql(currencyTableDDL); 62 | 63 | String querySQL = 64 | " select * \n" + 65 | " from orders as o \n" + 66 | " join currency c\n" + 67 | " on o.currency = c.currency\n"; 68 | String querySQL2 = "SELECT *\n" + 69 | "FROM currency AS r\n" + 70 | "WHERE r.rowtime = (\n" + 71 | " SELECT MAX(rowtime)\n" + 72 | " FROM currency AS r2\n" + 73 | " WHERE r2.currency = r.currency\n" + 74 | " AND r2.rowtime <= '10:58:00')"; 75 | 76 | System.out.println(tableEnvironment.sqlQuery(querySQL2).explain()); 77 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL2), Row.class).print(); 78 | env.execute(); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2kafka/KafkaJson2Kafka.java: -------------------------------------------------------------------------------- 1 | package kafka2kafka; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class KafkaJson2Kafka { 9 | 10 | public static void main(String[] args) throws Exception { 11 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 12 | env.setParallelism(1); 13 | 14 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 15 | .useBlinkPlanner() 16 | .inStreamingMode() 17 | .build(); 18 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 19 | 20 | String sourceTableDDL = "CREATE TABLE orders (\n" + 21 | " order_id 
STRING,\n" + 22 | " item STRING,\n" + 23 | " currency STRING,\n" + 24 | " amount DOUBLE,\n" + 25 | " order_time TIMESTAMP(3),\n" + 26 | " proc_time as PROCTIME(),\n" + 27 | " amount_kg as amount * 1000,\n" + 28 | " ts as order_time + INTERVAL '1' SECOND,\n" + 29 | " WATERMARK FOR order_time AS order_time" + 30 | ") WITH (\n" + 31 | " 'connector.type' = 'kafka',\n" + 32 | " 'connector.version' = '0.10',\n" + 33 | " 'connector.topic' = 'flink_orders',\n" + 34 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 35 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 36 | " 'connector.properties.group.id' = 'testGroup3',\n" + 37 | " 'connector.startup-mode' = 'earliest-offset',\n" + 38 | " 'format.type' = 'json',\n" + 39 | " 'format.derive-schema' = 'true'\n" + 40 | ")\n"; 41 | tableEnvironment.sqlUpdate(sourceTableDDL); 42 | 43 | String sinkTableDDL = "CREATE TABLE order_cnt (\n" + 44 | " log_per_min TIMESTAMP(3),\n" + 45 | " item STRING,\n" + 46 | " order_cnt BIGINT,\n" + 47 | " total_quality BIGINT\n" + 48 | ") WITH (\n" + 49 | " 'connector.type' = 'kafka',\n" + 50 | " 'connector.version' = '0.10',\n" + 51 | " 'connector.topic' = 'order_cnt',\n" + 52 | " 'update-mode' = 'append',\n" + 53 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 54 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 55 | " 'format.type' = 'json',\n" + 56 | " 'format.derive-schema' = 'true'\n" + 57 | ")"; 58 | tableEnvironment.sqlUpdate(sinkTableDDL); 59 | 60 | String querySQL = "insert into order_cnt \n" + 61 | "select TUMBLE_END(order_time, INTERVAL '10' SECOND),\n" + 62 | " item, COUNT(order_id) as order_cnt, CAST(sum(amount_kg) as BIGINT) as total_quality\n" + 63 | "from orders\n" + 64 | "group by item, TUMBLE(order_time, INTERVAL '10' SECOND)\n" ; 65 | 66 | tableEnvironment.sqlUpdate(querySQL); 67 | System.out.println(sourceTableDDL); 68 | System.out.println(sinkTableDDL); 69 | System.out.println(querySQL); 70 | 71 | tableEnvironment.execute("StreamKafka2KafkaJob"); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /etl-job/src/main/java/pge2e/PgCatalogTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package pge2e; 20 | 21 | import org.apache.flink.connector.jdbc.catalog.JdbcCatalog; 22 | import org.apache.flink.table.api.EnvironmentSettings; 23 | import org.apache.flink.table.api.TableEnvironment; 24 | import org.apache.flink.table.catalog.ObjectPath; 25 | 26 | import java.util.Arrays; 27 | 28 | public class PgCatalogTest { 29 | public static void main(String[] args) throws Exception { 30 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 31 | TableEnvironment tableEnv = TableEnvironment.create(settings); 32 | 33 | String name = "mypg"; 34 | String defaultDatabase = "mydb"; 35 | String username = "postgres"; 36 | String password = "postgres"; 37 | String baseUrl = "jdbc:postgresql://localhost:5432/"; 38 | 39 | JdbcCatalog catalog = new JdbcCatalog(name, defaultDatabase, username, password, baseUrl); 40 | tableEnv.registerCatalog("mypg", catalog); 41 | 42 | // set the JdbcCatalog as the current catalog of the session 43 | tableEnv.useCatalog("mypg"); 44 | 45 | System.out.println(tableEnv.getCatalog("mypg").get().databaseExists("mydb")); 46 | System.out.println(tableEnv.getCatalog("mypg").get().tableExists(new ObjectPath("mydb","public.primitive_arr_table"))); 47 | 48 | Arrays.stream(tableEnv.listDatabases()).forEach(System.out::println); 49 | 50 | Arrays.stream(tableEnv.listTables()).forEach(System.out::println); 51 | 52 | tableEnv.executeSql("select * from `public.primitive_arr_table`").print(); 53 | // true 54 | // true 55 | // postgres 56 | // mydb 57 | //bang.primitive_table 58 | //public.primitive_arr_table 59 | //public.primitive_serial_table 60 | //public.primitive_table 61 | //public.primitive_table2 62 | //public.simple_t1 63 | //+----------+-----------+--------------------------------+-----------+-----------+-----------------+----------------------+-----------------------------+--------------------------------+----------------------+---------------------+-----------+-----------+-----------------+-----------------------+--------------------------------+--------------------------+----------------------+ 64 | //| row_kind | int_arr | bytea_arr | short_arr | long_arr | real_arr | double_precision_arr | numeric_arr | numeric_arr_default | decimal_arr | boolean_arr | text_arr | char_arr | character_arr | character_varying_arr | timestamp_arr | date_arr | time_arr | 65 | //+----------+-----------+--------------------------------+-----------+-----------+-----------------+----------------------+-----------------------------+--------------------------------+----------------------+---------------------+-----------+-----------+-----------------+-----------------------+--------------------------------+--------------------------+----------------------+ 66 | //| +I | [1, 2, 3] | [[92, 120, 51, 50], [92, 1... | [3, 4, 5] | [4, 5, 6] | [5.5, 6.6, 7.7] | [6.6, 7.7, 8.8] | [7.70000, 8.80000, 9.90000] | [8.800000000000000000, 9.9... | [9.90, 10.10, 11.11] | [true, false, true] | [a, b, c] | [b, c, d] | [b , c , d ] | [b, c, d] | [2016-06-22T19:10:25, 2019... 
| [2015-01-01, 2020-01-01] | [00:51:03, 00:59:03] | 67 | //+----------+-----------+--------------------------------+-----------+-----------+-----------------+----------------------+-----------------------------+--------------------------------+----------------------+---------------------+-----------+-----------+-----------------+-----------------------+--------------------------------+--------------------------+----------------------+ 68 | //1 row in set 69 | // 70 | //Process finished with exit code 0 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue10.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 23 | 24 | import java.sql.Timestamp; 25 | import java.time.Instant; 26 | 27 | public class TestUserIssue10 { 28 | public static void main(String[] args) throws Exception { 29 | System.out.println(Timestamp.from(Instant.ofEpochMilli( 1593443236124L))); 30 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 32 | tableEnvironment.executeSql("CREATE TABLE ods_foo (\n" + 33 | " id INT,\n" + 34 | " usera ARRAY>\n" + 35 | ") WITH (" + 36 | " 'connector.type' = 'filesystem',\n" + 37 | " 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test1.csv',\n" + 38 | " 'format.type' = 'csv'" + 39 | ")"); 40 | environment.execute(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue11.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 23 | 24 | import java.sql.Timestamp; 25 | import java.time.Instant; 26 | 27 | public class TestUserIssue11 { 28 | public static void main(String[] args) throws Exception { 29 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 30 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 31 | tableEnvironment.executeSql("CREATE TABLE people (\n" + 32 | " user_name STRING,\n" + 33 | " content STRING\n" + 34 | ") WITH (\n" + 35 | " 'connector' = 'filesystem',\n" + 36 | " 'path' = 'file:///Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/avro/UserAvro.avsc',\n" + 37 | " 'format' = 'avro',\n" + 38 | " 'record-class' = 'avro.Person',\n" + 39 | " 'property-version' = '1',\n" + 40 | " 'properties.bootstrap.servers' = 'kafka:9092'\n" + 41 | ")"); 42 | 43 | System.out.println("CREATE TABLE people (\n" + 44 | " user_name STRING,\n" + 45 | " content STRING\n" + 46 | ") WITH (\n" + 47 | " 'connector' = 'filesystem',\n" + 48 | " 'path' = 'file:///Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/avro/UserAvro.avsc',\n" + 49 | " 'format' = 'avro',\n" + 50 | " 'record-class' = 'avro.Person',\n" + 51 | " 'property-version' = '1',\n" + 52 | " 'properties.bootstrap.servers' = 'kafka:9092'\n" + 53 | ")"); 54 | 55 | 56 | tableEnvironment.executeSql("select * from people"); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue12.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.EnvironmentSettings; 23 | import org.apache.flink.table.api.Table; 24 | import org.apache.flink.table.api.TableResult; 25 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 26 | 27 | public class TestUserIssue12 { 28 | public static void main(String[] args) throws Exception { 29 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 30 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 31 | environment.setParallelism(1); 32 | 33 | tableEnvironment.executeSql("create table csv( pageId VARCHAR, eventId VARCHAR, recvTime VARCHAR) with ( 'connector' = 'filesystem',\n" + 34 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv',\n" + 35 | " 'format' = 'csv')"); 36 | tableEnvironment.executeSql("CREATE TABLE es_table (\n" + 37 | " aggId varchar ,\n" + 38 | " pageId varchar ,\n" + 39 | " ts varchar ,\n" + 40 | " expoCnt int ,\n" + 41 | " clkCnt int\n" + 42 | ") WITH (\n" + 43 | "'connector' = 'elasticsearch-6',\n" + 44 | "'hosts' = 'http://localhost:9200',\n" + 45 | "'index' = 'usercase111',\n" + 46 | "'document-type' = '_doc',\n" + 47 | "'document-id.key-delimiter' = '$',\n" + 48 | "'sink.bulk-flush.interval' = '1000',\n" + 49 | "'format' = 'json'\n" + 50 | ")"); 51 | Table res = tableEnvironment.sqlQuery(" SELECT pageId,eventId,cast(recvTime as varchar) as ts, 1, 1 from csv"); 52 | TableResult tableResult = res.executeInsert("es_table"); 53 | tableResult.getJobClient().get(); 54 | 55 | 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue13.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 22 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 23 | import org.apache.flink.table.api.Table; 24 | import org.apache.flink.table.api.TableResult; 25 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 26 | import org.apache.flink.types.Row; 27 | import org.apache.flink.util.CloseableIterator; 28 | 29 | import static org.apache.flink.configuration.CheckpointingOptions.CHECKPOINTS_DIRECTORY; 30 | 31 | public class TestUserIssue13 { 32 | public static void main(String[] args) throws Exception { 33 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 34 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 35 | environment.setParallelism(1); 36 | 37 | tableEnvironment.executeSql("create table jsonT ( " + 38 | " `monitorId` STRING,\n" + 39 | " `deviceId` STRING,\n" + 40 | " `state` INT,\n" + 41 | " `time_st` TIMESTAMP(3),\n" + 42 | " WATERMARK FOR time_st AS time_st - INTERVAL '2' SECOND,\n" + 43 | " `data` DOUBLE) with ( 'connector' = 'filesystem',\n" + 44 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user4.json',\n" + 45 | " 'format' = 'json')"); 46 | System.out.println("create table jsonT ( " + 47 | " `monitorId` STRING,\n" + 48 | " `deviceId` STRING,\n" + 49 | " `state` INT,\n" + 50 | " `time_st` TIMESTAMP(3),\n" + 51 | " WATERMARK FOR time_st AS time_st - INTERVAL '2' SECOND,\n" + 52 | " `data` DOUBLE) with ( 'connector' = 'filesystem',\n" + 53 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user4.json',\n" + 54 | " 'format' = 'json')"); 55 | CloseableIterator tableResult = tableEnvironment.executeSql(" SELECT * from jsonT").collect(); 56 | while(tableResult.hasNext()) { 57 | System.out.println(tableResult.next()); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue14.java: -------------------------------------------------------------------------------- 1 | ///* 2 | // * Licensed to the Apache Software Foundation (ASF) under one 3 | // * or more contributor license agreements. See the NOTICE file 4 | // * distributed with this work for additional information 5 | // * regarding copyright ownership. The ASF licenses this file 6 | // * to you under the Apache License, Version 2.0 (the 7 | // * "License"); you may not use this file except in compliance 8 | // * with the License. You may obtain a copy of the License at 9 | // * 10 | // * http://www.apache.org/licenses/LICENSE-2.0 11 | // * 12 | // * Unless required by applicable law or agreed to in writing, software 13 | // * distributed under the License is distributed on an "AS IS" BASIS, 14 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // * See the License for the specific language governing permissions and 16 | // * limitations under the License. 
17 | // */ 18 | // 19 | //package usercase; 20 | // 21 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | //import org.apache.flink.table.api.DataTypes; 23 | //import org.apache.flink.table.api.EnvironmentSettings; 24 | //import org.apache.flink.table.api.Table; 25 | //import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 26 | //import org.apache.flink.types.Row; 27 | // 28 | // 29 | //public class TestUserIssue14 { 30 | // public static void main(String[] args) throws Exception { 31 | // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | // EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 33 | // StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, environmentSettings); 34 | // env.setParallelism(1); 35 | // 36 | // final Table inputTable = tableEnv.fromValues(// 37 | // DataTypes.ROW(// 38 | // DataTypes.FIELD("col1", DataTypes.STRING()), // 39 | // DataTypes.FIELD("col2", DataTypes.STRING())// 40 | // ), // 41 | // Row.of(1L, "Hello"), // 42 | // Row.of(2L, "Hello"), // 43 | // Row.of(3L, ""), // 44 | // Row.of(4L, "Ciao")); 45 | // tableEnv.createTemporaryView("ParquetDataset", inputTable); 46 | // tableEnv.executeSql(// 47 | // "CREATE TABLE `out` (\n" + // 48 | // "col1 STRING,\n" + // 49 | // "col2 STRING\n" + // 50 | // ") WITH (\n" + // 51 | // " 'connector' = 'filesystem',\n" + // 52 | // " 'format' = 'parquet',\n" + // 53 | // " 'path' = 'file:///Users/bang/test',\n" + // 54 | // " 'sink.shuffle-by-partition.enable' = 'true'\n" + // 55 | // ")"); 56 | // 57 | // tableEnv.executeSql("INSERT INTO `out` SELECT * FROM ParquetDataset").getJobClient() 58 | // .get().getJobExecutionResult(Thread.currentThread().getContextClassLoader()).get(); 59 | // } 60 | //} 61 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue15.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.DataTypes; 23 | import org.apache.flink.table.api.EnvironmentSettings; 24 | import org.apache.flink.table.api.Table; 25 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 26 | import org.apache.flink.types.Row; 27 | import org.apache.flink.util.CloseableIterator; 28 | 29 | 30 | public class TestUserIssue15 { 31 | public static void main(String[] args) throws Exception { 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 34 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, environmentSettings); 35 | env.setParallelism(1); 36 | 37 | tableEnv.executeSql("CREATE TABLE test (\n" + 38 | "store_id INT,\n" + 39 | "store_type VARCHAR,\n" + 40 | "region_id INT,\n" + 41 | "store_name VARCHAR,\n" + 42 | "store_number INT,\n" + 43 | "store_street_address VARCHAR,\n" + 44 | "store_city VARCHAR,\n" + 45 | "store_state VARCHAR,\n" + 46 | "store_postal_code VARCHAR,\n" + 47 | "store_country VARCHAR,\n" + 48 | "store_manager VARCHAR,\n" + 49 | "store_phone VARCHAR,\n" + 50 | "store_fax VARCHAR,\n" + 51 | "first_opened_date TIMESTAMP,\n" + 52 | "last_remodel_date DATE,\n" + 53 | "store_sqft INT,\n" + 54 | "grocery_sqft INT,\n" + 55 | "frozen_sqft INT,\n" + 56 | "meat_sqft INT,\n" + 57 | "coffee_bar BOOLEAN,\n" + 58 | "video_store BOOLEAN,\n" + 59 | "salad_bar BOOLEAN,\n" + 60 | "prepared_food BOOLEAN,\n" + 61 | "florist BOOLEAN" + 62 | ") WITH (" + 63 | " 'connector' = 'filesystem',\n" + 64 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/test15.csv',\n" + 65 | " 'format' = 'csv'," + 66 | " 'csv.field-delimiter' = '|'," + 67 | " 'csv.null-literal'=''" + 68 | ")"); 69 | CloseableIterator it = tableEnv.executeSql(" SELECT * FROM test").collect(); 70 | 71 | while(it.hasNext()) { 72 | System.out.println(it.next()); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue16.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 22 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 23 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 24 | import org.apache.flink.types.Row; 25 | import org.apache.flink.util.CloseableIterator; 26 | 27 | import static org.apache.flink.configuration.CheckpointingOptions.CHECKPOINTS_DIRECTORY; 28 | 29 | public class TestUserIssue16 { 30 | public static void main(String[] args) throws Exception { 31 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 33 | environment.setParallelism(1); 34 | 35 | tableEnvironment.executeSql("create table jsonT ( " + 36 | " `monitorId` STRING,\n" + 37 | " `deviceId` STRING,\n" + 38 | " `state` INT,\n" + 39 | " `time_st` TIMESTAMP(3),\n" + 40 | " WATERMARK FOR time_st AS time_st - INTERVAL '2' SECOND,\n" + 41 | " `data` DOUBLE) with ( " + 42 | " 'connector' = 'filesystem',\n" + 43 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user4.json',\n" + 44 | " 'format' = 'json')"); 45 | CloseableIterator tableResult = tableEnvironment.executeSql(" SELECT * from jsonT " + 46 | "where deviceId LIKE '%양현마을%' ").collect(); 47 | while(tableResult.hasNext()) { 48 | System.out.println(tableResult.next()); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue17.java: -------------------------------------------------------------------------------- 1 | ///* 2 | // * Licensed to the Apache Software Foundation (ASF) under one 3 | // * or more contributor license agreements. See the NOTICE file 4 | // * distributed with this work for additional information 5 | // * regarding copyright ownership. The ASF licenses this file 6 | // * to you under the Apache License, Version 2.0 (the 7 | // * "License"); you may not use this file except in compliance 8 | // * with the License. You may obtain a copy of the License at 9 | // * 10 | // * http://www.apache.org/licenses/LICENSE-2.0 11 | // * 12 | // * Unless required by applicable law or agreed to in writing, software 13 | // * distributed under the License is distributed on an "AS IS" BASIS, 14 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // * See the License for the specific language governing permissions and 16 | // * limitations under the License. 
17 | // */ 18 | // 19 | //package usercase; 20 | // 21 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | //import org.apache.flink.table.api.TableResult; 23 | //import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 24 | //import org.apache.flink.table.catalog.DataTypeFactory; 25 | //import org.apache.flink.table.functions.ScalarFunction; 26 | //import org.apache.flink.table.types.inference.TypeInference; 27 | // 28 | //import java.sql.Date; 29 | //import java.time.LocalDate; 30 | // 31 | //public class TestUserIssue17 { 32 | // public static void main(String[] args) throws Exception { 33 | // StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 34 | // StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 35 | // environment.setParallelism(1); 36 | // 37 | // 38 | // tableEnvironment.executeSql("CREATE TABLE orders (\n" + 39 | // " order_number INT,\n" + 40 | // " order_date INT NULL,\n" + 41 | // " purchaser INT,\n" + 42 | // " quantity INT,\n" + 43 | // " product_id INT\n" + 44 | // " ) WITH (\n" + 45 | // " 'connector' = 'kafka',\n" + 46 | // " 'topic' = 'dbserver1.inventory.orders',\n" + 47 | // " 'scan.startup.mode' = 'earliest-offset',\n" + 48 | // " 'properties.bootstrap.servers' = 'localhost:9092',\n" + 49 | // " 'properties.group.id' = 'xxtestgroup1',\n" + 50 | // " 'format' = 'debezium-json',\n" + 51 | // " 'debezium-json.schema-include' = 'true' " + 52 | // " )"); 53 | // tableEnvironment.executeSql("create table orders1 ( " + 54 | // " order_number INT,\n" + 55 | // " order_date date,\n" + 56 | // " purchaser INT,\n" + 57 | // " quantity INT,\n" + 58 | // " product_id INT," + 59 | // " PRIMARY KEY(order_number) NOT ENFORCED\n" + 60 | // ") with ( " + 61 | // " 'connector' = 'jdbc',\n" + 62 | // " 'url' = 'jdbc:mysql://localhost:3306/inventory',\n" + 63 | // " 'username' = 'mysqluser',\n" + 64 | // " 'password' = 'mysqlpw',\n" + 65 | // " 'table-name' = 'orders2',\n" + 66 | // " 'driver' = 'com.mysql.jdbc.Driver')"); 67 | // tableEnvironment.registerFunction("int2Date", new Int2DateFunc()); 68 | // TableResult result = tableEnvironment.executeSql("insert into orders1 SELECT order_number, int2Date(order_date),purchaser,quantity,product_id from orders "); 69 | // result.getJobClient().get() 70 | // .getJobExecutionResult(Thread.currentThread().getContextClassLoader()).get(); 71 | // } 72 | // 73 | // public static class Int2DateFunc extends ScalarFunction { 74 | // 75 | // public Date eval(int epochDay) { 76 | // return Date.valueOf(LocalDate.ofEpochDay(epochDay)); 77 | // } 78 | // 79 | // @Override 80 | // public TypeInference getTypeInference(DataTypeFactory typeFactory) { 81 | // return super.getTypeInference(typeFactory); 82 | // } 83 | // } 84 | //} 85 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue18.java: -------------------------------------------------------------------------------- 1 | ///* 2 | // * Licensed to the Apache Software Foundation (ASF) under one 3 | // * or more contributor license agreements. See the NOTICE file 4 | // * distributed with this work for additional information 5 | // * regarding copyright ownership. The ASF licenses this file 6 | // * to you under the Apache License, Version 2.0 (the 7 | // * "License"); you may not use this file except in compliance 8 | // * with the License. 
You may obtain a copy of the License at 9 | // * 10 | // * http://www.apache.org/licenses/LICENSE-2.0 11 | // * 12 | // * Unless required by applicable law or agreed to in writing, software 13 | // * distributed under the License is distributed on an "AS IS" BASIS, 14 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // * See the License for the specific language governing permissions and 16 | // * limitations under the License. 17 | // */ 18 | // 19 | //package usercase; 20 | // 21 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | //import org.apache.flink.table.api.StatementSet; 23 | //import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 24 | //import org.apache.flink.table.catalog.DataTypeFactory; 25 | //import org.apache.flink.table.functions.ScalarFunction; 26 | //import org.apache.flink.table.types.inference.TypeInference; 27 | // 28 | //import java.sql.Date; 29 | //import java.time.LocalDate; 30 | // 31 | //public class TestUserIssue18 { 32 | // public static void main(String[] args) throws Exception { 33 | // StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 34 | // StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 35 | // environment.setParallelism(1); 36 | // 37 | // 38 | // tableEnvironment.executeSql("create table online_example (\n" + 39 | // " face_id varchar,\n" + 40 | // " device_id varchar,\n" + 41 | // " feature_data double\n" + 42 | // ") with (\n" + 43 | // " 'connector' = 'kafka',\n" + 44 | // " 'topic' = 'json-test-2',\n" + 45 | // " 'properties.bootstrap.servers' = 'localhost:9092',\n" + 46 | // " 'properties.group.id' = 'read_example',\n" + 47 | // " 'format' = 'csv',\n" + 48 | // " 'csv.field-delimiter' = ' '," + 49 | // " 'scan.startup.mode' = 'earliest-offset' \n" + 50 | // ")"); 51 | // tableEnvironment.executeSql("create table write_example (\n" + 52 | // " face_id varchar,\n" + 53 | // " device_id varchar " + 54 | // " ) with (\n" + 55 | // " 'connector' = 'kafka',\n" + 56 | // " 'topic' = 'tianchi_write_example-3',\n" + 57 | // " 'properties.bootstrap.servers' = 'localhost:9092',\n" + 58 | // " 'properties.group.id' = 'write_example',\n" + 59 | // " 'format' = 'csv',\n" + 60 | // " 'scan.startup.mode' = 'earliest-offset'\n" + 61 | // " )"); 62 | // 63 | // StatementSet statementSet = tableEnvironment.createStatementSet(); 64 | // statementSet.addInsertSql("insert into write_example SELECT face_id, device_id from online_example"); 65 | // 66 | // statementSet.execute().getJobClient().get() 67 | // .getJobExecutionResult(Thread.currentThread().getContextClassLoader()).get(); 68 | // } 69 | // 70 | // public static class Int2DateFunc extends ScalarFunction { 71 | // 72 | // public Date eval(int epochDay) { 73 | // return Date.valueOf(LocalDate.ofEpochDay(epochDay)); 74 | // } 75 | // 76 | // @Override 77 | // public TypeInference getTypeInference(DataTypeFactory typeFactory) { 78 | // return super.getTypeInference(typeFactory); 79 | // } 80 | // } 81 | //} 82 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue19.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 23 | import org.apache.flink.table.catalog.DataTypeFactory; 24 | import org.apache.flink.table.functions.ScalarFunction; 25 | import org.apache.flink.table.types.inference.TypeInference; 26 | 27 | import java.sql.Date; 28 | import java.time.LocalDate; 29 | 30 | public class TestUserIssue19 { 31 | public static void main(String[] args) throws Exception { 32 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 34 | environment.setParallelism(1); 35 | environment.enableCheckpointing(200); 36 | 37 | tableEnvironment.executeSql("create table test_tbl ( " + 38 | " `monitorId` STRING,\n" + 39 | " `deviceId` STRING,\n" + 40 | " `state` DOUBLE ) with ( " + 41 | " 'connector' = 'filesystem',\n" + 42 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user19.json',\n" + 43 | " 'format' = 'json')"); 44 | //tableEnvironment.executeSql("select SPLIT_INDEX(deviceId, ';', 0) from test_tbl").print(); 45 | tableEnvironment.executeSql("select SPLIT_INDEX(deviceId, U&'\\003B', 0) from test_tbl").print(); 46 | 47 | } 48 | 49 | public static class Int2DateFunc extends ScalarFunction { 50 | 51 | public Date eval(int epochDay) { 52 | return Date.valueOf(LocalDate.ofEpochDay(epochDay)); 53 | } 54 | 55 | @Override 56 | public TypeInference getTypeInference(DataTypeFactory typeFactory) { 57 | return super.getTypeInference(typeFactory); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue2.java: -------------------------------------------------------------------------------- 1 | package usercase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestUserIssue2 { 9 | private static String kafkaOrdersDDL = "CREATE TABLE user_log (\n" + 10 | " order_id STRING,\n" + 11 | " item STRING,\n" + 12 | " currency STRING,\n" + 13 | " amount INT,\n" + 14 | " order_time TIMESTAMP(3),\n" + 15 | " rowtime as order_time,\n" + 16 | " amount_kg as amount * 1000,\n" + 17 | " WATERMARK FOR rowtime AS rowtime\n" + 18 | ") WITH (\n" + 19 | " 'connector.type' = 'kafka',\n" + 20 | " 'connector.version' = '0.10',\n" + 21 | " 'connector.topic' = 'flink_orders3',\n" + 22 | " 'connector.properties.zookeeper.connect' = 
'localhost:2181',\n" + 23 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 24 | " 'connector.properties.group.id' = 'testGroup4',\n" + 25 | " 'connector.startup-mode' = 'earliest-offset',\n" + 26 | " 'format.type' = 'json',\n" + 27 | " 'format.derive-schema' = 'true'\n" + 28 | ")\n"; 29 | 30 | private static String mysqlSinkDDL = "CREATE TABLE test_mysql_2 (\n" + 31 | "vid string,\n" + 32 | "rss BIGINT,\n" + 33 | "start_time string\n" + 34 | ") with ( \n" + 35 | " 'connector.type' = 'jdbc',\n" + 36 | " 'connector.url' = 'jdbc:mysql://localhost:3306/test',\n" + 37 | " 'connector.username' = 'root'," + 38 | " 'connector.table' = 'task_flink_table_3',\n" + 39 | " 'connector.write.flush.max-rows' = '100'\n" + 40 | ")"; 41 | 42 | private static String query = "INSERT INTO test_mysql_2\n" + 43 | " SELECT order_id,rss, start_time FROM(" + 44 | " SELECT order_id,rss, start_time FROM (\n" + 45 | " SELECT order_id,rss, start_time,\n" + 46 | " ROW_NUMBER() OVER (PARTITION BY start_time ORDER BY rss desc) AS rownum\n" + 47 | " FROM (\n" + 48 | " SELECT order_id,\n" + 49 | "DATE_FORMAT(TUMBLE_START(rowtime, INTERVAL '5' MINUTE),'yyyy-MM-dd HH:00') AS start_time,\n" + 50 | "SUM(amount) AS rss\n" + 51 | "FROM user_log\n" + 52 | "GROUP BY order_id, TUMBLE(rowtime, INTERVAL '5' MINUTE)\n" + 53 | " )\n" + 54 | ")\n" + 55 | "WHERE rownum <= 10" 56 | +") group by order_id,rss, start_time\n"; 57 | 58 | public static void main(String[] args) throws Exception { 59 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 60 | env.setParallelism(1); 61 | 62 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 63 | .useBlinkPlanner() 64 | .inStreamingMode() 65 | .build(); 66 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 67 | 68 | System.out.println(query); 69 | tableEnvironment.sqlUpdate(kafkaOrdersDDL); 70 | tableEnvironment.sqlUpdate(mysqlSinkDDL); 71 | tableEnvironment.sqlUpdate(query); 72 | // 73 | // //check the plan 74 | // System.out.println(tableEnvironment.explain(tableEnvironment.sqlQuery(query))); 75 | 76 | // tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(query), Row.class).print(); 77 | tableEnvironment.execute("reproduce_user_issue"); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue20.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 23 | import org.apache.flink.table.catalog.DataTypeFactory; 24 | import org.apache.flink.table.functions.ScalarFunction; 25 | import org.apache.flink.table.types.inference.TypeInference; 26 | 27 | import java.sql.Date; 28 | import java.time.LocalDate; 29 | 30 | public class TestUserIssue20 { 31 | public static void main(String[] args) throws Exception { 32 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 34 | environment.setParallelism(1); 35 | environment.enableCheckpointing(200); 36 | 37 | tableEnvironment.executeSql("CREATE TABLE es_table (\n" + 38 | " aggId varchar ,\n" + 39 | " pageId varchar ,\n" + 40 | " ts varchar ,\n" + 41 | " expoCnt int ,\n" + 42 | " clkCnt int\n" + 43 | ") WITH (\n" + 44 | "'connector' = 'elasticsearch-6',\n" + 45 | "'hosts' = 'http://localhost:9200',\n" + 46 | "'index' = 'usercase13',\n" + 47 | "'document-type' = '_doc',\n" + 48 | "'document-id.key-delimiter' = '$',\n" + 49 | "'sink.bulk-flush.interval' = '1000',\n" + 50 | "'format' = 'json'\n" + 51 | ")"); 52 | //tableEnvironment.executeSql("select SPLIT_INDEX(deviceId, ';', 0) from test_tbl").print(); 53 | tableEnvironment.executeSql("select * from es_table").print(); 54 | 55 | } 56 | 57 | public static class Int2DateFunc extends ScalarFunction { 58 | 59 | public Date eval(int epochDay) { 60 | return Date.valueOf(LocalDate.ofEpochDay(epochDay)); 61 | } 62 | 63 | @Override 64 | public TypeInference getTypeInference(DataTypeFactory typeFactory) { 65 | return super.getTypeInference(typeFactory); 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue21.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.Table; 23 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 24 | import org.apache.flink.table.catalog.Catalog; 25 | 26 | import java.util.Arrays; 27 | 28 | public class TestUserIssue21 { 29 | public static void main(String[] args) throws Exception { 30 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 32 | 33 | String inTablePath = "CREATE TABLE datagen ( " + 34 | " id INT, " + 35 | " total string, " + 36 | " ts AS localtimestamp, " + 37 | " WATERMARK FOR ts AS ts " + 38 | ") WITH ( " + 39 | " 'connector' = 'datagen', " + 40 | " 'rows-per-second'='5', " + 41 | " 'fields.id.min'='1', " + 42 | " 'fields.id.max'='10', " + 43 | " 'fields.total.length'='10' " + 44 | ")"; 45 | // tableEnvironment 46 | tableEnvironment.executeSql(inTablePath); 47 | 48 | Table table = tableEnvironment.sqlQuery("select id, total, 12 as col_1 from datagen"); 49 | tableEnvironment.createTemporaryView("table1", table); 50 | Arrays.stream(tableEnvironment.listTables()).forEach(t -> System.out.println(t)); 51 | 52 | Catalog catalog = tableEnvironment.getCatalog(tableEnvironment.getCurrentCatalog()).get(); 53 | catalog.listTables(tableEnvironment.getCurrentDatabase()).stream().forEach(t -> System.out.println(t)); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue3.java: -------------------------------------------------------------------------------- 1 | package usercase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestUserIssue3 { 9 | 10 | private static String kafkaSourceDDL = "create table json_table(" + 11 | " w_es BIGINT," + 12 | " w_type STRING," + 13 | " w_isDdl BOOLEAN," + 14 | " w_data ARRAY>," + 15 | " w_ts TIMESTAMP(3)," + 16 | " w_table STRING" + 17 | ") WITH (\n" + 18 | " 'connector.type' = 'kafka',\n" + 19 | " 'connector.version' = '0.10',\n" + 20 | " 'connector.topic' = 'json-test1',\n" + 21 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 22 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 23 | " 'connector.properties.group.id' = 'test-jdb',\n" + 24 | " 'connector.startup-mode' = 'earliest-offset',\n" + 25 | " 'format.type' = 'json',\n" + 26 | " 'format.derive-schema' = 'true'\n" + 27 | ")\n"; 28 | private static String csvSinkDDL = "create table csv(" + 29 | " w_ts TIMESTAMP(3)," + 30 | " city_id VARCHAR," + 31 | " pay_info VARCHAR," + 32 | " w_type STRING" + 33 | ") with (" + 34 | " 'connector.type' = 'filesystem',\n" + 35 | " 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/test_codegen.csv',\n" + 36 | " 'format.type' = 'csv')"; 37 | public static void main(String[] args) throws Exception { 38 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 39 | env.setParallelism(1); 40 | 41 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 42 | .useBlinkPlanner() 43 | .inStreamingMode() 44 | .build(); 45 | 
StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 46 | tableEnvironment.executeSql(kafkaSourceDDL); 47 | tableEnvironment.executeSql(csvSinkDDL); 48 | String querySQL = "insert into csv select w_ts," + 49 | " 'test' as city_id, " + 50 | " w_data[cast(w_es/1000 as INT) - 1589870637 + 1].pay_info," + 51 | " w_type " + 52 | "from json_table"; 53 | 54 | tableEnvironment.sqlUpdate(querySQL); 55 | tableEnvironment.execute("test"); 56 | // tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 57 | // tableEnvironment.execute("reproduce_user_issue"); 58 | } 59 | 60 | //@Test 61 | // public void testArray() throws Exception { 62 | // String jsonStr = "{" + 63 | // "\"w_es\":1589870637000," + 64 | // "\"w_type\":\"INSERT\"," + 65 | // "\"w_isDdl\":false," + 66 | // "\"w_data\":[" + 67 | // "{\"pay_info\":\"channelId=82&onlineFee=89.0&outTradeNo=0&payId=0&payType=02&rechargeId=4&totalFee=89.0&tradeStatus=success&userId=32590183789575&sign=00\"," + 68 | // "\"online_fee\":\"89.0\"," + 69 | // "\"sign\":\"00\"," + 70 | // "\"account_pay_fee\":\"0.0\"}]," + 71 | // "\"w_ts\":\"2020-05-20T13:58:37.131Z\"," + 72 | // "\"w_table\":\"cccc111\"}"; 73 | // System.out.println(jsonStr); 74 | // DataType rowType = ROW( 75 | // FIELD("w_es", DataTypes.BIGINT()), 76 | // FIELD("w_type", DataTypes.STRING()), 77 | // FIELD("w_isDdl", DataTypes.BOOLEAN()), 78 | // FIELD("w_data", ARRAY(ROW( 79 | // FIELD("pay_info", DataTypes.STRING()), 80 | // FIELD("online_fee", DataTypes.DECIMAL(38, 4)), 81 | // FIELD("sign", DataTypes.STRING()), 82 | // FIELD("account_pay_fee", DataTypes.DECIMAL(38, 4)) 83 | // ))), 84 | // FIELD("w_ts", DataTypes.TIMESTAMP()), 85 | // FIELD("w_table", DataTypes.STRING())); 86 | // JsonRowDeserializationSchema deserializationSchema = new JsonRowDeserializationSchema.Builder( 87 | // (TypeInformation) TypeConversions.fromDataTypeToLegacyInfo(rowType)) 88 | // .build(); 89 | // Row row = deserializationSchema.deserialize(jsonStr.getBytes()); 90 | // System.out.println(row); 91 | // } 92 | } 93 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue4.java: -------------------------------------------------------------------------------- 1 | package usercase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestUserIssue4 { 9 | 10 | private static String mysqlTable = "create table tb(id string, cooper bigint, user_sex string) with(\n" + 11 | " 'connector.type' = 'jdbc',\n" + 12 | " 'connector.url' = 'jdbc:mysql://localhost:3306/test',\n" + 13 | " 'connector.username' = 'root',\n" + 14 | " 'connector.table' = 'tb'\n" + 15 | ")"; 16 | public static void main(String[] args) throws Exception { 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | env.setParallelism(1); 19 | 20 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 21 | .useBlinkPlanner() 22 | .inStreamingMode() 23 | .build(); 24 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 25 | tableEnvironment.sqlUpdate(mysqlTable); 26 | String querySQL = "select id, cooper from tb"; 27 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 28 
| 29 | tableEnvironment.execute("reproduce_user_issue"); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue5.java: -------------------------------------------------------------------------------- 1 | package usercase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | import java.net.URL; 9 | import java.net.URLClassLoader; 10 | 11 | public class TestUserIssue5 { 12 | 13 | private static String hbaseSourceDDL = "CREATE TABLE country (\n" + 14 | " rowkey VARCHAR,\n" + 15 | " f1 ROW \n" + 16 | " " + 17 | ") WITH (\n" + 18 | " 'connector.type' = 'hbase',\n" + 19 | " 'connector.version' = '1.4.3',\n" + 20 | " 'connector.table-name' = 'country',\n" + 21 | " 'connector.zookeeper.quorum' = 'localhost:2182',\n" + 22 | " 'connector.zookeeper.znode.parent' = '/hbase' " + 23 | ")"; 24 | public static void main(String[] args) throws Exception { 25 | 26 | ClassLoader cl = ClassLoader.getSystemClassLoader(); 27 | 28 | URL[] urls = ((URLClassLoader)cl).getURLs(); 29 | 30 | for(URL url: urls){ 31 | System.out.println(url.getFile()); 32 | } 33 | 34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 35 | env.setParallelism(1); 36 | 37 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 38 | .useBlinkPlanner() 39 | .inStreamingMode() 40 | .build(); 41 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 42 | tableEnvironment.sqlUpdate(hbaseSourceDDL); 43 | 44 | String querySQL = "select * from country\n"; 45 | 46 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 47 | 48 | tableEnvironment.execute("read_hbase_sql"); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue6.java: -------------------------------------------------------------------------------- 1 | package usercase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.table.descriptors.Json; 7 | import org.apache.flink.table.descriptors.Kafka; 8 | import org.apache.flink.table.descriptors.Schema; 9 | import org.apache.flink.types.Row; 10 | 11 | import static org.apache.flink.table.api.DataTypes.FIELD; 12 | import static org.apache.flink.table.api.DataTypes.INT; 13 | import static org.apache.flink.table.api.DataTypes.ROW; 14 | import static org.apache.flink.table.api.DataTypes.STRING; 15 | 16 | public class TestUserIssue6 { 17 | public static void main(String[] args) throws Exception { 18 | System.out.println("\u65E0"); 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | env.setParallelism(1); 21 | 22 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 23 | .useBlinkPlanner() 24 | .inStreamingMode() 25 | .build(); 26 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 27 | 28 | tableEnvironment.connect( 29 | new Kafka() 30 | .topic("test-json") 31 | .version("0.10") 32 | .property("bootstrap.servers", "localhost:9092") 33 | 
.property("zookeeper.connect", "localhost:2181") 34 | .property("group.id", "testGroup")) 35 | .withFormat(new Json()) 36 | .withSchema(new Schema() 37 | .field("general",STRING()) 38 | .field("data", ROW( 39 | FIELD("reference_id", STRING()), 40 | FIELD("transaction_type", INT()), 41 | FIELD("merchant_id", INT()), 42 | FIELD("status", INT()), 43 | FIELD("create_time", INT()) 44 | ) 45 | ) 46 | ) 47 | .createTemporaryTable("KafkaSource"); 48 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery("select general, reference_id, data.reference_id from KafkaSource"), Row.class) 49 | .print(); 50 | tableEnvironment.execute("case6"); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue8.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 23 | 24 | public class TestUserIssue8 { 25 | public static void main(String[] args) throws Exception { 26 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | StreamTableEnvironment tEnv = StreamTableEnvironment.create(environment); 28 | tEnv.sqlQuery("DESCRIBE fact_table"); 29 | tEnv.execute(""); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue9.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.Table; 23 | import org.apache.flink.table.api.Tumble; 24 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 25 | import org.apache.flink.table.descriptors.Json; 26 | import org.apache.flink.table.descriptors.Kafka; 27 | import org.apache.flink.table.descriptors.Rowtime; 28 | import org.apache.flink.table.descriptors.Schema; 29 | import org.apache.flink.types.Row; 30 | 31 | import java.sql.Timestamp; 32 | import java.time.Instant; 33 | 34 | public class TestUserIssue9 { 35 | public static void main(String[] args) throws Exception { 36 | //2020-06-29 21:12:04.471 37 | //2020-06-29 23:07:01.1245406 38 | System.out.println(Timestamp.from(Instant.ofEpochMilli( 1593443236124L))); 39 | // StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 40 | // StreamTableEnvironment tEnv = StreamTableEnvironment.create(environment); 41 | // tEnv.connect(new Kafka() 42 | // .version("0.10") 43 | // .topic("jes_topic_evtime") 44 | // .property("zookeeper.connect", "localhost:2181") 45 | // .property("bootstrap.servers", "localhost:9092") 46 | // .property("group.id", "grp1") 47 | // .startFromEarliest() 48 | // ).withFormat(new Json() 49 | // .failOnMissingField(false).deriveSchema()) 50 | // .withSchema(new Schema() 51 | // .field("acct", "STRING") 52 | // .field("evtime", "LONG") 53 | // .field("logictime","TIMESTAMP(3)") 54 | // .rowtime(new Rowtime().timestampsFromField("evtime").watermarksPeriodicBounded(5000))) 55 | // .inAppendMode().createTemporaryTable("testTableName"); 56 | // 57 | // 58 | // 59 | // Table testTab = tEnv.sqlQuery("SELECT acct, evtime, logictime FROM testTableName") 60 | // .window(Tumble.over("5.seconds").on("logictime").as("w1")) 61 | // .groupBy("w1, acct") 62 | // .select("w1.rowtime, acctno"); 63 | // 64 | // tEnv.toRetractStream(testTab, Row.class).print(); 65 | // environment.execute(); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-scripts/kafak2kafka_etl_run.sh: -------------------------------------------------------------------------------- 1 | # start kafka cluster 2 | cd /Users/bang/kafka_2.11-0.10.2.0 3 | ./bin/zookeeper-server-start.sh -daemon ./config/zookeeper.properties 4 | ./bin/kafka-server-start.sh -daemon ./config/server.properties 5 | #optional 6 | ./bin/kafka-topics.sh --create --topic json-test --zookeeper localhost:2181 --partitions 1 --replication-factor 1 7 | ./bin/kafka-console-producer.sh --topic json-test --broker-list localhost:9092 8 | 9 | ## relate command 10 | cd ~/confluent-3.2.0/ 11 | ./bin/kafka-avro-console-producer --broker-list localhost:9092 --topic t1 --property value.schema='{"type":"record","name":"myrecord","fields":[{"name":"f1","type":"string"}]}' 12 | cd kafka_2.11-0.10.2.0 13 | ./bin/kafka-topics.sh --list --zookeeper localhost:2181 14 | ./bin/kafka-console-consumer.sh --topic csv_data --bootstrap-server localhost:9092 --from-beginning 15 | 16 | # start hdfs 17 | cd /Users/bang/hadoop-2.8.5 18 | hadoop namenode -format 19 | cd /Users/bang/hadoop-2.8.5/sbin 20 | ./start-dfs.sh 21 | ./start-yarn.sh 22 | 23 | # start mysql 24 | /usr/local/opt/mysql/support-files/mysql.server start 25 | # stop mysql 26 | /usr/local/opt/mysql/support-files/mysql.server stop 27 | 28 | # start es 29 | cd /Users/bang/elasticsearch-6.3.1 30 | 
./bin/elasticsearch 31 | ## es cli: 32 | ./bin/elasticsearch-sql-cli 33 | 34 | 35 | # start hbase 36 | cd /Users/bang/hbase-1.4.3 37 | ./bin/start-hbase.sh 38 | ## use its own zookeeper whose client port is set to 2182 to avoid conflicts with the Kafka zookeeper 39 | ## : ./bin/hbase shell 40 | ## list; create 't1','f1'; scan 'gmv'; 41 | ## note: hbase may conflict with hive 42 | 43 | # start hive 44 | ## start hadoop first 45 | ./start-dfs.sh 46 | ./start-yarn.sh 47 | ## start mysql (metastore) 48 | cd /Users/bang/hive-3.1.2 49 | ## init hive schema (initialization is only required once) 50 | bin/schematool -initSchema -dbType mysql 51 | bin/hive 52 | ## start metastore service 53 | bin/hive --service metastore -p 9083 & 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2es/Kafka2AppendEs.sql: -------------------------------------------------------------------------------- 1 | ## batch 2 | create table csv( pageId VARCHAR, eventId VARCHAR, recvTime VARCHAR) with ( 'connector.type' = 'filesystem', 3 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 4 | 'format.type' = 'csv', 5 | 'format.fields.0.name' = 'pageId', 6 | 'format.fields.0.data-type' = 'STRING', 7 | 'format.fields.1.name' = 'eventId', 8 | 'format.fields.1.data-type' = 'STRING', 9 | 'format.fields.2.name' = 'recvTime', 10 | 'format.fields.2.data-type' = 'STRING') 11 | 12 | CREATE TABLE es_table ( 13 | aggId varchar , 14 | pageId varchar , 15 | ts varchar , 16 | expoCnt int , 17 | clkCnt int 18 | ) WITH ( 19 | 'connector.type' = 'elasticsearch', 20 | 'connector.version' = '6', 21 | 'connector.hosts' = 'http://localhost:9200', 22 | 'connector.index' = '66_test', 23 | 'connector.document-type' = '_doc', 24 | 'update-mode' = 'upsert', 25 | 'connector.key-delimiter' = '$', 26 | 'connector.key-null-literal' = 'n/a', 27 | 'connector.bulk-flush.interval' = '1000', 28 | 'format.type' = 'json' 29 | ) 30 | INSERT INTO es_table 31 | SELECT pageId,eventId,cast(recvTime as varchar) as ts, 1, 1 from csv 32 | 33 | 34 | ## streaming 35 | 36 | create table csv_user( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'connector.type' = 'filesystem', 37 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv', 38 | 'format.type' = 'csv', 39 | 'format.fields.0.name' = 'user_name', 40 | 'format.fields.0.data-type' = 'STRING', 41 | 'format.fields.1.name' = 'is_new', 42 | 'format.fields.1.data-type' = 'BOOLEAN', 43 | 'format.fields.2.name' = 'content', 44 | 'format.fields.2.data-type' = 'STRING') 45 | CREATE TABLE kafka_user ( 46 | user_name STRING, 47 | is_new BOOLEAN, 48 | content STRING) WITH ( 49 | 'connector.type' = 'kafka', 50 | 'connector.version' = '0.10', 51 | 'connector.topic' = 'kafka_user', 52 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 53 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 54 | 'connector.properties.group.id' = 'testGroup3', 55 | 'connector.startup-mode' = 'earliest-offset', 56 | 'format.type' = 'csv') 57 | insert into kafka_user 58 | select user_name, is_new, content from 59 | csv_user; 60 | 61 | CREATE TABLE es_user ( 62 | user_name STRING, 63 | is_new BOOLEAN, 64 | content STRING 65 | ) WITH ( 66 | 'connector.type' = 'elasticsearch', 67 | 'connector.version' = '7', 68 | 'connector.hosts' = 'http://localhost:9200', 69 | 'connector.index' = 'es_user', 70 | 'connector.document-type' = '_doc', 71
| 'update-mode' = 'upsert', 72 | 'connector.key-delimiter' = '$', 73 | 'connector.key-null-literal' = 'n/a', 74 | 'connector.bulk-flush.interval' = '1000', 75 | 'format.type' = 'json' 76 | ); 77 | insert into es_user 78 | select user_name, is_new, content from 79 | kafka_user; 80 | 81 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2es/Kafka2DynamicIndexEs.sql: -------------------------------------------------------------------------------- 1 | create table csv( pageId VARCHAR, eventId VARCHAR, recvTime TIMESTAMP(3)) with ( 'connector.type' = 'filesystem', 2 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 3 | 'format.type' = 'csv', 4 | 'format.fields.0.name' = 'pageId', 5 | 'format.fields.0.data-type' = 'STRING', 6 | 'format.fields.1.name' = 'eventId', 7 | 'format.fields.1.data-type' = 'STRING', 8 | 'format.fields.2.name' = 'recvTime', 9 | 'format.fields.2.data-type' = 'TIMESTAMP(3)') 10 | 11 | CREATE TABLE append_test ( 12 | aggId varchar , 13 | pageId varchar , 14 | ts timestamp(3) , 15 | expoCnt int , 16 | clkCnt int 17 | ) WITH ( 18 | 'connector.type' = 'elasticsearch', 19 | 'connector.version' = '6', 20 | 'connector.hosts' = 'http://localhost:9200', 21 | 'connector.index' = 'dadynamic-index-{clkCnt}', 22 | 'connector.document-type' = '_doc', 23 | 'update-mode' = 'upsert', 24 | 'connector.key-delimiter' = '$', 25 | 'connector.key-null-literal' = 'n/a', 26 | 'connector.bulk-flush.interval' = '1000', 27 | 'format.type' = 'json' 28 | ) 29 | 30 | INSERT INTO append_test 31 | SELECT pageId,eventId,recvTime ts, 1, 1 from csv 32 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2es/Kafka2UpsertEs.sql: -------------------------------------------------------------------------------- 1 | create table csv( pageId VARCHAR, eventId VARCHAR, recvTime VARCHAR) with ( 'connector.type' = 'filesystem', 2 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 3 | 'format.type' = 'csv', 4 | 'format.fields.0.name' = 'pageId', 5 | 'format.fields.0.data-type' = 'STRING', 6 | 'format.fields.1.name' = 'eventId', 7 | 'format.fields.1.data-type' = 'STRING', 8 | 'format.fields.2.name' = 'recvTime', 9 | 'format.fields.2.data-type' = 'STRING') 10 | 11 | CREATE TABLE test_upsert ( 12 | aggId varchar , 13 | pageId varchar , 14 | ts varchar , 15 | expoCnt bigint , 16 | clkCnt bigint 17 | ) WITH ( 18 | 'connector.type' = 'elasticsearch', 19 | 'connector.version' = '6', 20 | 'connector.hosts' = 'http://localhost:9200', 21 | 'connector.index' = 'flink_zhangle_pageview', 22 | 'connector.document-type' = '_doc', 23 | 'update-mode' = 'upsert', 24 | 'connector.key-delimiter' = '$', 25 | 'connector.key-null-literal' = 'n/a', 26 | 'connector.bulk-flush.interval' = '1000', 27 | 'format.type' = 'json' 28 | ) 29 | 30 | INSERT INTO test_upsert 31 | SELECT aggId, pageId, ts, 32 | count(case when eventId = 'exposure' then 1 else null end) as expoCnt, 33 | count(case when eventId = 'click' then 1 else null end) as clkCnt 34 | FROM 35 | ( 36 | SELECT 37 | 'ZL_001' as aggId, 38 | pageId, 39 | eventId, 40 | recvTime, 41 | ts2Date(recvTime) as ts 42 | from csv 43 | where eventId in ('exposure', 'click') 44 | ) as t1 45 | group by aggId, pageId, ts -------------------------------------------------------------------------------- 
/etl-job/src/main/resources/job-sql-1.10/kafka2filesystemandhive/Csv2HivePartition.sql: -------------------------------------------------------------------------------- 1 | create table test_csv( user_name VARCHAR, is_new BOOLEAN, content VARCHAR, date_col VARCHAR) with ( 2 | 'connector.type' = 'filesystem', 3 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user_part.csv', 4 | 'format.type' = 'csv') 5 | 6 | -- table user_info_partition is a hive partition table: create the hive table in hive first, then load it through HiveCatalog so that flink can insert 7 | -- data into the hive table. the hive create table command is: 8 | -- create table user_info_partition(user_name string, is_new boolean, content string) PARTITIONED BY (date_col string) row format delimited fields terminated by '\t'; 9 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2filesystemandhive/Csv2HiveSink.sql: -------------------------------------------------------------------------------- 1 | create table csv( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'connector.type' = 'filesystem', 2 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv', 3 | 'format.type' = 'csv') 4 | 5 | -- table user_ino_no_part is a hive table: create the hive table in hive first, then load it through HiveCatalog so that flink can insert 6 | -- data into the hive table. the hive create table command is: 7 | -- hive> create table user_ino_no_part(user_name string, is_new boolean, content string) row format delimited fields terminated by '\t'; 8 | 9 | insert into user_ino_no_part select user_name, is_new, content from csv 10 | 11 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2filesystemandhive/FileSystem2FileSystem.sql: -------------------------------------------------------------------------------- 1 | create table csv( id INT, note STRING, country STRING, record_time TIMESTAMP(4), doub_val DECIMAL(6, 2)) with ( 'connector.type' = 'filesystem', 2 | 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test.csv', 3 | 'format.type' = 'csv') 4 | 5 | create table csvSink( jnlno STRING, 6 | taskid char(4), 7 | hit VARCHAR ) with ( 'connector.type' = 'filesystem', 8 | 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test12312.csv', 9 | 'format.type' = 'csv') 10 | 11 | insert into csvSink select a.country,'111111qeq','false' from csv a -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2filesystemandhive/Kafka2HiveSink.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE csvData ( 2 | user_name STRING, 3 | is_new BOOLEAN, 4 | content STRING, 5 | date_col STRING) WITH ( 6 | 'connector.type' = 'kafka', 7 | 'connector.version' = '0.10', 8 | 'connector.topic' = 'csv_data', 9 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 10 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 11 | 'connector.properties.group.id' = 'testGroup-1', 12 | 'connector.startup-mode' = 'earliest-offset', 13 | 'format.type' = 'csv') 14 | 15 | -- read from kafka, and then write to hive 16 | 17 | insert into myhive.hive_test.user_info_kafka select user_name, is_new, content from csvData
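-- note: the fully qualified sink name myhive.hive_test.user_info_kafka assumes a HiveCatalog named myhive is already
-- registered (for example through the catalogs section of sql-client-defaults.yaml, or with a CREATE CATALOG statement)
-- and that the hive table already exists. a minimal sketch of that assumed setup; the hive-conf-dir path and the field
-- delimiter below are illustrative assumptions, not values taken from this project:
-- Flink SQL: CREATE CATALOG myhive WITH ('type' = 'hive', 'hive-conf-dir' = '/path/to/hive-conf', 'default-database' = 'hive_test');
-- hive> create table user_info_kafka(user_name string, is_new boolean, content string) row format delimited fields terminated by '\t';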
-------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2hbase/KafkaJoinHbaseJoinMysql2Hbase.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time 11 | ) WITH ( 12 | 'connector.type' = 'kafka', 13 | 'connector.version' = '0.10', 14 | 'connector.topic' = 'flink_orders3', 15 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 16 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 17 | 'connector.properties.group.id' = 'testGroup4', 18 | 'connector.startup-mode' = 'earliest-offset', 19 | 'format.type' = 'json', 20 | 'format.derive-schema' = 'true' 21 | ) 22 | 23 | CREATE TABLE country ( 24 | rowkey VARCHAR, 25 | f1 ROW 26 | ) WITH ( 27 | 'connector.type' = 'hbase', 28 | 'connector.version' = '1.4.3', 29 | 'connector.table-name' = 'country', 30 | 'connector.zookeeper.quorum' = 'localhost:2182', 31 | 'connector.zookeeper.znode.parent' = '/hbase' ) 32 | 33 | CREATE TABLE currency ( 34 | currency_id BIGINT, 35 | currency_name STRING, 36 | rate DOUBLE, 37 | currency_time TIMESTAMP(3), 38 | country STRING, 39 | timestamp9 TIMESTAMP(3), 40 | time9 TIME(3), 41 | gdp DOUBLE 42 | ) WITH ( 43 | 'connector.type' = 'jdbc', 44 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 45 | 'connector.username' = 'root', 'connector.table' = 'currency', 46 | 'connector.driver' = 'com.mysql.jdbc.Driver', 47 | 'connector.lookup.cache.max-rows' = '500', 48 | 'connector.lookup.cache.ttl' = '10s', 49 | 'connector.lookup.max-retries' = '3') 50 | 51 | 52 | CREATE TABLE gmv ( 53 | rowkey VARCHAR, 54 | f1 ROW 56 | ) WITH ( 57 | 'connector.type' = 'hbase', 58 | 'connector.version' = '1.4.3', 59 | 'connector.table-name' = 'gmv1', 60 | 'connector.zookeeper.quorum' = 'localhost:2182', 61 | 'connector.zookeeper.znode.parent' = '/hbase', 62 | 'connector.write.buffer-flush.max-size' = '10mb', 63 | 'connector.write.buffer-flush.max-rows' = '1000', 64 | 'connector.write.buffer-flush.interval' = '2s' ) 65 | 66 | 67 | insert into gmv select concat(log_ts,'_',item) as rowkey, 68 | ROW(log_ts, item, country_name, country_name_cn, region_name, currency, order_cnt, currency_time, gmv) as f1 from (select co.f1.country_name as country_name, co.f1.country_name_cn as country_name_cn, co.f1.region_name as region_name, co.f1.currency as currency, cast(TUMBLE_END(o.ts, INTERVAL '10' SECOND) as VARCHAR) as log_ts, 69 | o.item, COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DOUBLE) as gmv 70 | from orders as o 71 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 72 | on o.currency = c.currency_name 73 | left outer join country FOR SYSTEM_TIME AS OF o.proc_time co 74 | on c.country = co.rowkey group by o.item, c.currency_time, c.rate, co.f1.country_name, co.f1.country_name_cn, co.f1.region_name, co.f1.currency, TUMBLE(o.ts, INTERVAL '10' SECOND)) a -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2hbase/UnboundedKafkaJoinHbase2Hbase.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | 
order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time 11 | ) WITH ( 12 | 'connector.type' = 'kafka', 13 | 'connector.version' = '0.10', 14 | 'connector.topic' = 'flink_orders2', 15 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 16 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 17 | 'connector.properties.group.id' = 'testGroup3', 18 | 'connector.startup-mode' = 'earliest-offset', 19 | 'format.type' = 'json', 20 | 'format.derive-schema' = 'true' 21 | ) 22 | 23 | CREATE TABLE currency ( 24 | currency_id BIGINT, 25 | currency_name STRING, 26 | rate DOUBLE, 27 | currency_time TIMESTAMP(3), 28 | country STRING, 29 | timestamp9 TIMESTAMP(3), 30 | time9 TIME(3), 31 | gdp DECIMAL(38, 18) 32 | ) WITH ( 33 | 'connector.type' = 'jdbc', 34 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 35 | 'connector.username' = 'root', 'connector.table' = 'currency', 36 | 'connector.driver' = 'com.mysql.jdbc.Driver', 37 | 'connector.lookup.cache.max-rows' = '500', 38 | 'connector.lookup.cache.ttl' = '10s', 39 | 'connector.lookup.max-retries' = '3') 40 | 41 | CREATE TABLE country ( 42 | rowkey VARCHAR, 43 | f1 ROW 44 | ,f2 ROW) WITH ( 45 | 'connector.type' = 'hbase', 46 | 'connector.version' = '1.4.3', 47 | 'connector.table-name' = 'country', 48 | 'connector.zookeeper.quorum' = 'localhost:2182', 49 | 'connector.zookeeper.znode.parent' = '/hbase' ) 50 | 51 | CREATE TABLE gmv ( 52 | rowkey VARCHAR, 53 | f1 ROW 54 | ) WITH ( 55 | 'connector.type' = 'hbase', 56 | 'connector.version' = '1.4.3', 57 | 'connector.table-name' = 'gmv', 58 | 'connector.zookeeper.quorum' = 'localhost:2182', 59 | 'connector.zookeeper.znode.parent' = '/hbase', 60 | 'connector.write.buffer-flush.max-size' = '10mb', 61 | 'connector.write.buffer-flush.max-rows' = '1000', 62 | 'connector.write.buffer-flush.interval' = '2s' ) 63 | 64 | insert into gmv 65 | select rowkey, ROW(max(ts), max(item), max(country_name)) as f1 66 | from (select concat(cast(o.ts as VARCHAR), '_', item, '_', co.f1.country_name) as rowkey, 67 | cast(o.ts as VARCHAR) as ts, o.item as item, co.f1.country_name as country_name 68 | from orders as o 69 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 70 | on o.currency = c.currency_name 71 | left outer join country FOR SYSTEM_TIME AS OF o.proc_time co 72 | on c.country = co.rowkey 73 | ) a group by rowkey 74 | 75 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2jdbc/KafkaJoinJdbc2Jdbc.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | WATERMARK FOR order_time AS order_time 10 | ) WITH ( 11 | 'connector.type' = 'kafka', 12 | 'connector.version' = '0.10', 13 | 'connector.topic' = 'flink_orders3', 14 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 15 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 16 | 'connector.properties.group.id' = 'test-jdbc', 17 | 'connector.startup-mode' = 'earliest-offset', 18 | 'format.type' = 'json', 19 | 'format.derive-schema' = 'true' 20 | ) 21 | 22 | CREATE TABLE currency ( 23 | currency_id BIGINT, 24 | currency_name STRING, 25 | rate DOUBLE, 26 | currency_time TIMESTAMP(3), 27 | country STRING, 28 | 
timestamp9 TIMESTAMP(3), 29 | time9 TIME(3), 30 | gdp DECIMAL(38, 18) 31 | ) WITH ( 32 | 'connector.type' = 'jdbc', 33 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 34 | 'connector.username' = 'root', 'connector.table' = 'currency', 35 | 'connector.driver' = 'com.mysql.jdbc.Driver', 36 | 'connector.lookup.cache.max-rows' = '500', 37 | 'connector.lookup.cache.ttl' = '10s', 38 | 'connector.lookup.max-retries' = '3') 39 | 40 | 41 | CREATE TABLE gmv ( 42 | log_per_min STRING, 43 | item STRING, 44 | order_cnt BIGINT, 45 | currency_time TIMESTAMP(3), 46 | gmv DECIMAL(38, 18), timestamp9 TIMESTAMP(3), 47 | time9 TIME(3), 48 | gdp DECIMAL(38, 18) 49 | ) WITH ( 50 | 'connector.type' = 'jdbc', 51 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 52 | 'connector.username' = 'root', 'connector.table' = 'gmv_table', 53 | 'connector.driver' = 'com.mysql.jdbc.Driver', 54 | 'connector.write.flush.max-rows' = '3', 55 | 'connector.write.flush.interval' = '120s', 56 | 'connector.write.max-retries' = '2') 57 | 58 | insert into gmv 59 | select cast(TUMBLE_END(o.order_time, INTERVAL '10' SECOND) as VARCHAR) as log_ts, 60 | o.item, COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DECIMAL(38, 18)) as gmv, 61 | c.timestamp9, c.time9, c.gdp 62 | from orders as o 63 | join currency FOR SYSTEM_TIME AS OF o.proc_time c 64 | on o.currency = c.currency_name 65 | group by o.item, c.currency_time, c.rate, c.timestamp9, c.time9, c.gdp, TUMBLE(o.order_time, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2jdbc/UnboundedKafkaJoinJdbc2Jdbc.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time 11 | ) WITH ( 12 | 'connector.type' = 'kafka', 13 | 'connector.version' = '0.10', 14 | 'connector.topic' = 'flink_orders2', 15 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 16 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 17 | 'connector.properties.group.id' = 'testGroup3', 18 | 'connector.startup-mode' = 'earliest-offset', 19 | 'format.type' = 'json', 20 | 'format.derive-schema' = 'true' 21 | ) 22 | 23 | CREATE TABLE currency ( 24 | currency_id BIGINT, 25 | currency_name STRING, 26 | rate DOUBLE, 27 | currency_time TIMESTAMP(3), 28 | country STRING, 29 | timestamp9 TIMESTAMP(3), 30 | time9 TIME(3), 31 | gdp DECIMAL(38, 18) 32 | ) WITH ( 33 | 'connector.type' = 'jdbc', 34 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 35 | 'connector.username' = 'root', 36 | 'connector.table' = 'currency', 37 | 'connector.driver' = 'com.mysql.jdbc.Driver', 38 | 'connector.lookup.cache.max-rows' = '500', 39 | 'connector.lookup.cache.ttl' = '10s', 40 | 'connector.lookup.max-retries' = '3') 41 | CREATE TABLE gmv ( 42 | log_per_min STRING, 43 | item STRING, 44 | order_cnt BIGINT, 45 | currency_time TIMESTAMP(3), 46 | gmv DECIMAL(38, 18), timestamp9 TIMESTAMP(3), 47 | time9 TIME(3), 48 | gdp DECIMAL(38, 18) 49 | ) WITH ( 50 | 'connector.type' = 'jdbc', 51 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 52 | 'connector.username' = 'root', 53 | 'connector.table' = 'gmv', 54 | 'connector.driver' = 'com.mysql.jdbc.Driver', 55 | 'connector.write.flush.max-rows' 
= '5000', 56 | 'connector.write.flush.interval' = '2s', 57 | 'connector.write.max-retries' = '3') 58 | insert into gmv 59 | select max(log_ts), 60 | item, COUNT(order_id) as order_cnt, max(currency_time), cast(sum(amount_kg) * max(rate) as DOUBLE) as gmv, 61 | max(timestamp9), max(time9), max(gdp) 62 | from ( 63 | select cast(o.ts as VARCHAR) as log_ts, o.item as item, o.order_id as order_id, c.currency_time as currency_time, 64 | o.amount_kg as amount_kg, c.rate as rate, c.timestamp9 as timestamp9, c.time9 as time9, c.gdp as gdp 65 | from orders as o 66 | join currency FOR SYSTEM_TIME AS OF o.proc_time c 67 | on o.currency = c.currency_name 68 | ) a group by item 69 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2kafka/KafkaAvro2Kafka.sql: -------------------------------------------------------------------------------- 1 | -- first job: build avro format data from csv and write to kafka topic 2 | create table csv( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'connector.type' = 'filesystem', 3 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv', 4 | 'format.type' = 'csv') 5 | CREATE TABLE AvroTest ( 6 | user_name VARCHAR, 7 | is_new BOOLEAN, 8 | content VARCHAR) WITH ( 9 | 'connector.type' = 'kafka', 10 | 'connector.version' = '0.10', 11 | 'connector.topic' = 'avro_from_csv', 12 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 13 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 14 | 'connector.properties.group.id' = 'testGroup3', 15 | 'connector.startup-mode' = 'earliest-offset', 16 | 'format.type' = 'avro', 17 | 'format.record-class' = 'kafka.UserAvro' 18 | ) 19 | 20 | insert into AvroTest select user_name, is_new, content from csv 21 | 22 | -- second job: consume avro format data from kafka and write to another kafka topic 23 | 24 | CREATE TABLE AvroTest ( 25 | user_name VARCHAR, 26 | is_new BOOLEAN, 27 | content VARCHAR) WITH ( 28 | 'connector.type' = 'kafka', 29 | 'connector.version' = '0.10', 30 | 'connector.topic' = 'avro_from_csv', 31 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 32 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 33 | 'connector.properties.group.id' = 'testGroup4', 34 | 'connector.startup-mode' = 'earliest-offset', 35 | 'format.type' = 'avro', 36 | 'format.record-class' = 'kafka.UserAvro' 37 | ) 38 | 39 | CREATE TABLE WikipediaFeed_filtered ( 40 | user_name STRING, 41 | is_new BOOLEAN, 42 | content STRING) WITH ( 43 | 'connector.type' = 'kafka', 44 | 'connector.version' = '0.10', 45 | 'connector.topic' = 'WikipediaFeed2_filtered', 46 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 47 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 48 | 'connector.properties.group.id' = 'testGroup3', 49 | 'connector.startup-mode' = 'earliest-offset', 50 | 'format.type' = 'avro', 51 | 'format.avro-schema' = 52 | '{ 53 | "type": "record", 54 | "name": "UserAvro", 55 | "fields": [ 56 | {"name": "user_name", "type": "string"}, 57 | {"name": "is_new", "type": "boolean"}, 58 | {"name": "content", "type": "string"} 59 | ] 60 | }') 61 | 62 | insert into WikipediaFeed_filtered 63 | select user_name, is_new, content 64 | from AvroTest -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2kafka/KafkaCsv2Kafka.sql: 
-------------------------------------------------------------------------------- 1 | -- from csv data to kafka 2 | create table csv( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'connector.type' = 'filesystem', 3 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv', 4 | 'format.type' = 'csv') 5 | CREATE TABLE csvData ( 6 | user_name STRING, 7 | is_new BOOLEAN, 8 | content STRING) WITH ( 9 | 'connector.type' = 'kafka', 10 | 'connector.version' = '0.10', 11 | 'connector.topic' = 'csv_data', 12 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 13 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 14 | 'connector.properties.group.id' = 'testGroup3', 15 | 'connector.startup-mode' = 'earliest-offset', 16 | 'format.type' = 'csv') 17 | insert into csvData 18 | select user_name, is_new, content from 19 | csv 20 | 21 | -- from kafka to csv 22 | CREATE TABLE csvData ( 23 | user_name STRING, 24 | is_new BOOLEAN, 25 | content STRING) WITH ( 26 | 'connector.type' = 'kafka', 27 | 'connector.version' = '0.10', 28 | 'connector.topic' = 'csv_data', 29 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 30 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 31 | 'connector.properties.group.id' = 'testGroup4', 32 | 'connector.startup-mode' = 'earliest-offset', 33 | 'format.type' = 'csv') 34 | create table csvTest( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'connector.type' = 'filesystem', 35 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/test.csv', 36 | 'format.type' = 'csv', 37 | 'update-mode' = 'append', 38 | 'format.fields.0.name' = 'user_name', 39 | 'format.fields.0.data-type' = 'STRING', 40 | 'format.fields.1.name' = 'is_new', 41 | 'format.fields.1.data-type' = 'BOOLEAN', 42 | 'format.fields.2.name' = 'content', 43 | 'format.fields.2.data-type' = 'STRING') 44 | insert into csvTest select user_name, is_new, content from csvData 45 | 46 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2kafka/KafkaJoinJdbc2Kafka.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time 11 | ) WITH ( 12 | 'connector.type' = 'kafka', 13 | 'connector.version' = '0.10', 14 | 'connector.topic' = 'flink_orders2', 15 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 16 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 17 | 'connector.properties.group.id' = 'testGroup3', 18 | 'connector.startup-mode' = 'earliest-offset', 19 | 'format.type' = 'json', 20 | 'format.derive-schema' = 'true' 21 | ) 22 | 23 | CREATE TABLE currency ( 24 | currency_id BIGINT, 25 | currency_name STRING, 26 | rate DOUBLE, 27 | currency_time TIMESTAMP(3), 28 | country STRING, 29 | timestamp9 TIMESTAMP(3), 30 | time9 TIME(3), 31 | gdp DECIMAL(38, 18) 32 | ) WITH ( 33 | 'connector.type' = 'jdbc', 34 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 35 | 'connector.username' = 'root', 'connector.table' = 'currency', 36 | 'connector.driver' = 'com.mysql.jdbc.Driver', 37 | 'connector.lookup.cache.max-rows' = '500', 38 | 'connector.lookup.cache.ttl' = '10s', 39 | 
'connector.lookup.max-retries' = '3') 40 | CREATE TABLE gmv ( 41 | log_per_min STRING, 42 | item STRING, 43 | order_cnt BIGINT, 44 | currency_time TIMESTAMP(3), 45 | gmv DECIMAL(38, 18)) WITH ( 46 | 'connector.type' = 'kafka', 47 | 'connector.version' = '0.10', 48 | 'connector.topic' = 'gmv', 49 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 50 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 51 | 'format.type' = 'json', 52 | 'format.derive-schema' = 'true' 53 | ) 54 | insert into gmv 55 | select cast(TUMBLE_END(o.order_time, INTERVAL '10' SECOND) as VARCHAR) as log_per_min, 56 | o.item, COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DECIMAL(38, 18)) as gmv 57 | from orders as o 58 | join currency FOR SYSTEM_TIME AS OF o.proc_time c 59 | on o.currency = c.currency_name 60 | group by o.item, c.currency_time,c.rate,TUMBLE(o.order_time, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2kafka/kafkaJson2kafka.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount DOUBLE, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time) WITH ( 11 | 'connector.type' = 'kafka', 12 | 'connector.version' = '0.10', 13 | 'connector.topic' = 'flink_orders', 14 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 15 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 16 | 'connector.properties.group.id' = 'testGroup3', 17 | 'connector.startup-mode' = 'earliest-offset', 18 | 'format.type' = 'json', 19 | 'format.derive-schema' = 'true' 20 | ) 21 | 22 | CREATE TABLE order_cnt ( 23 | log_per_min TIMESTAMP(3), 24 | item STRING, 25 | order_cnt BIGINT, 26 | total_quality BIGINT 27 | ) WITH ( 28 | 'connector.type' = 'kafka', 29 | 'connector.version' = '0.10', 30 | 'connector.topic' = 'order_cnt', 31 | 'update-mode' = 'append', 32 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 33 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 34 | 'format.type' = 'json', 35 | 'format.derive-schema' = 'true' 36 | ) 37 | insert into order_cnt 38 | select TUMBLE_END(order_time, INTERVAL '10' SECOND), 39 | item, COUNT(order_id) as order_cnt, CAST(sum(amount_kg) as BIGINT) as total_quality 40 | from orders 41 | group by item, TUMBLE(order_time, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/catalog/PgcatalogE2eTest.sql: -------------------------------------------------------------------------------- 1 | # create table by pgadmin 2 | CREATE TABLE public.primitive_table(id integer); 3 | CREATE TABLE bang.primitive_table(id integer); 4 | CREATE TABLE public.primitive_table(int integer, bytea bytea, short smallint, long bigint, real real, double_precision double precision, numeric numeric(10, 5), decimal decimal(10, 1), boolean boolean, text text, char char, character character(3), character_varying character varying(20), timestamp timestamp(5), date date,time time(0), default_numeric numeric, CONSTRAINT test_pk PRIMARY KEY (short, int)); 5 | CREATE TABLE public.primitive_table(int_arr integer[], bytea_arr bytea[], short_arr smallint[], long_arr bigint[], real_arr real[], double_precision_arr 
double precision[], numeric_arr numeric(10, 5)[], numeric_arr_default numeric[], decimal_arr decimal(10,2)[], boolean_arr boolean[], text_arr text[], char_arr char[], character_arr character(3)[], character_varying_arr character varying(20)[], timestamp_arr timestamp(5)[], date_arr date[], time_arr time(0)[]); 6 | CREATE TABLE public.primitive_table(f0 smallserial, f1 serial, f2 serial2, f3 serial4, f4 serial8, f5 bigserial); 7 | CREATE TABLE public.primitive_table2(int integer, bytea bytea, short smallint, long bigint, real real, double_precision double precision, numeric numeric(10, 5), decimal decimal(10, 1), boolean boolean, text text, char char, character character(3), character_varying character varying(20), timestamp timestamp(5), date date,time time(0), default_numeric numeric, CONSTRAINT test_pk1 PRIMARY KEY (short, int)); 8 | 9 | # insert test data 10 | insert into public.t1 values (1); 11 | insert into primitive_table values (1,'2',3,4,5.5,6.6,7.7,8.8,true,'a','b','c','d','2016-06-22 19:10:25','2015-01-01','00:51:02.746572', 500); 12 | insert into array_table values ('{1,2,3}','{2,3,4}','{3,4,5}','{4,5,6}','{5.5,6.6,7.7}','{6.6,7.7,8.8}','{7.7,8.8,9.9}','{8.8,9.9,10.10}','{9.9,10.10,11.11}','{true,false,true}','{a,b,c}','{b,c,d}','{b,c,d}','{b,c,d}','{"2016-06-22 19:10:25", "2019-06-22 19:10:25"}','{"2015-01-01", "2020-01-01"}','{"00:51:02.746572", "00:59:02.746572"}'); 13 | insert into serial_table values (32767,2147483647,32767,2147483647,9223372036854775807,9223372036854775807); 14 | 15 | # test in sql-client 16 | 17 | (1) config conf/sql-client-defaults.yaml 18 | catalogs: 19 | - name: mypg 20 | type: jdbc 21 | default-database: mydb 22 | username: postgres 23 | password: postgres 24 | base-url: jdbc:postgresql://localhost/ 25 | 26 | (2) add necessary dependency to /lib 27 | flink-connector-jdbc_2.11-1.12-SNAPSHOT.jar 28 | postgresql-42.2.9.jar 29 | 30 | (3) sql-client test 31 | Flink SQL> show tables; 32 | bang.primitive_table 33 | public.primitive_arr_table 34 | public.primitive_serial_table 35 | public.primitive_table 36 | public.primitive_table2 37 | public.simple_t1 38 | 39 | # test read/write 40 | Flink SQL> insert into `public.primitive_table2` select * from `public.primitive_table`; 41 | [INFO] Submitting SQL update statement to the cluster... 42 | [INFO] Table update statement has been successfully submitted to the cluster: 43 | Job ID: aa953b785dea9903acaf4caafa50987a 44 | 45 | #check result 46 | Flink SQL> select * from `public.primitive_table2`; 47 | [INFO] Result retrieval cancelled. 48 | -- int bytea short long real double_precision numeric 49 | -- 1 [50] 3 4 5.5 6.6 7.700000000000000000 50 | 51 | -- See FLINK-17948, sql-client bug 52 | Flink SQL> select * from `public.primitive_arr_table`; 53 | [ERROR] Could not execute SQL statement. Reason: 54 | org.apache.flink.table.planner.codegen.CodeGenException: Unsupported cast from 'ARRAY' to 'ARRAY'. 
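55 |
56 | # Alternative to step (1): the same Postgres catalog can also be registered from the SQL client with a CREATE CATALOG DDL
57 | # instead of editing sql-client-defaults.yaml. A minimal sketch, assuming the catalog name, database and credentials used above:
58 | CREATE CATALOG mypg WITH (
59 |   'type' = 'jdbc',
60 |   'default-database' = 'mydb',
61 |   'username' = 'postgres',
62 |   'password' = 'postgres',
63 |   'base-url' = 'jdbc:postgresql://localhost/'
64 | );
65 | USE CATALOG mypg;
66 | show tables;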
-------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/jdbc/kafka2mysql.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE currency ( 2 | currency_id BIGINT, 3 | currency_name STRING, 4 | rate DOUBLE, 5 | currency_time TIMESTAMP(3), 6 | country STRING, 7 | timestamp9 TIMESTAMP(3), 8 | time9 TIME(3), 9 | gdp DECIMAL(38, 18) 10 | ) WITH ( 11 | 'connector' = 'jdbc', 12 | 'url' = 'jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=utf-8', 13 | 'username' = 'root', 14 | 'password' = '', 15 | 'table-name' = 'currency', 16 | 'driver' = 'com.mysql.jdbc.Driver') 17 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/jdbc/kafkajoinmysql.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | WATERMARK FOR order_time AS order_time 10 | ) WITH ( 11 | 'connector' = 'kafka', 12 | 'topic' = 'flink_orders3', 13 | 'properties.zookeeper.connect' = 'localhost:2181', 14 | 'properties.bootstrap.servers' = 'localhost:9092', 15 | 'properties.group.id' = 'testGroup3', 16 | 'scan.startup.mode' = 'earliest-offset', 17 | 'format' = 'json' 18 | ); 19 | 20 | CREATE TABLE currency ( 21 | currency_id BIGINT, 22 | currency_name STRING, 23 | rate DOUBLE, 24 | currency_time TIMESTAMP(3), 25 | country STRING, 26 | timestamp9 TIMESTAMP(3), 27 | time9 TIME(3), 28 | gdp DECIMAL(38, 18) 29 | ) WITH ( 30 | 'connector' = 'jdbc', 31 | 'url' = 'jdbc:mysql://localhost:3306/test', 32 | 'username' = 'root', 33 | 'password' = '', 34 | 'table-name' = 'currency', 35 | 'driver' = 'com.mysql.jdbc.Driver', 36 | 'lookup.cache.max-rows' = '500', 37 | 'lookup.cache.ttl' = '3s', 38 | 'lookup.max-retries' = '3'); 39 | 40 | select o.order_id, o.item, c.currency_name, c.rate from orders as o 41 | join currency FOR SYSTEM_TIME AS OF o.proc_time c 42 | on o.currency = c.currency_name; 43 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2es/Kafka2AppendEs.sql: -------------------------------------------------------------------------------- 1 | ## batch 2 | create table csv( pageId VARCHAR, eventId VARCHAR, recvTime VARCHAR) with ( 'connector' = 'filesystem', 3 | 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 4 | 'format' = 'csv') 5 | 6 | CREATE TABLE es_table ( 7 | aggId varchar , 8 | pageId varchar , 9 | ts varchar , 10 | expoCnt int , 11 | clkCnt int 12 | ) WITH ( 13 | 'connector' = 'elasticsearch-6', 14 | 'hosts' = 'http://localhost:9200', 15 | 'index' = 'usercase13', 16 | 'document-type' = '_doc', 17 | 'document-id.key-delimiter' = '$', 18 | 'sink.bulk-flush.interval' = '1000', 19 | 'format' = 'json' 20 | ) 21 | INSERT INTO es_table 22 | SELECT pageId,eventId,cast(recvTime as varchar) as ts, 1, 1 from csv 23 | 24 | 25 | ## streaming 26 | 27 | create table csv_user( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'type' = 'filesystem', 28 | 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv', 29 | 'format.type' = 'csv', 30 | 'format.fields.0.name' = 'user_name', 31 | 'format.fields.0.data-type' = 'STRING', 32 | 'format.fields.1.name' = 
'is_new', 33 | 'format.fields.1.data-type' = 'BOOLEAN', 34 | 'format.fields.2.name' = 'content', 35 | 'format.fields.2.data-type' = 'STRING') 36 | CREATE TABLE kafka_user ( 37 | user_name STRING, 38 | is_new BOOLEAN, 39 | content STRING) WITH ( 40 | 'type' = 'kafka', 41 | 'version' = '0.10', 42 | 'topic' = 'kafka_user', 43 | 'properties.zookeeper.connect' = 'localhost:2181', 44 | 'properties.bootstrap.servers' = 'localhost:9092', 45 | 'properties.group.id' = 'testGroup3', 46 | 'startup-mode' = 'earliest-offset', 47 | 'format.type' = 'csv') 48 | insert into kafka_user 49 | select user_name, is_new, content from 50 | csv_user; 51 | 52 | CREATE TABLE es_user ( 53 | user_name STRING, 54 | is_new BOOLEAN, 55 | content STRING 56 | ) WITH ( 57 | 'type' = 'elasticsearch', 58 | 'version' = '7', 59 | 'hosts' = 'http://localhost:9200', 60 | 'index' = 'es_user', 61 | 'document-type' = '_doc', 62 | 'update-mode' = 'upsert', 63 | 'key-delimiter' = '$', 64 | 'key-null-literal' = 'n/a', 65 | 'bulk-flush.interval' = '1000', 66 | 'format.type' = 'json' 67 | ); 68 | insert into es_user 69 | select user_name, is_new, content from 70 | kafka_user; 71 | 72 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2es/Kafka2DynamicIndexEs.sql: -------------------------------------------------------------------------------- 1 | create table csv1( pageId VARCHAR, eventId VARCHAR, recvTime TIMESTAMP(3)) with ( 'connector' = 'filesystem', 2 | 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 3 | 'format' = 'csv') 4 | 5 | CREATE TABLE append_test ( 6 | aggId varchar , 7 | pageId varchar , 8 | ts timestamp(3) , 9 | expoCnt int , 10 | clkCnt int 11 | ) WITH ( 12 | 'connector' = 'elasticsearch-7', 13 | 'hosts' = 'http://localhost:9200', 14 | 'index' = 'xudynamic-index-{clkCnt}', 15 | 'document-id.key-delimiter' = '$', 16 | 'sink.bulk-flush.interval' = '1000', 17 | 'format' = 'json' 18 | ); 19 | 20 | INSERT INTO append_test 21 | SELECT pageId,eventId,recvTime ts, 1, 1 from csv1; 22 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2es/Kafka2UpsertEs.sql: -------------------------------------------------------------------------------- 1 | create table csv( pageId VARCHAR, eventId VARCHAR, recvTime VARCHAR) with ( 'connector.type' = 'filesystem', 2 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 3 | 'format.type' = 'csv', 4 | 'format.fields.0.name' = 'pageId', 5 | 'format.fields.0.data-type' = 'STRING', 6 | 'format.fields.1.name' = 'eventId', 7 | 'format.fields.1.data-type' = 'STRING', 8 | 'format.fields.2.name' = 'recvTime', 9 | 'format.fields.2.data-type' = 'STRING') 10 | 11 | CREATE TABLE test_upsert ( 12 | aggId varchar , 13 | pageId varchar , 14 | ts varchar , 15 | expoCnt bigint , 16 | clkCnt bigint 17 | ) WITH ( 18 | 'connector.type' = 'elasticsearch', 19 | 'connector.version' = '6', 20 | 'connector.hosts' = 'http://localhost:9200', 21 | 'connector.index' = 'flink_zhangle_pageview', 22 | 'connector.document-type' = '_doc', 23 | 'update-mode' = 'upsert', 24 | 'connector.key-delimiter' = '$', 25 | 'connector.key-null-literal' = 'n/a', 26 | 'connector.bulk-flush.interval' = '1000', 27 | 'format.type' = 'json' 28 | ) 29 | 30 | INSERT INTO test_upsert 31 | SELECT aggId, pageId, ts, 32 | count(case when eventId = 'exposure' then 1 else null end) as expoCnt, 33 
| count(case when eventId = 'click' then 1 else null end) as clkCnt 34 | FROM 35 | ( 36 | SELECT 37 | 'ZL_001' as aggId, 38 | pageId, 39 | eventId, 40 | recvTime, 41 | ts2Date(recvTime) as ts 42 | from csv 43 | where eventId in ('exposure', 'click') 44 | ) as t1 45 | group by aggId, pageId, ts -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2hbase/KafkaJoinHbaseJoinMysql2Hbase.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | WATERMARK FOR order_time AS order_time 10 | ) WITH ( 11 | 'connector' = 'kafka', 12 | 'topic' = 'flink_orders3', 13 | 'properties.zookeeper.connect' = 'localhost:2181', 14 | 'properties.bootstrap.servers' = 'localhost:9092', 15 | 'properties.group.id' = 'testGroup3', 16 | 'scan.startup.mode' = 'earliest-offset', 17 | 'format' = 'json' 18 | ); 19 | 20 | CREATE TABLE currency ( 21 | currency_id BIGINT, 22 | currency_name STRING, 23 | rate DOUBLE, 24 | currency_time TIMESTAMP(3), 25 | country STRING, 26 | timestamp9 TIMESTAMP(3), 27 | time9 TIME(3), 28 | gdp DECIMAL(38, 18) 29 | ) WITH ( 30 | 'connector' = 'jdbc', 31 | 'url' = 'jdbc:mysql://localhost:3306/test', 32 | 'username' = 'root', 33 | 'table-name' = 'currency', 34 | 'password' = '', 35 | 'driver' = 'com.mysql.jdbc.Driver', 36 | 'lookup.cache.max-rows' = '500', 37 | 'lookup.cache.ttl' = '10s', 38 | 'lookup.max-retries' = '3'); 39 | 40 | CREATE TABLE country ( 41 | rowkey VARCHAR, 42 | f1 ROW 43 | ,f2 ROW) WITH ( 44 | 'connector' = 'hbase-1.4', 45 | 'table-name' = 'country', 46 | 'zookeeper.quorum' = 'localhost:2182', 47 | 'zookeeper.znode.parent' = '/hbase' ); 48 | 49 | CREATE TABLE gmv ( 50 | rowkey VARCHAR, 51 | f1 ROW 53 | ) WITH ( 54 | 'connector' = 'hbase-1.4', 55 | 'table-name' = 'gmv', 56 | 'zookeeper.quorum' = 'localhost:2182', 57 | 'zookeeper.znode.parent' = '/hbase', 58 | 'sink.buffer-flush.max-size' = '10mb', 59 | 'sink.buffer-flush.max-rows' = '1000', 60 | 'sink.buffer-flush.interval' = '2s' ); 61 | 62 | insert into gmv select concat(log_ts,'_',item) as rowkey, 63 | ROW(log_ts, item, country_name, country_name_cn, region_name, currency, order_cnt, currency_time, gmv) as f1 64 | from (select co.f1.country_name as country_name, co.f1.country_name_cn as country_name_cn, co.f1.region_name as region_name, co.f1.currency as currency, cast(TUMBLE_END(o.ts, INTERVAL '10' SECOND) as VARCHAR) as log_ts, 65 | o.item, COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DOUBLE) as gmv 66 | from orders as o 67 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 68 | on o.currency = c.currency_name 69 | -- see FLINK-18072 70 | -- left outer join country FOR SYSTEM_TIME AS OF o.proc_time co 71 | -- on c.country = co.rowkey 72 | group by o.item, c.currency_time, c.rate, co.f1.country_name, co.f1.country_name_cn, co.f1.region_name, co.f1.currency, TUMBLE(o.ts, INTERVAL '10' SECOND)) a 73 | 74 | insert into gmv select concat(log_ts,'_',item) as rowkey, 75 | ROW(log_ts, item, country_name, country_name_cn, region_name, currency, order_cnt, currency_time, gmv) as f1 from ( 76 | select 'test' as country_name, 'test' as country_name_cn,'test' as region_name, 'test' as currency, cast(TUMBLE_END(o.order_time, INTERVAL '10' SECOND) as VARCHAR) as log_ts, 77 | o.item, 
COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DOUBLE) as gmv 78 | from orders as o 79 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 80 | on o.currency = c.currency_name 81 | group by o.item, c.currency_time, c.rate, 'test', 'test', 'test', 'test', TUMBLE(o.order_time, INTERVAL '10' SECOND)) a 82 | 83 | 84 | -- result in hbase 85 | -- 2020-06-08 18:13:00.000_\xE9\x85\xB8\xE column=f1:region_name, timestamp=1591630452428, value=test 86 | -- 5\xA5\xB6 87 | -- 233 row(s) in 0.2560 seconds -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2hbase/UnboundedKafkaJoinHbase2Hbase.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time 11 | ) WITH ( 12 | 'connector' = 'kafka', 13 | 'topic' = 'flink_orders2', 14 | 'properties.zookeeper.connect' = 'localhost:2181', 15 | 'properties.bootstrap.servers' = 'localhost:9092', 16 | 'properties.group.id' = 'testGroup3', 17 | 'scan.startup.mode' = 'earliest-offset', 18 | 'format' = 'json' 19 | ); 20 | 21 | CREATE TABLE currency ( 22 | currency_id BIGINT, 23 | currency_name STRING, 24 | rate DOUBLE, 25 | currency_time TIMESTAMP(3), 26 | country STRING, 27 | timestamp9 TIMESTAMP(3), 28 | time9 TIME(3), 29 | gdp DECIMAL(38, 18) 30 | ) WITH ( 31 | 'connector' = 'jdbc', 32 | 'url' = 'jdbc:mysql://localhost:3306/test', 33 | 'username' = 'root', 34 | 'table-name' = 'currency', 35 | 'password' = '', 36 | 'driver' = 'com.mysql.jdbc.Driver', 37 | 'lookup.cache.max-rows' = '500', 38 | 'lookup.cache.ttl' = '10s', 39 | 'lookup.max-retries' = '3'); 40 | 41 | CREATE TABLE country ( 42 | rowkey VARCHAR, 43 | f1 ROW 44 | ,f2 ROW) WITH ( 45 | 'connector' = 'hbase-1.4', 46 | 'table-name' = 'country', 47 | 'zookeeper.quorum' = 'localhost:2182', 48 | 'zookeeper.znode.parent' = '/hbase' ); 49 | 50 | CREATE TABLE gmv1 ( 51 | rowkey VARCHAR, 52 | f1 ROW 53 | ) WITH ( 54 | 'connector' = 'hbase-1.4', 55 | 'table-name' = 'gmv', 56 | 'zookeeper.quorum' = 'localhost:2182', 57 | 'zookeeper.znode.parent' = '/hbase', 58 | 'sink.buffer-flush.max-size' = '10mb', 59 | 'sink.buffer-flush.max-rows' = '1000', 60 | 'sink.buffer-flush.interval' = '2s' ); 61 | 62 | 63 | 64 | insert into gmv1 65 | select rowkey, ROW(max(ts), max(item), max(country_name)) as f1 66 | from (select concat(cast(o.ts as VARCHAR), '_', item, '_', co.f1.country_name) as rowkey, 67 | cast(o.ts as VARCHAR) as ts, o.item as item, co.f1.country_name as country_name 68 | from orders as o 69 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 70 | on o.currency = c.currency_name 71 | -- see FLINK-18072 72 | -- left outer join country FOR SYSTEM_TIME AS OF o.proc_time co 73 | -- on c.country = co.rowkey 74 | ) a group by rowkey 75 | 76 | 77 | insert into gmv1 78 | select rowkey, ROW(max(ts), max(item), max(country_name)) as f1 from ( 79 | select concat(cast(o.ts as VARCHAR), '_', item, '_', co.f1.country_name) as rowkey, 80 | cast(o.ts as VARCHAR) as ts, o.item as item, co.f1.country_name as country_name 81 | from orders as o 82 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 83 | on o.currency = c.currency_name 84 | 85 | 86 | -- see FLINK-18072 87 | -- left outer join country FOR 
SYSTEM_TIME AS OF o.proc_time co 88 | -- on c.country = co.rowkey 89 | ) a group by rowkey 90 | 91 | -- 92 | -- result in hbase: 93 | -- 2020-06-08 18:12:53.061_Apple_\xE4\xBA\ column=f1:item, timestamp=1591611172859, value=Apple 94 | -- xBA\xE6\xB0\x91\xE5\xB8\x81 95 | -- 2020-06-08 18:12:53.061_Apple_\xE4\xBA\ column=f1:log_ts, timestamp=1591611172859, value=2020-06-08 18:12:53.061 96 | -- xBA\xE6\xB0\x91\xE5\xB8\x81 97 | -- 52 row(s) in 0.0280 seconds 98 | -- 99 | -- hbase(main):026:0> scan 'gmv' 100 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2hbase/hbase_cdc: -------------------------------------------------------------------------------- 1 | // prepare cdc data 2 | bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic product_binlog 3 | cat ~/sourcecode/project/flink-1.11/flink/flink-formats/flink-json/src/test/resources/debezium-data-schema-exclude.txt | bin/kafka-console-producer.sh --broker-list localhost:9092 --topic product_binlog 4 | bin/kafka-console-consumer.sh --topic product_binlog --bootstrap-server localhost:9092 --from-beginning 5 | 6 | // test write to hbase 7 | CREATE TABLE product_binlog ( 8 | id INT NOT NULL, 9 | name STRING, 10 | description STRING, 11 | weight DECIMAL(10,3) 12 | ) WITH ( 13 | 'connector' = 'kafka', 14 | 'topic' = 'product_binlog', 15 | 'properties.bootstrap.servers' = 'localhost:9092', 16 | 'scan.startup.mode' = 'earliest-offset', 17 | 'format' = 'debezium-json' 18 | ); 19 | 20 | CREATE TABLE hbase_product ( 21 | id INT NOT NULL PRIMARY KEY NOT ENFORCED, 22 | f1 ROW 23 | ) WITH ( 24 | 'connector' = 'hbase-2.2', 25 | 'table-name' = 'product1', 26 | 'zookeeper.quorum' = 'localhost:2181', 27 | 'zookeeper.znode.parent' = '/hbase', 28 | 'sink.buffer-flush.max-size' = '10mb', 29 | 'sink.buffer-flush.max-rows' = '1000', 30 | 'sink.buffer-flush.interval' = '2s' ); 31 | 32 | insert into hbase_product select id, ROW(name,description) from product_binlog; -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.12/hbase2_test.sql: -------------------------------------------------------------------------------- 1 | // prepare cdc data 2 | bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic product_binlog 3 | cat ~/sourcecode/project/flink-1.11/flink/flink-formats/flink-json/src/test/resources/debezium-data-schema-exclude.txt | bin/kafka-console-producer.sh --broker-list localhost:9092 --topic product_binlog 4 | bin/kafka-console-consumer.sh --topic product_binlog --bootstrap-server localhost:9092 --from-beginning 5 | 6 | // test write to hbase 7 | CREATE TABLE product_binlog1 ( 8 | id INT NOT NULL, 9 | name STRING, 10 | description STRING, 11 | weight DECIMAL(10,3) 12 | ) WITH ( 13 | 'connector' = 'kafka', 14 | 'topic' = 'product_binlog1', 15 | 'properties.bootstrap.servers' = 'localhost:9092', 16 | 'scan.startup.mode' = 'earliest-offset', 17 | 'format' = 'debezium-json' 18 | ); 19 | 20 | CREATE TABLE hbase_product ( 21 | id INT NOT NULL PRIMARY KEY NOT ENFORCED, 22 | f1 ROW 23 | ) WITH ( 24 | 'connector' = 'hbase-2.2', 25 | 'table-name' = 'product1', 26 | 'zookeeper.quorum' = 'localhost:2182', 27 | 'zookeeper.znode.parent' = '/hbase', 28 | 'sink.buffer-flush.max-size' = '10mb', 29 | 'sink.buffer-flush.max-rows' = '1000', 30 | 'sink.buffer-flush.interval' = '2s' ); 31 | 32 | insert into hbase_product select id, 
ROW(name,description) from product_binlog1; -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.12/hive_latest_dim.sql: -------------------------------------------------------------------------------- 1 | -- build local hive environment 2 | -- $:git clone git@github.com:big-data-europe/docker-hive.git 3 | -- $:cd docker-hive 4 | -- $:docker-compose up -d 5 | 6 | -- create hive partition table 7 | create table user_info_latest(user_name string, is_new boolean, content string) 8 | PARTITIONED BY (date_col STRING) TBLPROPERTIES ( 9 | -- using default partition-name order to load the latest partition every 12h (the most recommended and convenient way) 10 | 'streaming-source.enable' = 'true', 11 | 'streaming-source.partition.include' = 'latest', 12 | 'streaming-source.monitor-interval' = '10 s', 13 | 'streaming-source.partition-order' = 'partition-name' 14 | ); 15 | 16 | -- create kafka fact table 17 | CREATE TABLE kafkaTable ( 18 | user_name STRING, 19 | is_new BOOLEAN, 20 | content STRING, 21 | date_col STRING,proctime as PROCTIME()) WITH ( 22 | 'connector' = 'kafka', 23 | 'topic' = 'kafka_user', 24 | 'properties.zookeeper.connect' = 'localhost:2181', 25 | 'properties.bootstrap.servers' = 'localhost:9092', 26 | 'properties.group.id' = 'testCsv', 27 | 'scan.startup.mode' = 'earliest-offset', 28 | 'format' = 'csv'); 29 | 30 | create table test_csv( user_name VARCHAR, is_new BOOLEAN, content VARCHAR, date_col VARCHAR) with ( 31 | 'connector.type' = 'filesystem', 32 | 'connector.path' = '/opt/user_part.csv', 33 | 'format.type' = 'csv') 34 | 35 | -- join the latest hive partition 36 | select * from kafkaTable LEFT JOIN user_info_latest 37 | for system_time as of kafkaTable.proctime as h 38 | on kafkaTable.user_name = h.user_name; 39 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.12/upsert-kafka.sql: -------------------------------------------------------------------------------- 1 | 2 | -- create an upsert-kafka table 3 | CREATE TABLE pageviews_per_region ( 4 | region STRING, 5 | pv BIGINT, 6 | uv BIGINT, 7 | PRIMARY KEY (region) NOT ENFORCED 8 | ) WITH ( 9 | 'connector' = 'upsert-kafka', 10 | 'topic' = 'pageviews_per_region', 11 | 'properties.bootstrap.servers' = 'localhost:9092', 12 | 'key.format' = 'json', 13 | 'value.format' = 'json' 14 | ); 15 | 16 | -- write test data to upsert-kafka table 17 | insert into pageviews_per_region values('test1', 100, 20); 18 | insert into pageviews_per_region values('test2', 200, 20); 19 | insert into pageviews_per_region values('test1', 101, 20); 20 | 21 | 22 | -- check data has been writen into kafka 23 | ./bin/kafka-console-consumer.sh --topic pageviews_per_region --bootstrap-server localhost:9092 --from-beginning --property print.key=true --property key.separator="-" 24 | -- {"region":"test1"}-{"region":"test1","pv":100,"uv":20} 25 | -- {"region":"test2"}-{"region":"test2","pv":200,"uv":200} 26 | -- {"region":"test1"}-{"region":"test1","pv":101,"uv":20} 27 | 28 | -- Read upsert kafka in sql client, the key {"region":"test1"} should update with the new value 29 | select * from pageviews_per_region; 30 | -- region pv uv 31 | -- test2 200 200 32 | -- test1 101 20 -------------------------------------------------------------------------------- /etl-job/src/main/resources/pictures/CURRRENT_TIMESTAMP.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/etl-job/src/main/resources/pictures/CURRRENT_TIMESTAMP.png -------------------------------------------------------------------------------- /etl-job/src/main/resources/readme.md: -------------------------------------------------------------------------------- 1 | We can use flink-sql-client to run all of these SQL tests. 2 | 3 | * (1) Add the necessary connector jars to the lib directory of the Flink installation, e.g. if you want to test reading from Kafka and writing to Elasticsearch, 4 | please add flink-sql-connector-kafka.jar and flink-sql-connector-elasticsearch.jar to lib. 5 | 6 | * (2) Set up the necessary components like a Kafka cluster/Elasticsearch cluster/MySQL/HBase. 7 | 8 | * (3) Start the Flink cluster and start the sql-client. 9 | 10 | * (4) Submit the related SQL to the sql-client to run the test. -------------------------------------------------------------------------------- /flink-demo/flink-jdbc-demo/docker-compose-flink-demo.yaml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | zookeeper: 4 | image: wurstmeister/zookeeper:3.4.6 5 | ports: 6 | - "2181:2181" 7 | kafka: 8 | image: wurstmeister/kafka:2.12-2.2.1 9 | ports: 10 | - "9092:9092" 11 | - "9094:9094" 12 | depends_on: 13 | - zookeeper 14 | environment: 15 | - KAFKA_ADVERTISED_LISTENERS=INSIDE://:9094,OUTSIDE://localhost:9092 16 | - KAFKA_LISTENERS=INSIDE://:9094,OUTSIDE://:9092 17 | - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT 18 | - KAFKA_INTER_BROKER_LISTENER_NAME=INSIDE 19 | - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 20 | volumes: 21 | - /var/run/docker.sock:/var/run/docker.sock 22 | mysql: 23 | image: debezium/example-mysql:1.1 24 | ports: 25 | - 3306:3306 26 | environment: 27 | - MYSQL_ROOT_PASSWORD=debezium 28 | - MYSQL_USER=mysqluser 29 | - MYSQL_PASSWORD=mysqlpw 30 | postgres: 31 | image: debezium/example-postgres:1.1 32 | ports: 33 | - 5432:5432 34 | environment: 35 | - POSTGRES_USER=postgres 36 | - POSTGRES_PASSWORD=postgres 37 | connect: 38 | image: debezium/connect:1.1 39 | ports: 40 | - 8083:8083 41 | depends_on: 42 | - kafka 43 | - mysql 44 | environment: 45 | - BOOTSTRAP_SERVERS=kafka:9094 46 | - GROUP_ID=1 47 | - CONFIG_STORAGE_TOPIC=my_connect_configs 48 | - OFFSET_STORAGE_TOPIC=my_connect_offsets 49 | - STATUS_STORAGE_TOPIC=my_connect_statuses 50 | -------------------------------------------------------------------------------- /flink-demo/flink-jdbc-demo/flink-demo-udf.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/flink-demo/flink-jdbc-demo/flink-demo-udf.jar -------------------------------------------------------------------------------- /flink-demo/flink-temporal-join-demo/register-mysql.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mysql-inventory-connector", 3 | "config": { 4 | "connector.class": "io.debezium.connector.mysql.MySqlConnector", 5 | "tasks.max": "1", 6 | "tombstones.on.delete":"false", 7 | "database.hostname": "mysql", 8 | "database.port": "3306", 9 | "database.user": "debezium", 10 | "database.password": "dbz", 11 | "database.server.id": "184054", 12 | "database.server.name": "dbserver1", 13 | "database.whitelist": "inventory", 14 | "database.history.kafka.bootstrap.servers": "kafka:9094", 15 | "database.history.kafka.topic": "schema-changes.inventory" 16 | } 
17 | } 18 | -------------------------------------------------------------------------------- /flink-demo/flink-temporal-join-demo/temporal-join-versioned-table.yaml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | zookeeper: 4 | image: wurstmeister/zookeeper:3.4.6 5 | ports: 6 | - "2181:2181" 7 | kafka: 8 | image: wurstmeister/kafka:2.12-2.2.1 9 | ports: 10 | - "9092:9092" 11 | - "9094:9094" 12 | depends_on: 13 | - zookeeper 14 | environment: 15 | - KAFKA_ADVERTISED_LISTENERS=INSIDE://:9094,OUTSIDE://localhost:9092 16 | - KAFKA_LISTENERS=INSIDE://:9094,OUTSIDE://:9092 17 | - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT 18 | - KAFKA_INTER_BROKER_LISTENER_NAME=INSIDE 19 | - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 20 | volumes: 21 | - /var/run/docker.sock:/var/run/docker.sock 22 | mysql: 23 | image: debezium/example-mysql:1.1 24 | ports: 25 | - 3306:3306 26 | environment: 27 | - MYSQL_ROOT_PASSWORD=debezium 28 | - MYSQL_USER=mysqluser 29 | - MYSQL_PASSWORD=mysqlpw 30 | connect: 31 | image: debezium/connect:1.1 32 | ports: 33 | - 8083:8083 34 | depends_on: 35 | - kafka 36 | - mysql 37 | environment: 38 | - BOOTSTRAP_SERVERS=kafka:9094 39 | - GROUP_ID=1 40 | - CONFIG_STORAGE_TOPIC=my_connect_configs 41 | - OFFSET_STORAGE_TOPIC=my_connect_offsets 42 | - STATUS_STORAGE_TOPIC=my_connect_statuses 43 | -------------------------------------------------------------------------------- /flink-demo/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | flink-sql-etl 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | flink-demo 13 | 14 | 15 | org.apache.flink 16 | flink-table-common 17 | ${flink.version} 18 | provided 19 | 20 | 21 | 22 | 23 | 24 | 25 | org.apache.maven.plugins 26 | maven-jar-plugin 27 | 3.0.0 28 | 29 | flink-demo-udf 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /flink-demo/src/main/java/udf/Int2DateUDF.java: -------------------------------------------------------------------------------- 1 | package udf;/* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import org.apache.flink.table.catalog.DataTypeFactory; 20 | import org.apache.flink.table.functions.ScalarFunction; 21 | import org.apache.flink.table.types.inference.TypeInference; 22 | 23 | import java.sql.Date; 24 | import java.time.LocalDate; 25 | 26 | public class Int2DateUDF extends ScalarFunction { 27 | 28 | public Date eval(int epochDay) { 29 | return Date.valueOf(LocalDate.ofEpochDay(epochDay)); 30 | } 31 | 32 | @Override 33 | public TypeInference getTypeInference(DataTypeFactory typeFactory) { 34 | return super.getTypeInference(typeFactory); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 8 | 9 | 1.12-vvr-3.0.0-SNAPSHOT 10 | 2.11 11 | 2.8.3 12 | 3.1.0 13 | 14 | 15 | org.example 16 | flink-sql-etl 17 | pom 18 | 1.0-SNAPSHOT 19 | 20 | data-generator 21 | etl-job 22 | sql-avro 23 | flink-demo 24 | state-process 25 | 26 | 27 | 28 | 29 | org.scala-lang 30 | scala-library 31 | 2.11.12 32 | 33 | 34 | 35 | 36 | 37 | confluent 38 | https://mvnrepository.com/artifact/io.confluent/kafka-avro-serializer 39 | 40 | 41 | confluent1 42 | http://packages.confluent.io/maven/ 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /sql-avro/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | flink-sql-etl 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | sql-avro 13 | 14 | 15 | 16 | 17 | 18 | org.apache.flink 19 | flink-table-common 20 | ${flink.version} 21 | 22 | 23 | org.apache.flink 24 | flink-table-planner-blink_${scala.binary.version} 25 | ${flink.version} 26 | 27 | 28 | org.apache.flink 29 | flink-clients_${scala.binary.version} 30 | ${flink.version} 31 | 32 | 33 | 34 | org.apache.flink 35 | flink-avro 36 | ${flink.version} 37 | 38 | 39 | org.apache.avro 40 | avro 41 | 1.8.2 42 | 43 | 44 | 45 | org.apache.flink 46 | flink-csv 47 | ${flink.version} 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /sql-avro/src/main/java/TestUserIssue12.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 20 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 21 | import org.apache.flink.types.Row; 22 | import org.apache.flink.util.CloseableIterator; 23 | 24 | public class TestUserIssue12 { 25 | public static void main(String[] args) throws Exception { 26 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 28 | environment.setParallelism(1); 29 | 30 | //construct some test data with avro format 31 | //writeTestAvroData(tableEnvironment); 32 | 33 | tableEnvironment.executeSql("CREATE TABLE people (\n" + 34 | " name String," + 35 | " status Boolean," + 36 | " note STRING" + 37 | ") WITH (\n" + 38 | " 'connector' = 'filesystem',\n" + 39 | " 'path' = 'file:///Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.avro',\n" + 40 | " 'format' = 'avro'\n" + 41 | ")"); 42 | System.out.println("CREATE TABLE people (\n" + 43 | " name String," + 44 | " status Boolean," + 45 | " note STRING" + 46 | ") WITH (\n" + 47 | " 'connector' = 'filesystem',\n" + 48 | " 'path' = 'file:///Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.avro',\n" + 49 | " 'format' = 'avro'\n" + 50 | ")"); 51 | 52 | CloseableIterator result = tableEnvironment.executeSql("select * from people").collect(); 53 | while (result.hasNext()) { 54 | System.out.println(result.next()); 55 | } 56 | } 57 | 58 | private static void writeTestAvroData(StreamTableEnvironment tableEnvironment) throws Exception { 59 | String csvSourceDDL = "create table csv(" + 60 | " name String," + 61 | " status Boolean," + 62 | " note STRING" + 63 | ") with (" + 64 | " 'connector' = 'filesystem',\n" + 65 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv',\n" + 66 | " 'format' = 'csv'" + 67 | ")"; 68 | String csvSink = "create table csvSink(" + 69 | " name String," + 70 | " status Boolean," + 71 | " note STRING" + 72 | ") with (" + 73 | " 'connector' = 'filesystem',\n" + 74 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.avro',\n" + 75 | " 'format' = 'avro'" + 76 | ")"; 77 | tableEnvironment.executeSql(csvSourceDDL); 78 | tableEnvironment.executeSql(csvSink); 79 | tableEnvironment.executeSql("insert into csvSink select * from csv").await(); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /state-process/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | flink-sql-etl 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | state-process 13 | 14 | 15 | 16 | org.apache.flink 17 | flink-state-processor-api_2.11 18 | ${flink.version} 19 | 20 | 21 | 22 | org.apache.flink 23 | flink-java 24 | ${flink.version} 25 | 26 | 27 | org.apache.flink 28 | flink-streaming-java_${scala.binary.version} 29 | ${flink.version} 30 | 31 | 32 | org.apache.flink 33 | flink-runtime_2.11 34 | ${flink.version} 35 | 36 | 37 | org.apache.flink 38 | flink-clients_2.11 39 | ${flink.version} 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /state-process/src/main/java/state/CdcSourceStateAnalysis.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache 
Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package state; 20 | 21 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 22 | import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo; 23 | import org.apache.flink.api.java.DataSet; 24 | import org.apache.flink.api.java.ExecutionEnvironment; 25 | import org.apache.flink.configuration.CheckpointingOptions; 26 | import org.apache.flink.configuration.Configuration; 27 | import org.apache.flink.runtime.state.StateBackendLoader; 28 | import org.apache.flink.state.api.ExistingSavepoint; 29 | import org.apache.flink.state.api.Savepoint; 30 | 31 | import java.nio.charset.StandardCharsets; 32 | 33 | public class CdcSourceStateAnalysis { 34 | 35 | public static void main(String[] args) throws Exception { 36 | ExecutionEnvironment bEnv = ExecutionEnvironment.getExecutionEnvironment(); 37 | bEnv.setParallelism(1); 38 | 39 | Configuration configuration = new Configuration(); 40 | configuration.setString(CheckpointingOptions.STATE_BACKEND.key(), "com.alibaba.flink.statebackend.GeminiStateBackendFactory"); 41 | ExistingSavepoint savepoint = Savepoint.load(bEnv, "/Users/bang/flink-cdc-debug", 42 | StateBackendLoader.loadStateBackendFromConfig(configuration, Thread.currentThread().getContextClassLoader(), null)); 43 | 44 | 45 | DataSet offsetStat = savepoint.readUnionState("6cdc5bb954874d922eaee11a8e7b5dd5", "offset-states", PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO); 46 | System.out.println(new String(offsetStat.collect().get(0), StandardCharsets.UTF_8) ); 47 | DataSet historyRecords = savepoint.readUnionState("6cdc5bb954874d922eaee11a8e7b5dd5", "history-records-states", BasicTypeInfo.STRING_TYPE_INFO); 48 | historyRecords.print(); 49 | 50 | bEnv.execute(""); 51 | } 52 | } 53 | --------------------------------------------------------------------------------
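A note on state-process/src/main/java/state/CdcSourceStateAnalysis.java above: the first argument passed to readUnionState is the uid of the CDC source operator in the job that took the savepoint. A minimal sketch of the same analysis under simpler, assumed conditions (a source uid of "cdc-source", the default memory state backend, and an illustrative savepoint path; none of these names come from this repository):

import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.runtime.state.memory.MemoryStateBackend;
import org.apache.flink.state.api.ExistingSavepoint;
import org.apache.flink.state.api.Savepoint;

import java.nio.charset.StandardCharsets;

public class CdcSourceStateAnalysisSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment bEnv = ExecutionEnvironment.getExecutionEnvironment();
        bEnv.setParallelism(1);

        // Load the savepoint with the state backend the original job was configured with
        // (the default memory backend is assumed here instead of a custom backend factory).
        ExistingSavepoint savepoint = Savepoint.load(bEnv, "/path/to/savepoint", new MemoryStateBackend());

        // "offset-states" is the union list state written by the Debezium-based CDC source;
        // "cdc-source" is an assumed operator uid set on that source via uid("cdc-source").
        DataSet<byte[]> offsetState = savepoint.readUnionState(
                "cdc-source", "offset-states", PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO);

        // collect() triggers execution of the batch job, so no extra bEnv.execute() call is needed.
        for (byte[] bytes : offsetState.collect()) {
            System.out.println(new String(bytes, StandardCharsets.UTF_8));
        }
    }
}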