├── .gitignore ├── data-generator ├── pom.xml └── src │ └── main │ ├── java │ └── kafka │ │ ├── AvroSchemaRegistryTest.java │ │ ├── JsonCurrencySender.java │ │ ├── JsonOrderSender.java │ │ └── KafkaGenerator.java │ └── resources │ ├── avro │ └── UserAvro.avsc │ ├── dynamic_index.csv │ ├── hive_read.csv │ ├── 1 │ ├── 2 │ ├── 3 │ └── 4 │ ├── src.csv │ ├── test.csv │ ├── test.json │ ├── test1.csv │ ├── test15.csv │ ├── test_csv.csv │ ├── testdata.avro │ ├── user.avro │ └── part-6be7eb15-4ec0-4ff8-aa29-59d5ec37dfae-0-0 │ ├── user.csv │ ├── user19.json │ ├── user2.csv │ ├── user3.csv │ ├── user4.json │ └── user_part.csv ├── etl-job ├── pom.xml └── src │ └── main │ ├── java │ ├── Test.java │ ├── TestGen.java │ ├── constants │ │ ├── FlinkSqlConstants.java │ │ └── UnboundedFlinkSqlConstants.java │ ├── kafka │ │ └── UserAvro.java │ ├── kafka2es │ │ ├── Kafak2DynamicIndexEs.java │ │ ├── Kafka2AppendEs.java │ │ ├── Kafka2UpsertEs.java │ │ └── Kafka2dynamicEsSQL.java │ ├── kafka2file │ │ ├── EventTimeBucketAssigner.java │ │ ├── ReadHiveDataETL.java │ │ ├── StreamETLKafka2Hdfs.java │ │ ├── StreamETLKafka2HdfsSQL.java │ │ ├── TestCsv2Csv.java │ │ ├── TestCsv2Csv1.java │ │ ├── TestCsvError.java │ │ ├── TestFileSink.scala │ │ └── Write2Kafka.java │ ├── kafka2hbase │ │ ├── KafkaJoinHbaseJoinMysql2Hbase.java │ │ ├── TestHbase.java │ │ └── UnboundedKafkaJoinHbase2Hbase.java │ ├── kafka2jdbc │ │ ├── KafkaJoinJdbc2Jdbc.java │ │ ├── KafkaJoinJdbc2JdbcProc.java │ │ ├── TestJdbc.java │ │ ├── UnboundedKafkaJoinJdbc2Jdbc.java │ │ ├── retract2Mysql.java │ │ └── testNonExistedTable.java │ ├── kafka2kafka │ │ ├── ConsumeConfluentAvroTest.java │ │ ├── KafkaAvro2Kafka.java │ │ ├── KafkaCsv2Kafka.java │ │ ├── KafkaJoinJdbc2Kafka.java │ │ ├── KafkaJoinKafka2Kafka.java │ │ └── KafkaJson2Kafka.java │ ├── pge2e │ │ └── PgCatalogTest.java │ └── usercase │ │ ├── TestUserIssue.java │ │ ├── TestUserIssue10.java │ │ ├── TestUserIssue11.java │ │ ├── TestUserIssue12.java │ │ ├── TestUserIssue13.java │ │ ├── TestUserIssue14.java │ │ ├── TestUserIssue15.java │ │ ├── TestUserIssue16.java │ │ ├── TestUserIssue17.java │ │ ├── TestUserIssue18.java │ │ ├── TestUserIssue19.java │ │ ├── TestUserIssue2.java │ │ ├── TestUserIssue20.java │ │ ├── TestUserIssue21.java │ │ ├── TestUserIssue22.java │ │ ├── TestUserIssue3.java │ │ ├── TestUserIssue4.java │ │ ├── TestUserIssue5.java │ │ ├── TestUserIssue6.java │ │ ├── TestUserIssue7.java │ │ ├── TestUserIssue8.java │ │ └── TestUserIssue9.java │ └── resources │ ├── job-scripts │ └── kafak2kafka_etl_run.sh │ ├── job-sql-1.10 │ ├── kafka2es │ │ ├── Kafka2AppendEs.sql │ │ ├── Kafka2DynamicIndexEs.sql │ │ └── Kafka2UpsertEs.sql │ ├── kafka2filesystemandhive │ │ ├── Csv2HivePartition.sql │ │ ├── Csv2HiveSink.sql │ │ ├── FileSystem2FileSystem.sql │ │ └── Kafka2HiveSink.sql │ ├── kafka2hbase │ │ ├── KafkaJoinHbaseJoinMysql2Hbase.sql │ │ └── UnboundedKafkaJoinHbase2Hbase.sql │ ├── kafka2jdbc │ │ ├── KafkaJoinJdbc2Jdbc.sql │ │ └── UnboundedKafkaJoinJdbc2Jdbc.sql │ └── kafka2kafka │ │ ├── KafkaAvro2Kafka.sql │ │ ├── KafkaCsv2Kafka.sql │ │ ├── KafkaJoinJdbc2Kafka.sql │ │ └── kafkaJson2kafka.sql │ ├── job-sql-1.11 │ ├── catalog │ │ └── PgcatalogE2eTest.sql │ ├── jdbc │ │ ├── kafka2mysql.sql │ │ └── kafkajoinmysql.sql │ ├── kafka2es │ │ ├── Kafka2AppendEs.sql │ │ ├── Kafka2DynamicIndexEs.sql │ │ └── Kafka2UpsertEs.sql │ └── kafka2hbase │ │ ├── KafkaJoinHbaseJoinMysql2Hbase.sql │ │ ├── UnboundedKafkaJoinHbase2Hbase.sql │ │ └── hbase_cdc │ ├── job-sql-1.12 │ ├── hbase2_test.sql │ ├── hive_latest_dim.sql │ └── 
upsert-kafka.sql │ ├── log4j.properties │ ├── pictures │ └── CURRRENT_TIMESTAMP.png │ └── readme.md ├── flink-demo ├── flink-jdbc-demo │ ├── README.md │ ├── docker-compose-flink-demo.yaml │ ├── flink-demo-udf.jar │ └── sql-client-defaults.yaml ├── flink-temporal-join-demo │ ├── README.md │ ├── register-mysql.json │ ├── sql-client-defaults.yaml │ └── temporal-join-versioned-table.yaml ├── pom.xml └── src │ └── main │ └── java │ └── udf │ └── Int2DateUDF.java ├── pom.xml ├── sql-avro ├── pom.xml └── src │ └── main │ └── java │ └── TestUserIssue12.java └── state-process ├── pom.xml └── src └── main └── java ├── org └── apache │ └── flink │ └── state │ └── api │ └── runtime │ └── metadata │ └── SavepointMetadata.java └── state └── CdcSourceStateAnalysis.java /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | scalastyle-output.xml 3 | .classpath 4 | .idea 5 | .metadata 6 | .settings 7 | .project 8 | .version.properties 9 | filter.properties 10 | logs.zip 11 | target 12 | tmp 13 | *.class 14 | *.iml 15 | *.swp 16 | *.jar 17 | !flink-demo-udf.jar 18 | *.zip 19 | *.log 20 | *.pyc 21 | .DS_Store 22 | build-target 23 | flink-end-to-end-tests/flink-datastream-allround-test/src/main/java/org/apache/flink/streaming/tests/avro/ 24 | flink-formats/flink-avro/src/test/java/org/apache/flink/formats/avro/generated/ 25 | flink-formats/flink-parquet/src/test/java/org/apache/flink/formats/parquet/generated/ 26 | flink-runtime-web/web-dashboard/node/ 27 | flink-runtime-web/web-dashboard/node_modules/ 28 | flink-runtime-web/web-dashboard/web/ 29 | flink-python/dist/ 30 | flink-python/build/ 31 | flink-python/pyflink.egg-info/ 32 | flink-python/apache_flink.egg-info/ 33 | flink-python/docs/_build 34 | flink-python/.tox/ 35 | flink-python/dev/download 36 | flink-python/dev/.conda/ 37 | flink-python/dev/log/ 38 | flink-python/dev/.stage.txt 39 | flink-python/.eggs/ 40 | atlassian-ide-plugin.xml 41 | out/ 42 | /docs/api 43 | /docs/content 44 | /docs/.bundle 45 | /docs/.rubydeps 46 | /docs/ruby2/.bundle 47 | /docs/ruby2/.rubydeps 48 | /docs/.jekyll-metadata 49 | *.ipr 50 | *.iws 51 | tools/flink 52 | tools/flink-* 53 | tools/releasing/release 54 | tools/japicmp-output 55 | -------------------------------------------------------------------------------- /data-generator/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | flink-sql-etl 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | data-generator 13 | 14 | 15 | org.apache.kafka 16 | kafka_2.11 17 | 1.0.0 18 | 19 | 20 | io.confluent 21 | kafka-avro-serializer 22 | 5.3.0 23 | 24 | 25 | org.apache.avro 26 | avro 27 | 1.8.2 28 | 29 | 30 | org.apache.avro 31 | avro-maven-plugin 32 | 1.8.2 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | org.apache.avro 41 | avro-maven-plugin 42 | 1.8.2 43 | 44 | 45 | generate-sources 46 | 47 | schema 48 | 49 | 50 | src/main/resources/avro 51 | ${project.build.directory}/generated-sources 52 | String 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /data-generator/src/main/java/kafka/AvroSchemaRegistryTest.java: -------------------------------------------------------------------------------- 1 | //package kafka; 2 | // 3 | //import io.confluent.kafka.serializers.KafkaAvroDeserializer; 4 | //import io.confluent.kafka.serializers.KafkaAvroSerializer; 5 | //import org.apache.kafka.clients.consumer.ConsumerConfig; 6 | //import 
org.apache.kafka.clients.consumer.ConsumerRecord; 7 | //import org.apache.kafka.clients.consumer.ConsumerRecords; 8 | //import org.apache.kafka.clients.consumer.KafkaConsumer; 9 | //import org.apache.kafka.clients.producer.KafkaProducer; 10 | //import org.apache.kafka.clients.producer.ProducerConfig; 11 | //import org.apache.kafka.clients.producer.ProducerRecord; 12 | //import org.apache.kafka.common.serialization.StringDeserializer; 13 | //import org.apache.kafka.common.serialization.StringSerializer; 14 | // 15 | //import java.io.IOException; 16 | //import java.text.DateFormat; 17 | //import java.text.SimpleDateFormat; 18 | //import java.util.Collections; 19 | //import java.util.Date; 20 | //import java.util.Properties; 21 | //import java.util.Random; 22 | //import java.util.stream.IntStream; 23 | // 24 | //public class AvroSchemaRegistryTest { 25 | // public static final String WIKIPEDIA_FEED = "WikipediaFeed2_filtered"; 26 | // 27 | // public static void main(final String[] args) throws IOException { 28 | // produceInputs(); 29 | // consumeOutput(); 30 | // } 31 | // 32 | // private static void produceInputs() throws IOException { 33 | // final String[] users = {"leonard", "bob", "joe", "damian", "tania", "phil", "sam", "lauren", "joseph"}; 34 | // final Properties props = new Properties(); 35 | // props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); 36 | // props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); 37 | // props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class); 38 | // props.put("schema.registry.url", "http://localhost:8081"); 39 | // final KafkaProducer producer = new KafkaProducer<>(props); 40 | // final Random random = new Random(); 41 | // 42 | // IntStream.range(0, 10) 43 | // .mapToObj(value -> new UserAvro(users[random.nextInt(users.length)], true, "content")) 44 | // .forEach( 45 | // record -> { 46 | // System.out.println(record.toString()) ; 47 | // producer.send(new ProducerRecord<>(WIKIPEDIA_FEED, record.getUserName(), record)); 48 | // }); 49 | // 50 | // producer.flush(); 51 | // } 52 | // 53 | // private static void consumeOutput() { 54 | // final Properties consumerProperties = new Properties(); 55 | // consumerProperties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); 56 | // consumerProperties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); 57 | // consumerProperties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaAvroDeserializer.class); 58 | // consumerProperties.put("schema.registry.url", "http://localhost:8081"); 59 | // consumerProperties.put(ConsumerConfig.GROUP_ID_CONFIG, "wikipedia-feed-example-consumer3"); 60 | // consumerProperties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); 61 | // final KafkaConsumer consumer = new KafkaConsumer<>(consumerProperties); 62 | // consumer.subscribe(Collections.singleton(WIKIPEDIA_FEED)); 63 | // while (true) { 64 | // final ConsumerRecords consumerRecords = consumer.poll(Long.MAX_VALUE); 65 | // for (final ConsumerRecord consumerRecord : consumerRecords) { 66 | // 67 | // System.out.println(consumerRecord.key() + "=" + consumerRecord.value()); 68 | // } 69 | // } 70 | // } 71 | // 72 | //} 73 | -------------------------------------------------------------------------------- /data-generator/src/main/java/kafka/JsonCurrencySender.java: -------------------------------------------------------------------------------- 1 | package kafka; 2 | 3 | import 
com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import org.apache.kafka.clients.producer.Callback; 6 | import org.apache.kafka.clients.producer.KafkaProducer; 7 | import org.apache.kafka.clients.producer.ProducerRecord; 8 | import org.apache.kafka.clients.producer.RecordMetadata; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.text.DateFormat; 13 | import java.text.SimpleDateFormat; 14 | import java.util.Date; 15 | import java.util.HashMap; 16 | import java.util.Map; 17 | import java.util.Properties; 18 | 19 | public class JsonCurrencySender { 20 | private static final Logger logger = LoggerFactory.getLogger(JsonCurrencySender.class); 21 | private static final ObjectMapper objectMapper = new ObjectMapper(); 22 | private static final SendCallBack sendCallBack = new SendCallBack(); 23 | private static final String topicName = "flink_currency1"; 24 | private static final Map currency2rates = initCurrency2rates(); 25 | private static final Map country2currency = initCountry2Currency(); 26 | 27 | public static void sendMessage(Properties kafkaProperties, int continueMinutes) throws InterruptedException, JsonProcessingException { 28 | KafkaProducer producer = new KafkaProducer<>(kafkaProperties); 29 | //update currency per 30 seconds 30 | for (int i = 0; i < (continueMinutes * 60 / 30); i++) { 31 | long timestart = System.currentTimeMillis(); 32 | for (Map.Entry entry : country2currency.entrySet()) { 33 | Map map = new HashMap<>(); 34 | map.put("country", entry.getKey()); 35 | map.put("currency", entry.getValue()); 36 | map.put("rate", currency2rates.get(entry.getValue()) + 1); 37 | DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); 38 | Long time = System.currentTimeMillis(); 39 | Date date = new Date(time); 40 | String jsonSchemaDate = dateFormat.format(date); 41 | map.put("currency_time", jsonSchemaDate); 42 | producer.send( 43 | new ProducerRecord<>( 44 | topicName, 45 | String.valueOf(time), 46 | objectMapper.writeValueAsString(map) 47 | ), sendCallBack 48 | 49 | ); 50 | } 51 | long timecast = System.currentTimeMillis() - timestart; 52 | System.out.println((i + 1) * currency2rates.size() + " has sent to topic:[" + topicName + "] in " + timecast + "ms"); 53 | if (timecast < 30 * 1000) { 54 | Thread.sleep(30 * 1000 - timecast); 55 | } 56 | } 57 | } 58 | 59 | static class SendCallBack implements Callback { 60 | 61 | @Override 62 | public void onCompletion(RecordMetadata recordMetadata, Exception e) { 63 | if (e != null) { 64 | logger.error(e.getMessage(), e); 65 | } 66 | } 67 | } 68 | 69 | private static Map initCurrency2rates() { 70 | final Map map = new HashMap<>(); 71 | map.put("US Dollar", 102); 72 | map.put("Euro", 114); 73 | map.put("Yen", 1); 74 | map.put("RMB", 16); 75 | return map; 76 | } 77 | 78 | private static Map initCountry2Currency() { 79 | final Map map = new HashMap<>(); 80 | map.put("America", "US Dollar"); 81 | map.put("German", "Euro"); 82 | map.put("Japan", "Yen"); 83 | map.put("China", "RMB"); 84 | return map; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /data-generator/src/main/java/kafka/JsonOrderSender.java: -------------------------------------------------------------------------------- 1 | package kafka; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import org.apache.kafka.clients.producer.Callback; 6 | 
import org.apache.kafka.clients.producer.KafkaProducer; 7 | import org.apache.kafka.clients.producer.ProducerRecord; 8 | import org.apache.kafka.clients.producer.RecordMetadata; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.text.DateFormat; 13 | import java.text.SimpleDateFormat; 14 | import java.util.ArrayList; 15 | import java.util.Date; 16 | import java.util.HashMap; 17 | import java.util.List; 18 | import java.util.Map; 19 | import java.util.Properties; 20 | import java.util.Random; 21 | 22 | public class JsonOrderSender { 23 | private static final Logger logger = LoggerFactory.getLogger(JsonOrderSender.class); 24 | private static final ObjectMapper objectMapper = new ObjectMapper(); 25 | private static final Random random = new Random(); 26 | private static final SendCallBack sendCallBack = new SendCallBack(); 27 | private static final String topicName = "flink_orders3"; 28 | private static final List currencies = initCurrencies(); 29 | private static final List itemNames = initItemNames(); 30 | 31 | public static synchronized void sendMessage(Properties kafkaProperties, int continueMinutes) throws InterruptedException, JsonProcessingException { 32 | KafkaProducer producer = new KafkaProducer<>(kafkaProperties); 33 | // order stream 34 | for (int i = 0; i < continueMinutes * 60; i++) { 35 | long timestart = System.currentTimeMillis(); 36 | for (int j = 0; j < currencies.size(); j++) { 37 | Map map = new HashMap<>(); 38 | map.put("order_id", System.currentTimeMillis() + "_" + random.nextInt()); 39 | map.put("item", itemNames.get(random.nextInt(itemNames.size()) % itemNames.size())); 40 | map.put("currency", currencies.get(j % currencies.size())); 41 | map.put("amount", j % 100 / 100.0); 42 | Long time = System.currentTimeMillis(); 43 | DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 44 | Date date = new Date(time); 45 | String jsonSchemaDate = dateFormat.format(date); 46 | map.put("order_time", jsonSchemaDate); 47 | producer.send( 48 | new ProducerRecord<>( 49 | topicName, 50 | String.valueOf(time), 51 | objectMapper.writeValueAsString(map) 52 | ), sendCallBack 53 | 54 | ); 55 | Thread.sleep(5); 56 | 57 | } 58 | long timecast = System.currentTimeMillis() - timestart; 59 | System.out.println((i + 1) * currencies.size() + " has sent to topic:[" + topicName + "] in " + timecast + "ms"); 60 | if (timecast < 2000) { 61 | System.out.println("begin sleep...." + System.currentTimeMillis()); 62 | Thread.sleep(2000); 63 | System.out.println("end sleep...." 
+ System.currentTimeMillis()); 64 | 65 | } 66 | } 67 | } 68 | 69 | static class SendCallBack implements Callback { 70 | 71 | @Override 72 | public void onCompletion(RecordMetadata recordMetadata, Exception e) { 73 | if (e != null) { 74 | logger.error(e.getMessage(), e); 75 | } 76 | } 77 | } 78 | 79 | private static List initCurrencies() { 80 | final List currencies = new ArrayList<>(); 81 | currencies.add("US Dollar"); 82 | currencies.add("Euro"); 83 | currencies.add("Yen"); 84 | currencies.add("人民币"); 85 | return currencies; 86 | } 87 | 88 | private static List initItemNames() { 89 | final List itermNames = new ArrayList<>(); 90 | itermNames.add("Apple"); 91 | itermNames.add("橘子"); 92 | itermNames.add("Paper"); 93 | itermNames.add("牛奶"); 94 | itermNames.add("酸奶"); 95 | itermNames.add("豆腐"); 96 | return itermNames; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /data-generator/src/main/java/kafka/KafkaGenerator.java: -------------------------------------------------------------------------------- 1 | package kafka; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import org.apache.kafka.clients.producer.ProducerConfig; 5 | import org.apache.kafka.common.serialization.StringSerializer; 6 | 7 | import java.util.Properties; 8 | 9 | public class KafkaGenerator { 10 | public static void main(String[] args) throws JsonProcessingException, InterruptedException { 11 | Properties kafkaProperties = new Properties(); 12 | kafkaProperties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); 13 | kafkaProperties.put(ProducerConfig.ACKS_CONFIG, "all"); 14 | kafkaProperties.put(ProducerConfig.RETRIES_CONFIG, "0"); 15 | kafkaProperties.put(ProducerConfig.LINGER_MS_CONFIG, "1"); 16 | kafkaProperties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432"); 17 | kafkaProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 18 | kafkaProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 19 | kafkaProperties.put(ProducerConfig.BATCH_SIZE_CONFIG, "163840"); 20 | kafkaProperties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, "100000"); 21 | 22 | // Thread thread1 = new Thread(new Runnable() { 23 | // @Override 24 | // public void run() { 25 | // try { 26 | // JsonCurrencySender.sendMessage(kafkaProperties, 1); 27 | // } catch (Exception e) { 28 | // e.printStackTrace(); 29 | // } 30 | // } 31 | // }); 32 | // thread1.start(); 33 | 34 | Thread thread2 = new Thread(new Runnable() { 35 | @Override 36 | public void run() { 37 | try { 38 | JsonOrderSender.sendMessage(kafkaProperties, 3); 39 | } catch (Exception e) { 40 | e.printStackTrace(); 41 | } 42 | } 43 | }); 44 | thread2.start(); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/avro/UserAvro.avsc: -------------------------------------------------------------------------------- 1 | {"namespace": "kafka", 2 | "type": "record", 3 | "name": "UserAvro", 4 | "fields": [ 5 | {"name": "user_name", "type": "string"}, 6 | {"name": "is_new", "type": "boolean"}, 7 | {"name": "content", "type": "string"} 8 | ] 9 | } -------------------------------------------------------------------------------- /data-generator/src/main/resources/dynamic_index.csv: -------------------------------------------------------------------------------- 1 | 1,apple,1584504734000,2020-03-18,2020-03-18 12:12:14.0,test1 2 | 
2,peanut,1584591134000,2020-03-19,2020-03-19 12:22:21.0,test2 3 | 3,apple,1584504736000,2020-03-20,2020-03-18 12:12:14.0,test3 4 | 4,peanut,1584591138000,2020-03-19,2020-03-19 12:22:21.0,test4 -------------------------------------------------------------------------------- /data-generator/src/main/resources/hive_read.csv/1: -------------------------------------------------------------------------------- 1 | sam,true,content 2 | leonard,false,content 3 | 雪尽,true,content 4 | leonard2,true,contentsamd 5 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/hive_read.csv/2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/data-generator/src/main/resources/hive_read.csv/2 -------------------------------------------------------------------------------- /data-generator/src/main/resources/hive_read.csv/3: -------------------------------------------------------------------------------- 1 | sam,true,content 2 | leonard,false,content 3 | 雪尽,true,content 4 | leonard2,true,content 5 | samd,false,content_test 6 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/hive_read.csv/4: -------------------------------------------------------------------------------- 1 | sam,true,content 2 | leonard,false,content 3 | 雪尽,true,content 4 | leonard2,true,content 5 | sam,true,content 6 | leonard,false,content 7 | 雪尽,true,content 8 | leonard2,true,contentsamd 9 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/src.csv: -------------------------------------------------------------------------------- 1 | 1|aavb 2 | 2|dadsaf -------------------------------------------------------------------------------- /data-generator/src/main/resources/test.csv: -------------------------------------------------------------------------------- 1 | 1,abc,China,2019-12-20 12:22:00.1234,1234.23 2 | 2,,America,2019-12-20 12:22:00.1234,1234.23 3 | ,,Japan,2019-12-20 12:22:00.1234,1234.23 -------------------------------------------------------------------------------- /data-generator/src/main/resources/test.json: -------------------------------------------------------------------------------- 1 | //{"w_es":1589870637000,"w_type":"INSERT","w_isDdl":false,"w_data":[{"pay_info":"channelId=82&onlineFee=89.0&outTradeNo=0&payId=0&payType=02&rechargeId=4&totalFee=89.0&tradeStatus=success&userId=32590183789575&sign=00","online_fee":"89.0","sign":"00","account_pay_fee":"0.0"}],"w_ts":"2020-05-20T13:58:37.131Z","w_table":"cccc111"} 2 | {"w_es":1589870637000,"w_type":"INSERT","w_isDdl":false,"w_data":[{"pay_info":"channelId=82&onlineFee=89.0&outTradeNo=0&payId=0&payType=02&rechargeId=4&totalFee=89.0&tradeStatus=success&userId=32590183789575&sign=00","online_fee":"89.0","sign":"00","account_pay_fee":"0.1"},{"pay_info":"channelId=82&onlineFee=89.0&outTradeNo=0&payId=0&payType=02&rechargeId=4&totalFee=89.0&tradeStatus=success&userId=32590183789575&sign=00","online_fee":"89.0","sign":"00","account_pay_fee":"0.0"}],"w_ts":"2020-05-20T13:58:37.131Z","w_table":"cccc111"} -------------------------------------------------------------------------------- /data-generator/src/main/resources/test1.csv: -------------------------------------------------------------------------------- 1 | 1,10,Hello-1,100,1.01,false,Welt-1,2019-08-18 
19:00:00.0,2019-08-18,19:00:00,2019-08-18 19:00:00.000000001,123456.0001 2 | 2,20,Hello-2,200,2.02,true,Welt-2,2019-08-18 19:01:00.0,2019-08-18,19:01:00,2019-08-18 19:00:00.000000002,123456.1234 3 | 3,30,Hello-3,300,3.03,false,Welt-3,2019-08-18 19:02:00.0,2019-08-18,19:02:00,2019-08-18 19:00:00.000000003,123456.1000 4 | 4,40,,400,4.04,true,Welt-4,2019-08-18 19:03:00.0,2019-08-18,19:03:00,2019-08-18 19:00:00.400000000,123456.2345 -------------------------------------------------------------------------------- /data-generator/src/main/resources/test15.csv: -------------------------------------------------------------------------------- 1 | 0|HeadQuarters|0|HQ|0|1 Alameda Way|Alameda|CA|55555|USA||||||||||false|false|false|false|false 2 | 1|Supermarket|28|Store 1|1|2853 Bailey Rd|Acapulco|Guerrero|55555|Mexico|Jones|262-555-5124|262-555-5121|1982-01-09 00:00:00|1990-12-05 00:00:00|23593|17475|3671|2447|false|false|false|false|false 3 | 10|Supermarket|24|Store 10|10|7894 Rotherham Dr|Orizaba|Veracruz|55555|Mexico|Merz|212-555-4774|212-555-4771|1979-04-13 00:00:00|1982-01-30 00:00:00|34791|26354|5062|3375|false|false|true|true|false 4 | 11|Supermarket|22|Store 11|11|5371 Holland Circle|Portland|OR|55555|USA|Erickson|685-555-8995|685-555-8991|1976-09-17 00:00:00|1982-05-15 00:00:00|20319|16232|2452|1635|false|false|false|false|false 5 | 12|Deluxe Supermarket|25|Store 12|12|1120 Westchester Pl|Hidalgo|Zacatecas|55555|Mexico|Kalman|151-555-1702|151-555-1701|1968-03-25 00:00:00|1993-12-18 00:00:00|30584|21938|5188|3458|true|true|true|true|true 6 | 13|Deluxe Supermarket|23|Store 13|13|5179 Valley Ave|Salem|OR|55555|USA|Inmon|977-555-2724|977-555-2721|1957-04-13 00:00:00|1997-11-10 00:00:00|27694|18670|5415|3610|true|true|true|true|true 7 | 14|Small Grocery|1|Store 14|14|4365 Indigo Ct|San Francisco|CA|55555|USA|Strehlo|135-555-4888|135-555-4881|1957-11-24 00:00:00|1958-01-07 00:00:00|22478|15321|4294|2863|true|false|false|false|false 8 | 15|Supermarket|18|Store 15|15|5006 Highland Drive|Seattle|WA|55555|USA|Ollom|893-555-1024|893-555-1021|1969-07-24 00:00:00|1973-10-19 00:00:00|21215|13305|4746|3164|true|false|false|false|false 9 | 16|Supermarket|87|Store 16|16|5922 La Salle Ct|Spokane|WA|55555|USA|Mantle|643-555-3645|643-555-3641|1974-08-23 00:00:00|1977-07-13 00:00:00|30268|22063|4923|3282|false|false|false|false|false 10 | 17|Deluxe Supermarket|84|Store 17|17|490 Risdon Road|Tacoma|WA|55555|USA|Mays|855-555-5581|855-555-5581|1970-05-30 00:00:00|1976-06-23 00:00:00|33858|22123|7041|4694|true|false|true|true|true 11 | 18|Mid-Size Grocery|25|Store 18|18|6764 Glen Road|Hidalgo|Zacatecas|55555|Mexico|Brown|528-555-8317|528-555-8311|1969-06-28 00:00:00|1975-08-30 00:00:00|38382|30351|4819|3213|false|false|false|false|false 12 | 19|Deluxe Supermarket|5|Store 19|19|6644 Sudance Drive|Vancouver|BC|55555|Canada|Ruth|862-555-7395|862-555-7391|1977-03-27 00:00:00|1990-10-25 00:00:00|23112|16418|4016|2678|true|true|true|true|true 13 | 2|Small Grocery|78|Store 2|2|5203 Catanzaro Way|Bellingham|WA|55555|USA|Smith|605-555-8203|605-555-8201|1970-04-02 00:00:00|1973-06-04 00:00:00|28206|22271|3561|2374|true|false|false|false|false 14 | 20|Mid-Size Grocery|6|Store 20|20|3706 Marvelle Ln|Victoria|BC|55555|Canada|Cobb|897-555-1931|897-555-1931|1980-02-06 00:00:00|1987-04-09 00:00:00|34452|27463|4193|2795|true|false|false|false|true 15 | 21|Deluxe Supermarket|106|Store 21|21|4093 Steven Circle|San Andres|DF|55555|Mexico|Jones|493-555-4781|493-555-4781|1986-02-07 00:00:00|1990-04-16 
00:00:00|||||true|false|true|true|true 16 | 22|Small Grocery|88|Store 22|22|9606 Julpum Loop|Walla Walla|WA|55555|USA|Byrg|881-555-5117|881-555-5111|1951-01-24 00:00:00|1969-10-17 00:00:00|||||false|false|false|false|false 17 | 23|Mid-Size Grocery|89|Store 23|23|3920 Noah Court|Yakima|WA|55555|USA|Johnson|170-555-8424|170-555-8421|1977-07-16 00:00:00|1987-07-24 00:00:00|||||false|false|false|false|false 18 | 24|Supermarket|7|Store 24|24|2342 Waltham St.|San Diego|CA|55555|USA|Byrd|111-555-0303|111-555-0304|1979-05-22 00:00:00|1986-04-20 00:00:00|||||true|false|true|false|true 19 | 3|Supermarket|76|Store 3|3|1501 Ramsey Circle|Bremerton|WA|55555|USA|Davis|509-555-1596|509-555-1591|1959-06-14 00:00:00|1967-11-19 00:00:00|39696|24390|9184|6122|false|false|true|true|false 20 | 4|Gourmet Supermarket|27|Store 4|4|433 St George Dr|Camacho|Zacatecas|55555|Mexico|Johnson|304-555-1474|304-555-1471|1994-09-27 00:00:00|1995-12-01 00:00:00|23759|16844|4149|2766|true|false|true|true|true 21 | 5|Small Grocery|4|Store 5|5|1250 Coggins Drive|Guadalajara|Jalisco|55555|Mexico|Green|801-555-4324|801-555-4321|1978-09-18 00:00:00|1991-06-29 00:00:00|24597|15012|5751|3834|true|false|false|false|false 22 | 6|Gourmet Supermarket|47|Store 6|6|5495 Mitchell Canyon Road|Beverly Hills|CA|55555|USA|Maris|958-555-5002|958-555-5001|1981-01-03 00:00:00|1991-03-13 00:00:00|23688|15337|5011|3340|true|true|true|true|true 23 | 7|Supermarket|3|Store 7|7|1077 Wharf Drive|Los Angeles|CA|55555|USA|White|477-555-7967|477-555-7961|1971-05-21 00:00:00|1981-10-20 00:00:00|23598|14210|5633|3755|false|false|false|false|true 24 | 8|Deluxe Supermarket|26|Store 8|8|3173 Buena Vista Ave|Merida|Yucatan|55555|Mexico|Williams|797-555-3417|797-555-3411|1958-09-23 00:00:00|1967-11-18 00:00:00|30797|20141|6393|4262|true|true|true|true|true 25 | 9|Mid-Size Grocery|2|Store 9|9|1872 El Pintado Road|Mexico City|DF|55555|Mexico|Stuber|439-555-3524|439-555-3521|1955-03-18 00:00:00|1959-06-07 00:00:00|36509|22450|8435|5624|false|false|false|false|false -------------------------------------------------------------------------------- /data-generator/src/main/resources/test_csv.csv: -------------------------------------------------------------------------------- 1 | sam,true,content 2 | leonard,false,content 3 | leonard1,true,content 4 | leonard2,true,content 5 | sam,true,content 6 | leonard,false,content 7 | 雪尽,true,content 8 | leonard2,true,content 9 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/testdata.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/data-generator/src/main/resources/testdata.avro -------------------------------------------------------------------------------- /data-generator/src/main/resources/user.avro/part-6be7eb15-4ec0-4ff8-aa29-59d5ec37dfae-0-0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/data-generator/src/main/resources/user.avro/part-6be7eb15-4ec0-4ff8-aa29-59d5ec37dfae-0-0 -------------------------------------------------------------------------------- /data-generator/src/main/resources/user.csv: -------------------------------------------------------------------------------- 1 | sam,true,content 2 | leonard,false,content 3 | 雪尽,true,content 4 | leonard2,true,content 5 | 
超级帅气的人,false,content_test 6 | 超级帅气的人,false,这是一个很长的中文字符串中文字符串中文字符串 7 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/user19.json: -------------------------------------------------------------------------------- 1 | {"monitorId": 789, "deviceId": "ab;cd", "data": 144.0, "state": 2} 2 | {"monitorId": 788, "deviceId": "a;bcd", "data": 144.0, "state": 2} 3 | -------------------------------------------------------------------------------- /data-generator/src/main/resources/user2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/data-generator/src/main/resources/user2.csv -------------------------------------------------------------------------------- /data-generator/src/main/resources/user3.csv: -------------------------------------------------------------------------------- 1 | 1,click,2019-08-19 19:30:01 2 | 2,exposure,2019-08-19 20:30:02 3 | 3,click,2019-08-19 21:31:03 4 | 4,exposure,2019-08-19 19:31:04 5 | 5,click,2019-08-19 20:31:05 6 | 6,click,2019-08-19 21:32:06 -------------------------------------------------------------------------------- /data-generator/src/main/resources/user4.json: -------------------------------------------------------------------------------- 1 | {"monitorId": 789, "deviceId": "abcd", "data": 144.0, "state": 2, "time_st": "2020-07-14T15:15:19.600000"} 2 | {"monitorId": 788, "deviceId": "abcd", "data": 144.0, "state": 2, "time_st": "2020-07-14T15:15:11.600000"} 3 | {"monitorId": 7887, "deviceId": "양현마을", "data": 144.0, "state": 2, "time_st": "2020-07-14T15:15:11.600000"} -------------------------------------------------------------------------------- /data-generator/src/main/resources/user_part.csv: -------------------------------------------------------------------------------- 1 | sam,true,content,2020-03-01 2 | leonard,false,content,2020-03-02 3 | 雪尽,true,content,2020-03-03 4 | leonard2,true,content,2020-03-01 5 | samd,false,content_test,2020-03-02 -------------------------------------------------------------------------------- /etl-job/src/main/java/Test.java: -------------------------------------------------------------------------------- 1 | import java.text.DateFormat; 2 | import java.text.SimpleDateFormat; 3 | import java.util.Date; 4 | 5 | public class Test { 6 | public static void main(String[] args) { 7 | Long time = System.currentTimeMillis(); 8 | DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); 9 | Date date = new Date(time); 10 | String jsonSchemaDate = dateFormat.format(date); 11 | System.out.println(jsonSchemaDate); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /etl-job/src/main/java/TestGen.java: -------------------------------------------------------------------------------- 1 | // 2 | //public final class WatermarkGenerator$0 3 | // extends org.apache.flink.table.runtime.generated.WatermarkGenerator { 4 | // 5 | // private transient org.apache.flink.table.planner.runtime.utils.JavaUserDefinedScalarFunctions$JavaFunc5 function_org$apache$flink$table$planner$runtime$utils$JavaUserDefinedScalarFunctions$JavaFunc5$ac4516f46aafeff3fbc8ae56b8d9fd58; 6 | // private transient org.apache.flink.table.dataformat.DataFormatConverters.TimestampConverter converter$5; 7 | // 8 | // public WatermarkGenerator$0(Object[] references) throws Exception { 9 | // 
function_org$apache$flink$table$planner$runtime$utils$JavaUserDefinedScalarFunctions$JavaFunc5$ac4516f46aafeff3fbc8ae56b8d9fd58 = (((org.apache.flink.table.planner.runtime.utils.JavaUserDefinedScalarFunctions$JavaFunc5) references[0])); 10 | // converter$5 = (((org.apache.flink.table.dataformat.DataFormatConverters.TimestampConverter) references[1])); 11 | // } 12 | // 13 | // @Override 14 | // public void open(org.apache.flink.configuration.Configuration parameters) throws Exception { 15 | // 16 | // function_org$apache$flink$table$planner$runtime$utils$JavaUserDefinedScalarFunctions$JavaFunc5$ac4516f46aafeff3fbc8ae56b8d9fd58.open(new org.apache.flink.table.functions.FunctionContext(getRuntimeContext())); 17 | // 18 | // } 19 | // 20 | // @Override 21 | // public Long currentWatermark(org.apache.flink.table.dataformat.BaseRow row) throws Exception { 22 | // 23 | // org.apache.flink.table.dataformat.SqlTimestamp field$1; 24 | // boolean isNull$1; 25 | // int field$2; 26 | // boolean isNull$2; 27 | // org.apache.flink.table.dataformat.SqlTimestamp result$3; 28 | // org.apache.flink.table.dataformat.SqlTimestamp result$6; 29 | // boolean isNull$6; 30 | // isNull$1 = row.isNullAt(0); 31 | // field$1 = null; 32 | // if (!isNull$1) { 33 | // field$1 = row.getTimestamp(0, 3); 34 | // } 35 | // isNull$2 = row.isNullAt(1); 36 | // field$2 = -1; 37 | // if (!isNull$2) { 38 | // field$2 = row.getInt(1); 39 | // } 40 | // 41 | // 42 | // 43 | // 44 | // 45 | // java.sql.Timestamp javaResult$4 = (java.sql.Timestamp) function_org$apache$flink$table$planner$runtime$utils$JavaUserDefinedScalarFunctions$JavaFunc5$ac4516f46aafeff3fbc8ae56b8d9fd58.eval(isNull$1 ? null : ((org.apache.flink.table.dataformat.SqlTimestamp) field$1), isNull$2 ? null : ((java.lang.Integer) field$2)); 46 | // result$3 = javaResult$4 == null ? 
null : ((org.apache.flink.table.dataformat.SqlTimestamp) converter$5.toInternal((java.sql.Timestamp) javaResult$4)); 47 | // 48 | // 49 | // isNull$6 = result$3 == null; 50 | // result$6 = null; 51 | // if (!isNull$6) { 52 | // result$6 = result$3; 53 | // } 54 | // 55 | // if (isNull$6) { 56 | // return null; 57 | // } else { 58 | // return result$6.getMillisecond(); 59 | // } 60 | // } 61 | // 62 | // @Override 63 | // public void close() throws Exception { 64 | // 65 | // function_org$apache$flink$table$planner$runtime$utils$JavaUserDefinedScalarFunctions$JavaFunc5$ac4516f46aafeff3fbc8ae56b8d9fd58.close(); 66 | // 67 | // } 68 | //} 69 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2es/Kafka2AppendEs.java: -------------------------------------------------------------------------------- 1 | package kafka2es; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.api.common.typeinfo.Types; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.table.api.EnvironmentSettings; 7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 8 | import org.apache.flink.table.functions.ScalarFunction; 9 | 10 | import java.sql.Timestamp; 11 | 12 | public class Kafka2AppendEs { 13 | private static String csvSourceDDL = "create table csv(" + 14 | " pageId VARCHAR," + 15 | " eventId VARCHAR," + 16 | " recvTime VARCHAR" + 17 | ") with (" + 18 | " 'connector.type' = 'filesystem',\n" + 19 | " 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv',\n" + 20 | " 'format.type' = 'csv',\n" + 21 | " 'format.fields.0.name' = 'pageId',\n" + 22 | " 'format.fields.0.data-type' = 'STRING',\n" + 23 | " 'format.fields.1.name' = 'eventId',\n" + 24 | " 'format.fields.1.data-type' = 'STRING',\n" + 25 | " 'format.fields.2.name' = 'recvTime',\n" + 26 | " 'format.fields.2.data-type' = 'STRING')"; 27 | private static String sinkDDL = "CREATE TABLE append_test (\n" + 28 | " aggId varchar ,\n" + 29 | " pageId varchar ,\n" + 30 | " ts varchar ,\n" + 31 | " expoCnt int ,\n" + 32 | " clkCnt int\n" + 33 | ") WITH (\n" + 34 | "'connector.type' = 'elasticsearch',\n" + 35 | "'connector.version' = '6',\n" + 36 | "'connector.hosts' = 'http://localhost:9200',\n" + 37 | "'connector.index' = 'append_test7',\n" + 38 | "'connector.document-type' = '_doc',\n" + 39 | "'update-mode' = 'upsert',\n" + 40 | "'connector.key-delimiter' = '$',\n" + 41 | "'connector.key-null-literal' = 'n/a',\n" + 42 | "'connector.bulk-flush.interval' = '1000',\n" + 43 | "'format.type' = 'json'\n" + 44 | ")\n"; 45 | private static String query = "INSERT INTO append_test\n" + 46 | " SELECT pageId,eventId,ts2Date(recvTime) as ts, 1, 1 from csv"; 47 | 48 | 49 | public static void main(String[] args) throws Exception { 50 | System.out.println(csvSourceDDL); 51 | System.out.print(sinkDDL); 52 | System.out.print(query); 53 | 54 | // legacy planner test passed 55 | // testLegacyPlanner(); 56 | 57 | // blink planner test passed 58 | testBlinkPlanner(); 59 | // System.out.println(sinkDDL); 60 | } 61 | 62 | public static void testLegacyPlanner() throws Exception { 63 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 64 | env.setParallelism(1); 65 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 66 | .useOldPlanner() 67 | .inStreamingMode() 68 | .build(); 69 | StreamTableEnvironment 
tableEnvironment = StreamTableEnvironment.create(env, envSettings); 70 | tableEnvironment.registerFunction("ts2Date", new ts2Date()); 71 | 72 | tableEnvironment.sqlUpdate(csvSourceDDL); 73 | tableEnvironment.sqlUpdate(sinkDDL); 74 | tableEnvironment.sqlUpdate(query); 75 | 76 | tableEnvironment.execute("Kafka2Es"); 77 | } 78 | 79 | public static void testBlinkPlanner() throws Exception { 80 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 81 | env.setParallelism(2); 82 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 83 | .useBlinkPlanner() 84 | .inStreamingMode() 85 | .build(); 86 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 87 | tableEnvironment.registerFunction("ts2Date", new ts2Date()); 88 | tableEnvironment.sqlUpdate(csvSourceDDL); 89 | tableEnvironment.sqlUpdate(sinkDDL); 90 | tableEnvironment.sqlUpdate(query); 91 | 92 | tableEnvironment.execute("Kafka2Es"); 93 | } 94 | 95 | public static class ts2Date extends ScalarFunction { 96 | public String eval(String timeStr) { 97 | Timestamp t = Timestamp.valueOf(timeStr); 98 | return t.getDate() + " " + t.getHours() + ":" + t.getMinutes(); 99 | } 100 | 101 | public TypeInformation getResultType(Class[] signature) { 102 | return Types.STRING; 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/EventTimeBucketAssigner.java: -------------------------------------------------------------------------------- 1 | package kafka2file; 2 | 3 | import org.apache.flink.core.io.SimpleVersionedSerializer; 4 | import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner; 5 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.SimpleVersionedStringSerializer; 6 | 7 | import com.fasterxml.jackson.databind.JsonNode; 8 | import com.fasterxml.jackson.databind.ObjectMapper; 9 | 10 | import java.text.SimpleDateFormat; 11 | import java.util.Date; 12 | 13 | public class EventTimeBucketAssigner implements BucketAssigner { 14 | private ObjectMapper mapper = new ObjectMapper(); 15 | @Override 16 | public String getBucketId(String element, Context context) { 17 | String partitionValue; 18 | try { 19 | JsonNode node = mapper.readTree(element); 20 | long date = (long) (node.path("order_time").floatValue() * 1000); 21 | partitionValue = new SimpleDateFormat("yyyyMMdd").format(new Date(date)); 22 | } catch (Exception e){ 23 | partitionValue = "00000000"; 24 | } 25 | return "dt=" + partitionValue; 26 | } 27 | 28 | @Override 29 | public SimpleVersionedSerializer getSerializer() { 30 | return SimpleVersionedStringSerializer.INSTANCE; 31 | } 32 | } -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/ReadHiveDataETL.java: -------------------------------------------------------------------------------- 1 | //package kafka2file; 2 | // 3 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | //import org.apache.flink.table.api.EnvironmentSettings; 5 | //import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | //import org.apache.flink.table.catalog.hive.HiveCatalog; 7 | //import org.apache.flink.types.Row; 8 | // 9 | //public class ReadHiveDataETL { 10 | // public static void main(String[] args) throws Exception{ 11 | // StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); 12 | 
// EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance() 13 | // .useBlinkPlanner() 14 | // .inStreamingMode() 15 | // .build(); 16 | // executionEnvironment.setParallelism(1); 17 | // StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(executionEnvironment, environmentSettings); 18 | // testReadHive(tableEnvironment); 19 | // } 20 | // 21 | // private static void testReadHive(StreamTableEnvironment tableEnvironment) throws Exception { 22 | // HiveCatalog hiveCatalog = new HiveCatalog("myhive", "default", "/Users/bang/hive-3.1.2/conf", "3.1.2"); 23 | // tableEnvironment.registerCatalog("myhive", hiveCatalog); 24 | // tableEnvironment.useCatalog("myhive"); 25 | // tableEnvironment.useDatabase("default"); 26 | // tableEnvironment.toAppendStream(tableEnvironment.sqlQuery("select * from user_info"), Row.class).print(); 27 | // tableEnvironment.execute("readHive"); 28 | // } 29 | // 30 | //} 31 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/StreamETLKafka2Hdfs.java: -------------------------------------------------------------------------------- 1 | //package kafka2file; 2 | // 3 | //import org.apache.flink.api.common.serialization.SimpleStringEncoder; 4 | //import org.apache.flink.api.common.serialization.SimpleStringSchema; 5 | //import org.apache.flink.core.fs.Path; 6 | //import org.apache.flink.runtime.state.StateBackend; 7 | //import org.apache.flink.runtime.state.filesystem.FsStateBackend; 8 | //import org.apache.flink.streaming.api.datastream.DataStream; 9 | //import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | //import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink; 12 | //import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy; 13 | //import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; 14 | // 15 | //import java.util.Properties; 16 | // 17 | //public class StreamETLKafka2Hdfs { 18 | // public static void main(String[] args) throws Exception { 19 | // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | // env.setParallelism(1); 21 | // //checkpoint 22 | // env.enableCheckpointing(60_000); 23 | // env.setStateBackend((StateBackend) new FsStateBackend("file:///tmp/flink/checkpoints")); 24 | // env.getCheckpointConfig().enableExternalizedCheckpoints( 25 | // CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION); 26 | // 27 | // //source 28 | // Properties props = new Properties(); 29 | // props.setProperty("bootstrap.servers", "localhost:9092"); 30 | // FlinkKafkaConsumer010 consumer = new FlinkKafkaConsumer010<>( 31 | // "flink_orders", new SimpleStringSchema(), props); 32 | // 33 | // //transformation 34 | // DataStream stream = env.addSource(consumer) 35 | // .map(r -> r); 36 | // 37 | // //sink 38 | // StreamingFileSink sink = StreamingFileSink 39 | // .forRowFormat(new Path("/tmp/kafka-loader"), new SimpleStringEncoder()) 40 | // .withRollingPolicy(DefaultRollingPolicy.create().build()) 41 | // .withBucketAssigner(new EventTimeBucketAssigner()) 42 | // .build(); 43 | // stream.addSink(sink); 44 | // 45 | // env.execute(); 46 | // } 47 | //} 48 | // 49 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/TestCsv2Csv.java: 
-------------------------------------------------------------------------------- 1 | package kafka2file; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestCsv2Csv { 9 | public static void main(String[] args) throws Exception { 10 | EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance() 11 | .useBlinkPlanner() 12 | .inStreamingMode() 13 | .build(); 14 | StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | executionEnvironment.setParallelism(1); 16 | 17 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(executionEnvironment, environmentSettings); 18 | 19 | String csvSourceDDL = "create table csv(" + 20 | " id INT," + 21 | " note STRING," + 22 | " country STRING," + 23 | " record_time TIMESTAMP(4)," + 24 | " doub_val DECIMAL(6, 2)" + 25 | ") with (" + 26 | " 'connector.type' = 'filesystem',\n" + 27 | " 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test.csv',\n" + 28 | " 'format.type' = 'csv'" + 29 | ")"; 30 | String csvSink = "create table csvSink(" + 31 | " jnlno STRING,\n" + 32 | " taskid char(4),\n" + 33 | " hit VARCHAR " + 34 | ") with (" + 35 | " 'connector.type' = 'filesystem',\n" + 36 | " 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test12312.csv',\n" + 37 | " 'format.type' = 'csv'" + 38 | ")"; 39 | tableEnvironment.sqlUpdate(csvSourceDDL); 40 | tableEnvironment.sqlUpdate(csvSink); 41 | tableEnvironment.sqlUpdate("insert into csvSink select a.country,'111111qeq','false' from csv a"); 42 | System.out.println(csvSourceDDL); 43 | System.out.println(csvSink); 44 | System.out.println("insert into csvSink select a.country,'111111qeq','false' from csv a"); 45 | 46 | // 47 | // tableEnvironment.toAppendStream( 48 | // tableEnvironment.sqlQuery("insert into target select a.country,'111111qeq','false' from csv a"), 49 | // Row.class).print(); 50 | tableEnvironment.execute("csvTest"); 51 | 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/TestCsv2Csv1.java: -------------------------------------------------------------------------------- 1 | package kafka2file; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestCsv2Csv1 { 9 | public static void main(String[] args) throws Exception { 10 | EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance() 11 | .useBlinkPlanner() 12 | .inStreamingMode() 13 | .build(); 14 | StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | executionEnvironment.setParallelism(1); 16 | 17 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(executionEnvironment, environmentSettings); 18 | 19 | String csvSourceDDL = "create table csv(" + 20 | "rowkey INT,\n" + 21 | "f1c1 INT,\n" + 22 | "f2c1 STRING,\n" + 23 | "f2c2 BIGINT,\n" + 24 | "f3c1 DOUBLE,\n" + 25 | "f3c2 BOOLEAN,\n" + 26 | "f3c3 STRING,\n" + 27 | "f4c1 TIMESTAMP(3),\n" + 28 | "f4c2 DATE,\n" + 29 | 
"f4c3 TIME(3),\n" + 30 | "f5c1 TIMESTAMP(4),\n" + 31 | "f5c2 DECIMAL(10, 4)" + 32 | ") with (" + 33 | " 'connector.type' = 'filesystem',\n" + 34 | " 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test1.csv',\n" + 35 | " 'format.type' = 'csv'" + 36 | ")"; 37 | 38 | tableEnvironment.sqlUpdate(csvSourceDDL); 39 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery("select f5c1, f5c2 from csv"), Row.class).print(); 40 | 41 | executionEnvironment.execute("csvTest"); 42 | 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/TestCsvError.java: -------------------------------------------------------------------------------- 1 | package kafka2file; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestCsvError { 9 | public static void main(String[] args) throws Exception { 10 | EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance() 11 | .useBlinkPlanner() 12 | .inStreamingMode() 13 | .build(); 14 | StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | executionEnvironment.setParallelism(1); 16 | 17 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(executionEnvironment, environmentSettings); 18 | 19 | String csvSourceDDL = "CREATE TABLE `src` (\n" + 20 | "key bigint,\n" + 21 | "v varchar\n" + 22 | ") WITH (\n" + 23 | "'connector'='filesystem',\n" + 24 | "'csv.field-delimiter'='|',\n" + 25 | "'path'='file:///Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/src.csv',\n" + 26 | "'csv.null-literal'='',\n" + 27 | "'format'='csv'\n" + 28 | ")"; 29 | String csvSinkDDL = "CREATE TABLE `sink` (\n" + 30 | "c1 decimal(10, 2),\n" + 31 | "c2 varchar,\n" + 32 | "c3 varchar" + 33 | ") WITH (\n" + 34 | "'connector'='filesystem',\n" + 35 | "'csv.field-delimiter'='|',\n" + 36 | "'path'='/Users/bang/sink.csv',\n" + 37 | "'csv.null-literal'='',\n" + 38 | "'format'='csv'\n" + 39 | ")"; 40 | 41 | tableEnvironment.sqlUpdate(csvSourceDDL); 42 | tableEnvironment.executeSql(csvSinkDDL); 43 | // tableEnvironment.executeSql("insert into sink select\n" + 44 | // " cast(key as decimal(10,2)) as c1,\n" + 45 | // " cast(key as char(10)) as c2,\n" + 46 | // " cast(key as varchar(10)) as c3\n" + 47 | // " from src\n").collect(); 48 | 49 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery("select\n" + 50 | " cast(key as decimal(10,2)) as c1,\n" + 51 | " cast(key as char(10)) as c2,\n" + 52 | " cast(key as varchar(10)) as c3\n" + 53 | " from src\n"), Row.class).print(); 54 | executionEnvironment.execute("csvTest"); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/TestFileSink.scala: -------------------------------------------------------------------------------- 1 | //package kafka2file 2 | // 3 | //import java.util.Properties 4 | // 5 | //import org.apache.flink.api.common.serialization.SimpleStringEncoder 6 | //import org.apache.flink.core.fs.Path 7 | //import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink 8 | //import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner 9 | //import 
org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy 10 | //import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 11 | // 12 | ////import org.apache.flink.api.common.functions.MapFunction 13 | ////import org.apache.flink.api.common.serialization.{SimpleStringEncoder, SimpleStringSchema} 14 | ////import org.apache.flink.core.fs.Path 15 | ////import org.apache.flink.runtime.state.StateBackend 16 | ////import org.apache.flink.runtime.state.filesystem.FsStateBackend 17 | ////import org.apache.flink.streaming.api.environment.{CheckpointConfig, StreamExecutionEnvironment} 18 | ////import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink 19 | ////import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner 20 | ////import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy 21 | ////import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 22 | // 23 | //object TestFileSink { 24 | // def main(args: Array[String]): Unit = { 25 | // val env = StreamExecutionEnvironment.getExecutionEnvironment 26 | // env.setParallelism(1) 27 | // //checkpoint 28 | // //source 29 | // val props = new Properties 30 | // props.setProperty("bootstrap.servers", "localhost:9092") 31 | //// val consumer = new FlinkKafkaConsumer010[String]("flink_orders", new SimpleStringSchema, props) 32 | // 33 | // //transformation 34 | //// val stream = env.addSource(consumer).map() 35 | // 36 | // // //sink 37 | // // Encoder myEncoder = new SimpleStringEncoder<>(); 38 | // // BucketAssigner myBucketAssigner = new EventTimeBucketAssigner(); 39 | // // BucketAssigner myBucketAssigner = new DateTimeBucketAssigner(); 40 | // 41 | // val sink = StreamingFileSink 42 | // .forRowFormat(new Path("/tmp/kafka-loader"), new SimpleStringEncoder[String]) 43 | // .withRollingPolicy(DefaultRollingPolicy.builder().build()) 44 | // .withBucketAssigner(new DateTimeBucketAssigner[String, String]) 45 | // .build() 46 | // 47 | // val sink1 = StreamingFileSink 48 | // .forRowFormat(new Path("/tmp/kafka-loader"), new SimpleStringEncoder) 49 | // .withRollingPolicy(DefaultRollingPolicy.builder().build()) 50 | // .withBucketAssigner(new DateTimeBucketAssigner) 51 | // .build() 52 | // 53 | //// stream.addSink(sink) 54 | // 55 | // env.execute 56 | // } 57 | //} 58 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2file/Write2Kafka.java: -------------------------------------------------------------------------------- 1 | package kafka2file; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | 7 | public class Write2Kafka { 8 | public static void main(String[] args) throws Exception { 9 | EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance() 10 | .useBlinkPlanner() 11 | .inStreamingMode() 12 | .build(); 13 | StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); 14 | executionEnvironment.setParallelism(1); 15 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(executionEnvironment, environmentSettings); 16 | constructKafkaData(tableEnvironment); 17 | 18 | } 19 | 20 | private static void constructKafkaData(StreamTableEnvironment tableEnvironment) throws Exception { 
21 | String csvSourceDDL = "create table csv( " + 22 | "user_name VARCHAR, " + 23 | "is_new BOOLEAN, " + 24 | "content VARCHAR, " + 25 | "date_col VARCHAR) with ( " + 26 | " 'connector.type' = 'filesystem',\n" + 27 | " 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user_part.csv',\n" + 28 | " 'format.type' = 'csv',\n" + 29 | " 'format.fields.0.name' = 'user_name',\n" + 30 | " 'format.fields.0.data-type' = 'STRING',\n" + 31 | " 'format.fields.1.name' = 'is_new',\n" + 32 | " 'format.fields.1.data-type' = 'BOOLEAN',\n" + 33 | " 'format.fields.2.name' = 'content',\n" + 34 | " 'format.fields.2.data-type' = 'STRING',\n" + 35 | " 'format.fields.3.name' = 'date_col',\n" + 36 | " 'format.fields.3.data-type' = 'STRING')"; 37 | tableEnvironment.sqlUpdate(csvSourceDDL); 38 | 39 | String sinkTableDDL = "CREATE TABLE csvData (\n" + 40 | " user_name STRING,\n" + 41 | " is_new BOOLEAN,\n" + 42 | " content STRING,\n" + 43 | " date_col STRING" + 44 | ") WITH (\n" + 45 | " 'connector.type' = 'kafka',\n" + 46 | " 'connector.version' = '0.10',\n" + 47 | " 'connector.topic' = 'csv_data',\n" + 48 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 49 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 50 | " 'connector.properties.group.id' = 'testGroup3',\n" + 51 | " 'connector.startup-mode' = 'earliest-offset',\n" + 52 | " 'format.type' = 'csv')"; 53 | tableEnvironment.sqlUpdate(sinkTableDDL); 54 | 55 | String querySql = "insert into csvData \n" + 56 | "select user_name, is_new, content, date_col from\n" + 57 | "csv"; 58 | tableEnvironment.sqlUpdate(querySql); 59 | tableEnvironment.execute("flinkFileCsv2KafkaCsv"); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2hbase/UnboundedKafkaJoinHbase2Hbase.java: -------------------------------------------------------------------------------- 1 | package kafka2hbase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | import constants.FlinkSqlConstants; 9 | 10 | public class UnboundedKafkaJoinHbase2Hbase { 11 | public static void main(String[] args) throws Exception { 12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 13 | env.setParallelism(1); 14 | 15 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 16 | .useBlinkPlanner() 17 | .inStreamingMode() 18 | .build(); 19 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 20 | testJoinDDLHbaseWithFunction(env, tableEnvironment); 21 | } 22 | 23 | 24 | private static void testJoinDDLHbaseWithFunction(StreamExecutionEnvironment env, StreamTableEnvironment tableEnvironment) throws Exception { 25 | tableEnvironment.sqlUpdate(FlinkSqlConstants.ordersTableDDL11); 26 | tableEnvironment.sqlUpdate(FlinkSqlConstants.mysqlCurrencyDDL11); 27 | tableEnvironment.sqlUpdate(FlinkSqlConstants.hbaseCountryDDLWithPrecison11); 28 | 29 | String sinkTableDDL = "CREATE TABLE gmv (\n" + 30 | " rowkey VARCHAR,\n" + 31 | " f1 ROW," + 32 | " f2 ROW" + 33 | ") WITH (\n" + 34 | " 'connector' = 'hbase-1.4',\n" + 35 | " 'table-name' = 'gmv',\n" + 36 | " 'zookeeper.quorum' = 'localhost:2182',\n" + 37 | " 'zookeeper.znode.parent' = '/hbase',\n" + 38 | " 'sink.buffer-flush.max-size' 
= '10mb', \n" + 39 | " 'sink.buffer-flush.max-rows' = '1000', \n" + 40 | " 'sink.buffer-flush.interval' = '2s' " + 41 | ")"; 42 | tableEnvironment.sqlUpdate(sinkTableDDL); 43 | 44 | //test lookup 45 | String querySQL = 46 | " select rowkey, ROW(max(ts), max(item), max(country_name)) as f1, max(gdp), max(record_timestamp3)\n" + 47 | " from (" + 48 | "select concat(cast(o.ts as VARCHAR), '_', item, '_', co.f1.country_name) as rowkey,\n" + 49 | " cast(o.ts as VARCHAR) as ts, o.item as item, co.f1.country_name as country_name," + 50 | "co.gdp as gdp, co.record_timestamp3 as record_timestamp3\n" + 51 | " from orders as o \n" + 52 | " left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c\n" + 53 | " on o.currency = c.currency_name\n" + 54 | " left outer join country FOR SYSTEM_TIME AS OF o.proc_time co\n" + 55 | " on c.country = co.rowkey \n" + 56 | ") a group by rowkey\n" ; 57 | 58 | tableEnvironment.toRetractStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 59 | env.execute(); 60 | 61 | 62 | // test source 63 | // tableEnvironment.toRetractStream(tableEnvironment.sqlQuery("select * from (select rowkey, f1.country_name,f1.country_name_cn, f2.record_timestamp3,f2.record_timestamp9, f2.gdp from country) a "), Row.class) 64 | // .print(); 65 | // env.execute(); 66 | 67 | // tableEnvironment.execute("KafkaJoinHbase2Hbase"); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2jdbc/KafkaJoinJdbc2JdbcProc.java: -------------------------------------------------------------------------------- 1 | package kafka2jdbc; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | 7 | import constants.FlinkSqlConstants; 8 | 9 | public class KafkaJoinJdbc2JdbcProc { 10 | public static void main(String[] args) throws Exception { 11 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 12 | env.setParallelism(1); 13 | 14 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 15 | .useBlinkPlanner() 16 | .inStreamingMode() 17 | .build(); 18 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 19 | 20 | tableEnvironment.sqlUpdate(FlinkSqlConstants.ordersTableDDL); 21 | tableEnvironment.sqlUpdate(FlinkSqlConstants.mysqlCurrencyDDL); 22 | 23 | String sinkTableDDL = "CREATE TABLE gmv (\n" + 24 | " log_per_min STRING,\n" + 25 | " item STRING,\n" + 26 | " order_cnt BIGINT,\n" + 27 | " currency_time TIMESTAMP(3),\n" + 28 | " gmv DECIMAL(38, 18)," + 29 | " timestamp9 TIMESTAMP(3),\n" + 30 | " time9 TIME(3),\n" + 31 | " gdp DECIMAL(38, 18)\n" + 32 | ") WITH (\n" + 33 | " 'connector.type' = 'jdbc',\n" + 34 | " 'connector.url' = 'jdbc:mysql://localhost:3306/test',\n" + 35 | " 'connector.username' = 'root'," + 36 | " 'connector.table' = 'gmv',\n" + 37 | " 'connector.driver' = 'com.mysql.jdbc.Driver',\n" + 38 | " 'connector.write.flush.max-rows' = '5000', \n" + 39 | " 'connector.write.flush.interval' = '2s', \n" + 40 | " 'connector.write.max-retries' = '3'" + 41 | ")"; 42 | tableEnvironment.sqlUpdate(sinkTableDDL); 43 | 44 | String querySQL = "insert into gmv \n" + 45 | "select cast(TUMBLE_END(o.proc_time, INTERVAL '10' SECOND) as VARCHAR) as log_ts,\n" + 46 | " o.item, COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DECIMAL(38, 4)) as gmv,\n" + 
47 | " c.timestamp9, c.time9, c.gdp\n" + 48 | "from orders as o \n" + 49 | "join currency FOR SYSTEM_TIME AS OF o.proc_time c\n" + 50 | "on o.currency = c.currency_name\n" + 51 | "group by o.item, c.currency_time, c.rate, c.timestamp9, c.time9, c.gdp, TUMBLE(o.proc_time, INTERVAL '10' SECOND)\n" ; 52 | tableEnvironment.sqlUpdate(querySQL); 53 | 54 | tableEnvironment.execute("KafkaJoinJdbc2Jdbc"); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2jdbc/TestJdbc.java: -------------------------------------------------------------------------------- 1 | package kafka2jdbc; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestJdbc { 9 | public static void main(String[] args) throws Exception { 10 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 11 | env.setParallelism(1); 12 | 13 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 14 | .useBlinkPlanner() 15 | .inStreamingMode() 16 | .build(); 17 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 18 | String mysqlCurrencyDDL = "CREATE TABLE currency (\n" + 19 | " currency_id BIGINT,\n" + 20 | " currency_name STRING,\n" + 21 | " rate DOUBLE,\n" + 22 | " currency_time TIMESTAMP(3),\n" + 23 | " country STRING,\n" + 24 | " timestamp9 TIMESTAMP(6),\n" + 25 | " time9 TIME(3),\n" + 26 | " gdp DECIMAL(10, 6)\n" + 27 | ") WITH (\n" + 28 | " 'connector' = 'jdbc',\n" + 29 | " 'url' = 'jdbc:mysql://localhost:3306/test',\n" + 30 | " 'username' = 'root'," + 31 | " 'password' = ''," + 32 | " 'table-name' = 'currency',\n" + 33 | " 'driver' = 'com.mysql.jdbc.Driver',\n" + 34 | " 'lookup.cache.max-rows' = '500', \n" + 35 | " 'lookup.cache.ttl' = '10s',\n" + 36 | " 'lookup.max-retries' = '3'" + 37 | ")"; 38 | System.out.println(mysqlCurrencyDDL); 39 | 40 | tableEnvironment.sqlUpdate(mysqlCurrencyDDL); 41 | 42 | 43 | String querySQL = "select * from currency" ; 44 | 45 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 46 | env.execute(); 47 | // tableEnvironment.execute("KafkaJoinJdbc2Jdbc"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2jdbc/UnboundedKafkaJoinJdbc2Jdbc.java: -------------------------------------------------------------------------------- 1 | package kafka2jdbc; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.api.common.typeinfo.Types; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.table.api.EnvironmentSettings; 7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 8 | import org.apache.flink.table.functions.ScalarFunction; 9 | import org.apache.flink.types.Row; 10 | 11 | import constants.FlinkSqlConstants; 12 | 13 | public class UnboundedKafkaJoinJdbc2Jdbc { 14 | public static void main(String[] args) throws Exception { 15 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 16 | env.setParallelism(1); 17 | 18 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 19 | .useBlinkPlanner() 20 | .inStreamingMode() 21 | .build(); 22 | 
StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 23 | 24 | tableEnvironment.registerFunction("add_one_fun", new AddOneFunc()); 25 | 26 | tableEnvironment.sqlUpdate(FlinkSqlConstants.ordersTableDDL); 27 | tableEnvironment.sqlUpdate(FlinkSqlConstants.mysqlCurrencyDDL); 28 | 29 | String sinkTableDDL = "CREATE TABLE gmv (\n" + 30 | " log_per_min STRING,\n" + 31 | " item STRING,\n" + 32 | " order_cnt BIGINT,\n" + 33 | " currency_time TIMESTAMP(3),\n" + 34 | " gmv DECIMAL(38, 18)," + 35 | " timestamp9 TIMESTAMP(3),\n" + 36 | " time9 TIME(3),\n" + 37 | " gdp DECIMAL(38, 18)\n" + 38 | ") WITH (\n" + 39 | " 'connector.type' = 'jdbc',\n" + 40 | " 'connector.url' = 'jdbc:mysql://localhost:3306/test',\n" + 41 | " 'connector.username' = 'root'," + 42 | " 'connector.table' = 'gmv',\n" + 43 | " 'connector.driver' = 'com.mysql.jdbc.Driver',\n" + 44 | " 'connector.write.flush.max-rows' = '5000', \n" + 45 | " 'connector.write.flush.interval' = '2s', \n" + 46 | " 'connector.write.max-retries' = '3'" + 47 | ")"; 48 | tableEnvironment.sqlUpdate(sinkTableDDL); 49 | 50 | String querySQL = "insert into gmv \n" + 51 | "select max(log_ts),\n" + 52 | " item, COUNT(order_id) as order_cnt, max(currency_time), cast(sum(amount_kg) * max(rate) as DOUBLE) as gmv,\n" + 53 | " max(timestamp9), max(time9), max(gdp) \n" + 54 | " from ( \n" + 55 | " select cast(o.ts as VARCHAR) as log_ts, o.item as item, o.order_id as order_id, c.currency_time as currency_time,\n" + 56 | " o.amount_kg as amount_kg, c.rate as rate, c.timestamp9 as timestamp9, c.time9 as time9, c.gdp as gdp \n" + 57 | " from orders as o \n" + 58 | " join currency FOR SYSTEM_TIME AS OF o.proc_time c \n" + 59 | " on o.currency = c.currency_name \n" + 60 | " ) a group by item\n" ; 61 | 62 | System.out.println(FlinkSqlConstants.ordersTableDDL); 63 | System.out.println(FlinkSqlConstants.mysqlCurrencyDDL); 64 | System.out.println(sinkTableDDL); 65 | System.out.println(querySQL); 66 | // tableEnvironment.toRetractStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 67 | tableEnvironment.sqlUpdate(querySQL); 68 | tableEnvironment.execute("KafkaJoinJdbc2Jdbc"); 69 | } 70 | 71 | public static class AddOneFunc extends ScalarFunction { 72 | public Long eval(long t) { 73 | return t + 1; 74 | } 75 | 76 | public TypeInformation getResultType(Class[] signature) { 77 | return Types.LONG; 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2jdbc/testNonExistedTable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package kafka2jdbc; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.streaming.api.transformations.ShuffleMode; 23 | import org.apache.flink.table.api.EnvironmentSettings; 24 | import org.apache.flink.table.api.TableEnvironment; 25 | import org.apache.flink.table.api.config.ExecutionConfigOptions; 26 | import org.apache.flink.table.api.config.OptimizerConfigOptions; 27 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 28 | 29 | public class testNonExistedTable { 30 | public static void main(String[] args) throws Exception { 31 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | env.setParallelism(4); 33 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 34 | .useBlinkPlanner() 35 | .inStreamingMode() 36 | .build(); 37 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 38 | 39 | String csvSourceDDL = "create table csv(" + 40 | " id INT," + 41 | " note VARCHAR," + 42 | " country VARCHAR," + 43 | " record_time TIMESTAMP(3)," + 44 | " doub_val DECIMAL(6, 2)," + 45 | " date_val DATE," + 46 | " time_val TIME" + 47 | ") with (" + 48 | " 'connector.type' = 'filesystem',\n" + 49 | " 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/test_nonexistedTable.csv',\n" + 50 | " 'format.type' = 'csv'" + 51 | ")"; 52 | String mysqlSinkDDL = "CREATE TABLE nonExisted (\n" + 53 | " c0 BOOLEAN," + 54 | " c1 INTEGER," + 55 | " c2 BIGINT," + 56 | " c3 FLOAT," + 57 | " c4 DOUBLE," + 58 | " c5 DECIMAL(38, 18)," + 59 | " c6 VARCHAR," + 60 | " c7 DATE," + 61 | " c8 TIME," + 62 | " c9 TIMESTAMP(3)" + 63 | ") WITH (\n" + 64 | " 'connector.type' = 'jdbc',\n" + 65 | " 'connector.url' = 'jdbc:mysql://localhost:3306/test',\n" + 66 | " 'connector.username' = 'root'," + 67 | " 'connector.table' = 'nonExisted3',\n" + 68 | " 'connector.driver' = 'com.mysql.jdbc.Driver',\n" + 69 | " 'connector.write.auto-create-table' = 'true' " + 70 | ")"; 71 | String query = "insert into nonExisted " + 72 | "select max(c0),c1,c2,c3,c4,max(c5),max(c6),max(c7),max(c8),max(c9) from " + 73 | " (select true as c0, id as c1, cast(id as bigint) as c2,cast(doub_val as float)as c3,cast(doub_val as double) as c4," + 74 | " doub_val as c5, country as c6, date_val as c7, time_val as c8, record_time as c9 from csv)" + 75 | " a group by c1, c2, c3, c4"; 76 | // String query = "insert into nonExisted select true as c0, id as c1, cast(id as bigint) as c2,cast(doub_val as float)as c3,cast(doub_val as double) as c4," + 77 | // " doub_val as c5, country as c6, date_val as c7, time_val as c8, record_time as c9 from csv"; 78 | tableEnvironment.sqlUpdate(csvSourceDDL); 79 | tableEnvironment.sqlUpdate(mysqlSinkDDL); 80 | tableEnvironment.sqlUpdate(query); 81 | tableEnvironment.execute("csvTest"); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2kafka/ConsumeConfluentAvroTest.java: -------------------------------------------------------------------------------- 1 | package kafka2kafka; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | import io.confluent.kafka.serializers.KafkaAvroSerializer; 9 | import kafka.UserAvro; 10 
| import org.apache.kafka.clients.producer.KafkaProducer; 11 | import org.apache.kafka.clients.producer.ProducerConfig; 12 | import org.apache.kafka.clients.producer.ProducerRecord; 13 | import org.apache.kafka.common.serialization.StringSerializer; 14 | 15 | import java.io.IOException; 16 | import java.util.Properties; 17 | import java.util.Random; 18 | import java.util.stream.IntStream; 19 | 20 | public class ConsumeConfluentAvroTest { 21 | 22 | public static void main(String[] args) throws Exception { 23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | env.setParallelism(1); 25 | 26 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 27 | .useBlinkPlanner() 28 | .inStreamingMode() 29 | .build(); 30 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 31 | 32 | String tableDDL = "CREATE TABLE WikipediaFeed (\n" + 33 | " user_name STRING,\n" + 34 | " is_new BOOLEAN,\n" + 35 | " content STRING" + 36 | ") WITH (\n" + 37 | " 'connector.type' = 'kafka',\n" + 38 | " 'connector.version' = '0.10',\n" + 39 | " 'connector.topic' = 'WikipediaFeed',\n" + 40 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 41 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 42 | " 'connector.properties.group.id' = 'testGroup3',\n" + 43 | " 'connector.startup-mode' = 'earliest-offset',\n" + 44 | " 'format.type' = 'avro',\n" + 45 | " 'format.avro-schema' =\n" + 46 | " '{ \n" + 47 | " \"type\": \"record\",\n" + 48 | " \"name\": \"UserAvro\",\n" + 49 | " \"fields\": [\n" + 50 | " {\"name\": \"user_name\", \"type\": \"string\"},\n" + 51 | " {\"name\": \"is_new\", \"type\": \"boolean\"},\n" + 52 | " {\"name\": \"content\", \"type\": \"string\"}\n" + 53 | " ]\n" + 54 | " }'" + 55 | ")\n"; 56 | tableEnvironment.sqlUpdate(tableDDL); 57 | 58 | String querySQL = "select user_name, is_new, content \n" + 59 | "from WikipediaFeed\n" ; 60 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 61 | tableEnvironment.execute("KafkaAvro2Kafka"); 62 | } 63 | 64 | // prepare confluent avro foramt data 65 | private static void produceInputs() throws IOException { 66 | final String[] users = {"leonard", "bob", "joe", "damian", "tania", "phil", "sam", "lauren", "joseph"}; 67 | final Properties props = new Properties(); 68 | props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); 69 | props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); 70 | props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class); 71 | props.put("schema.registry.url", "http://localhost:8081"); 72 | final KafkaProducer producer = new KafkaProducer<>(props); 73 | final Random random = new Random(); 74 | 75 | IntStream.range(0, 10) 76 | .mapToObj(value -> new UserAvro(users[random.nextInt(users.length)], true, "content")) 77 | .forEach( 78 | record -> { 79 | System.out.println(record.toString()) ; 80 | producer.send(new ProducerRecord<>("WikipediaFeed", record.getUserName(), record)); 81 | }); 82 | 83 | producer.flush(); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2kafka/KafkaJoinJdbc2Kafka.java: -------------------------------------------------------------------------------- 1 | package kafka2kafka; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 
import org.apache.flink.table.api.EnvironmentSettings; 6 | import org.apache.flink.table.api.Types; 7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 8 | import org.apache.flink.table.functions.ScalarFunction; 9 | import org.apache.flink.types.Row; 10 | 11 | import constants.FlinkSqlConstants; 12 | 13 | import java.math.BigDecimal; 14 | 15 | public class KafkaJoinJdbc2Kafka { 16 | public static void main(String[] args) throws Exception { 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | env.setParallelism(1); 19 | 20 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 21 | .useBlinkPlanner() 22 | .inStreamingMode() 23 | .build(); 24 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 25 | 26 | tableEnvironment.registerFunction("func", new Func()); 27 | tableEnvironment.sqlUpdate(FlinkSqlConstants.ordersTableDDL); 28 | tableEnvironment.sqlUpdate(FlinkSqlConstants.mysqlCurrencyDDL); 29 | 30 | String sinkTableDDL = "CREATE TABLE gmv (\n" + 31 | " log_per_min STRING,\n" + 32 | " item STRING,\n" + 33 | " order_cnt BIGINT,\n" + 34 | " currency_time TIMESTAMP(3),\n" + 35 | " gmv DECIMAL(38, 18)" + 36 | ") WITH (\n" + 37 | " 'connector.type' = 'kafka',\n" + 38 | " 'connector.version' = '0.10',\n" + 39 | " 'connector.topic' = 'gmv',\n" + 40 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 41 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 42 | " 'format.type' = 'json',\n" + 43 | " 'format.derive-schema' = 'true'\n" + 44 | ")"; 45 | tableEnvironment.sqlUpdate(sinkTableDDL); 46 | 47 | String querySQL = 48 | "insert into gmv \n" + 49 | "select cast(TUMBLE_END(o.order_time, INTERVAL '10' SECOND) as VARCHAR) as log_per_min,\n" + 50 | " o.item, COUNT(o.order_id) as order_cnt, c.currency_time, " + 51 | " cast(sum(o.amount_kg) * c.rate as DECIMAL(38, 18)) as gmv \n" + 52 | " from orders as o \n" + 53 | " join currency FOR SYSTEM_TIME AS OF o.proc_time c\n" + 54 | " on o.currency = c.currency_name\n" + 55 | " group by o.item, c.currency_time,c.rate,TUMBLE(o.order_time, INTERVAL '10' SECOND)\n"; 56 | 57 | tableEnvironment.sqlUpdate(querySQL); 58 | System.out.println(FlinkSqlConstants.ordersTableDDL); 59 | System.out.println(FlinkSqlConstants.mysqlCurrencyDDL); 60 | System.out.println(sinkTableDDL); 61 | System.out.println(querySQL); 62 | 63 | tableEnvironment.execute("KafkaJoinJdbc2Kafka.sql"); 64 | } 65 | 66 | public static class Func extends ScalarFunction { 67 | public BigDecimal eval(BigDecimal amount) { 68 | return amount.multiply(new BigDecimal("100.0")); 69 | } 70 | 71 | @Override 72 | public TypeInformation getResultType(Class[] signature) { 73 | return Types.DECIMAL(); 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2kafka/KafkaJoinKafka2Kafka.java: -------------------------------------------------------------------------------- 1 | package kafka2kafka; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.EnvironmentSettings; 6 | import org.apache.flink.table.api.Types; 7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 8 | import org.apache.flink.table.functions.ScalarFunction; 9 | import org.apache.flink.types.Row; 10 | 11 | import constants.FlinkSqlConstants; 12 | 13 | import 
java.math.BigDecimal; 14 | 15 | public class KafkaJoinKafka2Kafka { 16 | public static String ordersTableDDL = "CREATE TABLE orders (\n" + 17 | " order_id STRING,\n" + 18 | " item STRING,\n" + 19 | " currency STRING,\n" + 20 | " amount INT,\n" + 21 | " order_time TIMESTAMP(3),\n" + 22 | " proc_time as PROCTIME()" + 23 | // ", WATERMARK FOR order_time AS order_time\n" + 24 | ") WITH (\n" + 25 | " 'connector' = 'kafka-0.10',\n" + 26 | " 'topic' = 'flink_orders',\n" + 27 | " 'properties.zookeeper.connect' = 'localhost:2181',\n" + 28 | " 'properties.bootstrap.servers' = 'localhost:9092',\n" + 29 | " 'properties.group.id' = 'testGroup',\n" + 30 | " 'scan.startup.mode' = 'earliest-offset',\n" + 31 | " 'format' = 'json'\n" + 32 | ")\n"; 33 | 34 | public static final String currencyTableDDL = "CREATE TABLE currency (\n" + 35 | " country STRING,\n" + 36 | " currency STRING,\n" + 37 | " rate INT,\n" + 38 | " rowtime TIMESTAMP(3)" + 39 | // ",WATERMARK FOR currency_time AS currency_time\n" + 40 | ") WITH (\n" + 41 | " 'connector' = 'kafka-0.10',\n" + 42 | " 'topic' = 'flink_currency',\n" + 43 | " 'properties.zookeeper.connect' = 'localhost:2181',\n" + 44 | " 'properties.bootstrap.servers' = 'localhost:9092',\n" + 45 | " 'properties.group.id' = 'testGroup',\n" + 46 | " 'scan.startup.mode' = 'earliest-offset',\n" + 47 | " 'format' = 'json'\n" + 48 | ")"; 49 | 50 | public static void main(String[] args) throws Exception { 51 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 52 | env.setParallelism(1); 53 | 54 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 55 | .useBlinkPlanner() 56 | .inStreamingMode() 57 | .build(); 58 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 59 | 60 | tableEnvironment.executeSql(ordersTableDDL); 61 | tableEnvironment.executeSql(currencyTableDDL); 62 | 63 | String querySQL = 64 | " select * \n" + 65 | " from orders as o \n" + 66 | " join currency c\n" + 67 | " on o.currency = c.currency\n"; 68 | String querySQL2 = "SELECT *\n" + 69 | "FROM currency AS r\n" + 70 | "WHERE r.rowtime = (\n" + 71 | " SELECT MAX(rowtime)\n" + 72 | " FROM currency AS r2\n" + 73 | " WHERE r2.currency = r.currency\n" + 74 | " AND r2.rowtime <= '10:58:00')"; 75 | 76 | System.out.println(tableEnvironment.sqlQuery(querySQL2).explain()); 77 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL2), Row.class).print(); 78 | env.execute(); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /etl-job/src/main/java/kafka2kafka/KafkaJson2Kafka.java: -------------------------------------------------------------------------------- 1 | package kafka2kafka; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class KafkaJson2Kafka { 9 | 10 | public static void main(String[] args) throws Exception { 11 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 12 | env.setParallelism(1); 13 | 14 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 15 | .useBlinkPlanner() 16 | .inStreamingMode() 17 | .build(); 18 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 19 | 20 | String sourceTableDDL = "CREATE TABLE orders (\n" + 21 | " order_id 
STRING,\n" + 22 | " item STRING,\n" + 23 | " currency STRING,\n" + 24 | " amount DOUBLE,\n" + 25 | " order_time TIMESTAMP(3),\n" + 26 | " proc_time as PROCTIME(),\n" + 27 | " amount_kg as amount * 1000,\n" + 28 | " ts as order_time + INTERVAL '1' SECOND,\n" + 29 | " WATERMARK FOR order_time AS order_time" + 30 | ") WITH (\n" + 31 | " 'connector.type' = 'kafka',\n" + 32 | " 'connector.version' = '0.10',\n" + 33 | " 'connector.topic' = 'flink_orders',\n" + 34 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 35 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 36 | " 'connector.properties.group.id' = 'testGroup3',\n" + 37 | " 'connector.startup-mode' = 'earliest-offset',\n" + 38 | " 'format.type' = 'json',\n" + 39 | " 'format.derive-schema' = 'true'\n" + 40 | ")\n"; 41 | tableEnvironment.sqlUpdate(sourceTableDDL); 42 | 43 | String sinkTableDDL = "CREATE TABLE order_cnt (\n" + 44 | " log_per_min TIMESTAMP(3),\n" + 45 | " item STRING,\n" + 46 | " order_cnt BIGINT,\n" + 47 | " total_quality BIGINT\n" + 48 | ") WITH (\n" + 49 | " 'connector.type' = 'kafka',\n" + 50 | " 'connector.version' = '0.10',\n" + 51 | " 'connector.topic' = 'order_cnt',\n" + 52 | " 'update-mode' = 'append',\n" + 53 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 54 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 55 | " 'format.type' = 'json',\n" + 56 | " 'format.derive-schema' = 'true'\n" + 57 | ")"; 58 | tableEnvironment.sqlUpdate(sinkTableDDL); 59 | 60 | String querySQL = "insert into order_cnt \n" + 61 | "select TUMBLE_END(order_time, INTERVAL '10' SECOND),\n" + 62 | " item, COUNT(order_id) as order_cnt, CAST(sum(amount_kg) as BIGINT) as total_quality\n" + 63 | "from orders\n" + 64 | "group by item, TUMBLE(order_time, INTERVAL '10' SECOND)\n" ; 65 | 66 | tableEnvironment.sqlUpdate(querySQL); 67 | System.out.println(sourceTableDDL); 68 | System.out.println(sinkTableDDL); 69 | System.out.println(querySQL); 70 | 71 | tableEnvironment.execute("StreamKafka2KafkaJob"); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /etl-job/src/main/java/pge2e/PgCatalogTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package pge2e; 20 | 21 | import org.apache.flink.connector.jdbc.catalog.JdbcCatalog; 22 | import org.apache.flink.table.api.EnvironmentSettings; 23 | import org.apache.flink.table.api.TableEnvironment; 24 | import org.apache.flink.table.catalog.ObjectPath; 25 | 26 | import java.util.Arrays; 27 | 28 | public class PgCatalogTest { 29 | public static void main(String[] args) throws Exception { 30 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 31 | TableEnvironment tableEnv = TableEnvironment.create(settings); 32 | 33 | String name = "mypg"; 34 | String defaultDatabase = "mydb"; 35 | String username = "postgres"; 36 | String password = "postgres"; 37 | String baseUrl = "jdbc:postgresql://localhost:5432/"; 38 | 39 | JdbcCatalog catalog = new JdbcCatalog(name, defaultDatabase, username, password, baseUrl); 40 | tableEnv.registerCatalog("mypg", catalog); 41 | 42 | // set the JdbcCatalog as the current catalog of the session 43 | tableEnv.useCatalog("mypg"); 44 | 45 | System.out.println(tableEnv.getCatalog("mypg").get().databaseExists("mydb")); 46 | System.out.println(tableEnv.getCatalog("mypg").get().tableExists(new ObjectPath("mydb","public.primitive_arr_table"))); 47 | 48 | Arrays.stream(tableEnv.listDatabases()).forEach(System.out::println); 49 | 50 | Arrays.stream(tableEnv.listTables()).forEach(System.out::println); 51 | 52 | tableEnv.executeSql("select * from `public.primitive_arr_table`").print(); 53 | // true 54 | // true 55 | // postgres 56 | // mydb 57 | //bang.primitive_table 58 | //public.primitive_arr_table 59 | //public.primitive_serial_table 60 | //public.primitive_table 61 | //public.primitive_table2 62 | //public.simple_t1 63 | //+----------+-----------+--------------------------------+-----------+-----------+-----------------+----------------------+-----------------------------+--------------------------------+----------------------+---------------------+-----------+-----------+-----------------+-----------------------+--------------------------------+--------------------------+----------------------+ 64 | //| row_kind | int_arr | bytea_arr | short_arr | long_arr | real_arr | double_precision_arr | numeric_arr | numeric_arr_default | decimal_arr | boolean_arr | text_arr | char_arr | character_arr | character_varying_arr | timestamp_arr | date_arr | time_arr | 65 | //+----------+-----------+--------------------------------+-----------+-----------+-----------------+----------------------+-----------------------------+--------------------------------+----------------------+---------------------+-----------+-----------+-----------------+-----------------------+--------------------------------+--------------------------+----------------------+ 66 | //| +I | [1, 2, 3] | [[92, 120, 51, 50], [92, 1... | [3, 4, 5] | [4, 5, 6] | [5.5, 6.6, 7.7] | [6.6, 7.7, 8.8] | [7.70000, 8.80000, 9.90000] | [8.800000000000000000, 9.9... | [9.90, 10.10, 11.11] | [true, false, true] | [a, b, c] | [b, c, d] | [b , c , d ] | [b, c, d] | [2016-06-22T19:10:25, 2019... 
| [2015-01-01, 2020-01-01] | [00:51:03, 00:59:03] | 67 | //+----------+-----------+--------------------------------+-----------+-----------+-----------------+----------------------+-----------------------------+--------------------------------+----------------------+---------------------+-----------+-----------+-----------------+-----------------------+--------------------------------+--------------------------+----------------------+ 68 | //1 row in set 69 | // 70 | //Process finished with exit code 0 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue10.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 23 | 24 | import java.sql.Timestamp; 25 | import java.time.Instant; 26 | 27 | public class TestUserIssue10 { 28 | public static void main(String[] args) throws Exception { 29 | System.out.println(Timestamp.from(Instant.ofEpochMilli( 1593443236124L))); 30 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 32 | tableEnvironment.executeSql("CREATE TABLE ods_foo (\n" + 33 | " id INT,\n" + 34 | " usera ARRAY>\n" + 35 | ") WITH (" + 36 | " 'connector.type' = 'filesystem',\n" + 37 | " 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test1.csv',\n" + 38 | " 'format.type' = 'csv'" + 39 | ")"); 40 | environment.execute(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue11.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 23 | 24 | import java.sql.Timestamp; 25 | import java.time.Instant; 26 | 27 | public class TestUserIssue11 { 28 | public static void main(String[] args) throws Exception { 29 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 30 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 31 | tableEnvironment.executeSql("CREATE TABLE people (\n" + 32 | " user_name STRING,\n" + 33 | " content STRING\n" + 34 | ") WITH (\n" + 35 | " 'connector' = 'filesystem',\n" + 36 | " 'path' = 'file:///Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/avro/UserAvro.avsc',\n" + 37 | " 'format' = 'avro',\n" + 38 | " 'record-class' = 'avro.Person',\n" + 39 | " 'property-version' = '1',\n" + 40 | " 'properties.bootstrap.servers' = 'kafka:9092'\n" + 41 | ")"); 42 | 43 | System.out.println("CREATE TABLE people (\n" + 44 | " user_name STRING,\n" + 45 | " content STRING\n" + 46 | ") WITH (\n" + 47 | " 'connector' = 'filesystem',\n" + 48 | " 'path' = 'file:///Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/avro/UserAvro.avsc',\n" + 49 | " 'format' = 'avro',\n" + 50 | " 'record-class' = 'avro.Person',\n" + 51 | " 'property-version' = '1',\n" + 52 | " 'properties.bootstrap.servers' = 'kafka:9092'\n" + 53 | ")"); 54 | 55 | 56 | tableEnvironment.executeSql("select * from people"); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue12.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.EnvironmentSettings; 23 | import org.apache.flink.table.api.Table; 24 | import org.apache.flink.table.api.TableResult; 25 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 26 | 27 | public class TestUserIssue12 { 28 | public static void main(String[] args) throws Exception { 29 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 30 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 31 | environment.setParallelism(1); 32 | 33 | tableEnvironment.executeSql("create table csv( pageId VARCHAR, eventId VARCHAR, recvTime VARCHAR) with ( 'connector' = 'filesystem',\n" + 34 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv',\n" + 35 | " 'format' = 'csv')"); 36 | tableEnvironment.executeSql("CREATE TABLE es_table (\n" + 37 | " aggId varchar ,\n" + 38 | " pageId varchar ,\n" + 39 | " ts varchar ,\n" + 40 | " expoCnt int ,\n" + 41 | " clkCnt int\n" + 42 | ") WITH (\n" + 43 | "'connector' = 'elasticsearch-6',\n" + 44 | "'hosts' = 'http://localhost:9200',\n" + 45 | "'index' = 'usercase111',\n" + 46 | "'document-type' = '_doc',\n" + 47 | "'document-id.key-delimiter' = '$',\n" + 48 | "'sink.bulk-flush.interval' = '1000',\n" + 49 | "'format' = 'json'\n" + 50 | ")"); 51 | Table res = tableEnvironment.sqlQuery(" SELECT pageId,eventId,cast(recvTime as varchar) as ts, 1, 1 from csv"); 52 | TableResult tableResult = res.executeInsert("es_table"); 53 | tableResult.getJobClient().get(); 54 | 55 | 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue13.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 22 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 23 | import org.apache.flink.table.api.Table; 24 | import org.apache.flink.table.api.TableResult; 25 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 26 | import org.apache.flink.types.Row; 27 | import org.apache.flink.util.CloseableIterator; 28 | 29 | import static org.apache.flink.configuration.CheckpointingOptions.CHECKPOINTS_DIRECTORY; 30 | 31 | public class TestUserIssue13 { 32 | public static void main(String[] args) throws Exception { 33 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 34 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 35 | environment.setParallelism(1); 36 | 37 | tableEnvironment.executeSql("create table jsonT ( " + 38 | " `monitorId` STRING,\n" + 39 | " `deviceId` STRING,\n" + 40 | " `state` INT,\n" + 41 | " `time_st` TIMESTAMP(3),\n" + 42 | " WATERMARK FOR time_st AS time_st - INTERVAL '2' SECOND,\n" + 43 | " `data` DOUBLE) with ( 'connector' = 'filesystem',\n" + 44 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user4.json',\n" + 45 | " 'format' = 'json')"); 46 | System.out.println("create table jsonT ( " + 47 | " `monitorId` STRING,\n" + 48 | " `deviceId` STRING,\n" + 49 | " `state` INT,\n" + 50 | " `time_st` TIMESTAMP(3),\n" + 51 | " WATERMARK FOR time_st AS time_st - INTERVAL '2' SECOND,\n" + 52 | " `data` DOUBLE) with ( 'connector' = 'filesystem',\n" + 53 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user4.json',\n" + 54 | " 'format' = 'json')"); 55 | CloseableIterator tableResult = tableEnvironment.executeSql(" SELECT * from jsonT").collect(); 56 | while(tableResult.hasNext()) { 57 | System.out.println(tableResult.next()); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue14.java: -------------------------------------------------------------------------------- 1 | ///* 2 | // * Licensed to the Apache Software Foundation (ASF) under one 3 | // * or more contributor license agreements. See the NOTICE file 4 | // * distributed with this work for additional information 5 | // * regarding copyright ownership. The ASF licenses this file 6 | // * to you under the Apache License, Version 2.0 (the 7 | // * "License"); you may not use this file except in compliance 8 | // * with the License. You may obtain a copy of the License at 9 | // * 10 | // * http://www.apache.org/licenses/LICENSE-2.0 11 | // * 12 | // * Unless required by applicable law or agreed to in writing, software 13 | // * distributed under the License is distributed on an "AS IS" BASIS, 14 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // * See the License for the specific language governing permissions and 16 | // * limitations under the License. 
17 | // */ 18 | // 19 | //package usercase; 20 | // 21 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | //import org.apache.flink.table.api.DataTypes; 23 | //import org.apache.flink.table.api.EnvironmentSettings; 24 | //import org.apache.flink.table.api.Table; 25 | //import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 26 | //import org.apache.flink.types.Row; 27 | // 28 | // 29 | //public class TestUserIssue14 { 30 | // public static void main(String[] args) throws Exception { 31 | // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | // EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 33 | // StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, environmentSettings); 34 | // env.setParallelism(1); 35 | // 36 | // final Table inputTable = tableEnv.fromValues(// 37 | // DataTypes.ROW(// 38 | // DataTypes.FIELD("col1", DataTypes.STRING()), // 39 | // DataTypes.FIELD("col2", DataTypes.STRING())// 40 | // ), // 41 | // Row.of(1L, "Hello"), // 42 | // Row.of(2L, "Hello"), // 43 | // Row.of(3L, ""), // 44 | // Row.of(4L, "Ciao")); 45 | // tableEnv.createTemporaryView("ParquetDataset", inputTable); 46 | // tableEnv.executeSql(// 47 | // "CREATE TABLE `out` (\n" + // 48 | // "col1 STRING,\n" + // 49 | // "col2 STRING\n" + // 50 | // ") WITH (\n" + // 51 | // " 'connector' = 'filesystem',\n" + // 52 | // " 'format' = 'parquet',\n" + // 53 | // " 'path' = 'file:///Users/bang/test',\n" + // 54 | // " 'sink.shuffle-by-partition.enable' = 'true'\n" + // 55 | // ")"); 56 | // 57 | // tableEnv.executeSql("INSERT INTO `out` SELECT * FROM ParquetDataset").getJobClient() 58 | // .get().getJobExecutionResult(Thread.currentThread().getContextClassLoader()).get(); 59 | // } 60 | //} 61 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue15.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.DataTypes; 23 | import org.apache.flink.table.api.EnvironmentSettings; 24 | import org.apache.flink.table.api.Table; 25 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 26 | import org.apache.flink.types.Row; 27 | import org.apache.flink.util.CloseableIterator; 28 | 29 | 30 | public class TestUserIssue15 { 31 | public static void main(String[] args) throws Exception { 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 34 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, environmentSettings); 35 | env.setParallelism(1); 36 | 37 | tableEnv.executeSql("CREATE TABLE test (\n" + 38 | "store_id INT,\n" + 39 | "store_type VARCHAR,\n" + 40 | "region_id INT,\n" + 41 | "store_name VARCHAR,\n" + 42 | "store_number INT,\n" + 43 | "store_street_address VARCHAR,\n" + 44 | "store_city VARCHAR,\n" + 45 | "store_state VARCHAR,\n" + 46 | "store_postal_code VARCHAR,\n" + 47 | "store_country VARCHAR,\n" + 48 | "store_manager VARCHAR,\n" + 49 | "store_phone VARCHAR,\n" + 50 | "store_fax VARCHAR,\n" + 51 | "first_opened_date TIMESTAMP,\n" + 52 | "last_remodel_date DATE,\n" + 53 | "store_sqft INT,\n" + 54 | "grocery_sqft INT,\n" + 55 | "frozen_sqft INT,\n" + 56 | "meat_sqft INT,\n" + 57 | "coffee_bar BOOLEAN,\n" + 58 | "video_store BOOLEAN,\n" + 59 | "salad_bar BOOLEAN,\n" + 60 | "prepared_food BOOLEAN,\n" + 61 | "florist BOOLEAN" + 62 | ") WITH (" + 63 | " 'connector' = 'filesystem',\n" + 64 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/test15.csv',\n" + 65 | " 'format' = 'csv'," + 66 | " 'csv.field-delimiter' = '|'," + 67 | " 'csv.null-literal'=''" + 68 | ")"); 69 | CloseableIterator it = tableEnv.executeSql(" SELECT * FROM test").collect(); 70 | 71 | while(it.hasNext()) { 72 | System.out.println(it.next()); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue16.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 22 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 23 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 24 | import org.apache.flink.types.Row; 25 | import org.apache.flink.util.CloseableIterator; 26 | 27 | import static org.apache.flink.configuration.CheckpointingOptions.CHECKPOINTS_DIRECTORY; 28 | 29 | public class TestUserIssue16 { 30 | public static void main(String[] args) throws Exception { 31 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 33 | environment.setParallelism(1); 34 | 35 | tableEnvironment.executeSql("create table jsonT ( " + 36 | " `monitorId` STRING,\n" + 37 | " `deviceId` STRING,\n" + 38 | " `state` INT,\n" + 39 | " `time_st` TIMESTAMP(3),\n" + 40 | " WATERMARK FOR time_st AS time_st - INTERVAL '2' SECOND,\n" + 41 | " `data` DOUBLE) with ( " + 42 | " 'connector' = 'filesystem',\n" + 43 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user4.json',\n" + 44 | " 'format' = 'json')"); 45 | CloseableIterator tableResult = tableEnvironment.executeSql(" SELECT * from jsonT " + 46 | "where deviceId LIKE '%양현마을%' ").collect(); 47 | while(tableResult.hasNext()) { 48 | System.out.println(tableResult.next()); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue17.java: -------------------------------------------------------------------------------- 1 | ///* 2 | // * Licensed to the Apache Software Foundation (ASF) under one 3 | // * or more contributor license agreements. See the NOTICE file 4 | // * distributed with this work for additional information 5 | // * regarding copyright ownership. The ASF licenses this file 6 | // * to you under the Apache License, Version 2.0 (the 7 | // * "License"); you may not use this file except in compliance 8 | // * with the License. You may obtain a copy of the License at 9 | // * 10 | // * http://www.apache.org/licenses/LICENSE-2.0 11 | // * 12 | // * Unless required by applicable law or agreed to in writing, software 13 | // * distributed under the License is distributed on an "AS IS" BASIS, 14 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // * See the License for the specific language governing permissions and 16 | // * limitations under the License. 
17 | // */ 18 | // 19 | //package usercase; 20 | // 21 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | //import org.apache.flink.table.api.TableResult; 23 | //import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 24 | //import org.apache.flink.table.catalog.DataTypeFactory; 25 | //import org.apache.flink.table.functions.ScalarFunction; 26 | //import org.apache.flink.table.types.inference.TypeInference; 27 | // 28 | //import java.sql.Date; 29 | //import java.time.LocalDate; 30 | // 31 | //public class TestUserIssue17 { 32 | // public static void main(String[] args) throws Exception { 33 | // StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 34 | // StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 35 | // environment.setParallelism(1); 36 | // 37 | // 38 | // tableEnvironment.executeSql("CREATE TABLE orders (\n" + 39 | // " order_number INT,\n" + 40 | // " order_date INT NULL,\n" + 41 | // " purchaser INT,\n" + 42 | // " quantity INT,\n" + 43 | // " product_id INT\n" + 44 | // " ) WITH (\n" + 45 | // " 'connector' = 'kafka',\n" + 46 | // " 'topic' = 'dbserver1.inventory.orders',\n" + 47 | // " 'scan.startup.mode' = 'earliest-offset',\n" + 48 | // " 'properties.bootstrap.servers' = 'localhost:9092',\n" + 49 | // " 'properties.group.id' = 'xxtestgroup1',\n" + 50 | // " 'format' = 'debezium-json',\n" + 51 | // " 'debezium-json.schema-include' = 'true' " + 52 | // " )"); 53 | // tableEnvironment.executeSql("create table orders1 ( " + 54 | // " order_number INT,\n" + 55 | // " order_date date,\n" + 56 | // " purchaser INT,\n" + 57 | // " quantity INT,\n" + 58 | // " product_id INT," + 59 | // " PRIMARY KEY(order_number) NOT ENFORCED\n" + 60 | // ") with ( " + 61 | // " 'connector' = 'jdbc',\n" + 62 | // " 'url' = 'jdbc:mysql://localhost:3306/inventory',\n" + 63 | // " 'username' = 'mysqluser',\n" + 64 | // " 'password' = 'mysqlpw',\n" + 65 | // " 'table-name' = 'orders2',\n" + 66 | // " 'driver' = 'com.mysql.jdbc.Driver')"); 67 | // tableEnvironment.registerFunction("int2Date", new Int2DateFunc()); 68 | // TableResult result = tableEnvironment.executeSql("insert into orders1 SELECT order_number, int2Date(order_date),purchaser,quantity,product_id from orders "); 69 | // result.getJobClient().get() 70 | // .getJobExecutionResult(Thread.currentThread().getContextClassLoader()).get(); 71 | // } 72 | // 73 | // public static class Int2DateFunc extends ScalarFunction { 74 | // 75 | // public Date eval(int epochDay) { 76 | // return Date.valueOf(LocalDate.ofEpochDay(epochDay)); 77 | // } 78 | // 79 | // @Override 80 | // public TypeInference getTypeInference(DataTypeFactory typeFactory) { 81 | // return super.getTypeInference(typeFactory); 82 | // } 83 | // } 84 | //} 85 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue18.java: -------------------------------------------------------------------------------- 1 | ///* 2 | // * Licensed to the Apache Software Foundation (ASF) under one 3 | // * or more contributor license agreements. See the NOTICE file 4 | // * distributed with this work for additional information 5 | // * regarding copyright ownership. The ASF licenses this file 6 | // * to you under the Apache License, Version 2.0 (the 7 | // * "License"); you may not use this file except in compliance 8 | // * with the License. 
You may obtain a copy of the License at 9 | // * 10 | // * http://www.apache.org/licenses/LICENSE-2.0 11 | // * 12 | // * Unless required by applicable law or agreed to in writing, software 13 | // * distributed under the License is distributed on an "AS IS" BASIS, 14 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // * See the License for the specific language governing permissions and 16 | // * limitations under the License. 17 | // */ 18 | // 19 | //package usercase; 20 | // 21 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | //import org.apache.flink.table.api.StatementSet; 23 | //import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 24 | //import org.apache.flink.table.catalog.DataTypeFactory; 25 | //import org.apache.flink.table.functions.ScalarFunction; 26 | //import org.apache.flink.table.types.inference.TypeInference; 27 | // 28 | //import java.sql.Date; 29 | //import java.time.LocalDate; 30 | // 31 | //public class TestUserIssue18 { 32 | // public static void main(String[] args) throws Exception { 33 | // StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 34 | // StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 35 | // environment.setParallelism(1); 36 | // 37 | // 38 | // tableEnvironment.executeSql("create table online_example (\n" + 39 | // " face_id varchar,\n" + 40 | // " device_id varchar,\n" + 41 | // " feature_data double\n" + 42 | // ") with (\n" + 43 | // " 'connector' = 'kafka',\n" + 44 | // " 'topic' = 'json-test-2',\n" + 45 | // " 'properties.bootstrap.servers' = 'localhost:9092',\n" + 46 | // " 'properties.group.id' = 'read_example',\n" + 47 | // " 'format' = 'csv',\n" + 48 | // " 'csv.field-delimiter' = ' '," + 49 | // " 'scan.startup.mode' = 'earliest-offset' \n" + 50 | // ")"); 51 | // tableEnvironment.executeSql("create table write_example (\n" + 52 | // " face_id varchar,\n" + 53 | // " device_id varchar " + 54 | // " ) with (\n" + 55 | // " 'connector' = 'kafka',\n" + 56 | // " 'topic' = 'tianchi_write_example-3',\n" + 57 | // " 'properties.bootstrap.servers' = 'localhost:9092',\n" + 58 | // " 'properties.group.id' = 'write_example',\n" + 59 | // " 'format' = 'csv',\n" + 60 | // " 'scan.startup.mode' = 'earliest-offset'\n" + 61 | // " )"); 62 | // 63 | // StatementSet statementSet = tableEnvironment.createStatementSet(); 64 | // statementSet.addInsertSql("insert into write_example SELECT face_id, device_id from online_example"); 65 | // 66 | // statementSet.execute().getJobClient().get() 67 | // .getJobExecutionResult(Thread.currentThread().getContextClassLoader()).get(); 68 | // } 69 | // 70 | // public static class Int2DateFunc extends ScalarFunction { 71 | // 72 | // public Date eval(int epochDay) { 73 | // return Date.valueOf(LocalDate.ofEpochDay(epochDay)); 74 | // } 75 | // 76 | // @Override 77 | // public TypeInference getTypeInference(DataTypeFactory typeFactory) { 78 | // return super.getTypeInference(typeFactory); 79 | // } 80 | // } 81 | //} 82 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue19.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 23 | import org.apache.flink.table.catalog.DataTypeFactory; 24 | import org.apache.flink.table.functions.ScalarFunction; 25 | import org.apache.flink.table.types.inference.TypeInference; 26 | 27 | import java.sql.Date; 28 | import java.time.LocalDate; 29 | 30 | public class TestUserIssue19 { 31 | public static void main(String[] args) throws Exception { 32 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 34 | environment.setParallelism(1); 35 | environment.enableCheckpointing(200); 36 | 37 | tableEnvironment.executeSql("create table test_tbl ( " + 38 | " `monitorId` STRING,\n" + 39 | " `deviceId` STRING,\n" + 40 | " `state` DOUBLE ) with ( " + 41 | " 'connector' = 'filesystem',\n" + 42 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user19.json',\n" + 43 | " 'format' = 'json')"); 44 | //tableEnvironment.executeSql("select SPLIT_INDEX(deviceId, ';', 0) from test_tbl").print(); 45 | tableEnvironment.executeSql("select SPLIT_INDEX(deviceId, U&'\\003B', 0) from test_tbl").print(); 46 | 47 | } 48 | 49 | public static class Int2DateFunc extends ScalarFunction { 50 | 51 | public Date eval(int epochDay) { 52 | return Date.valueOf(LocalDate.ofEpochDay(epochDay)); 53 | } 54 | 55 | @Override 56 | public TypeInference getTypeInference(DataTypeFactory typeFactory) { 57 | return super.getTypeInference(typeFactory); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue2.java: -------------------------------------------------------------------------------- 1 | package usercase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestUserIssue2 { 9 | private static String kafkaOrdersDDL = "CREATE TABLE user_log (\n" + 10 | " order_id STRING,\n" + 11 | " item STRING,\n" + 12 | " currency STRING,\n" + 13 | " amount INT,\n" + 14 | " order_time TIMESTAMP(3),\n" + 15 | " rowtime as order_time,\n" + 16 | " amount_kg as amount * 1000,\n" + 17 | " WATERMARK FOR rowtime AS rowtime\n" + 18 | ") WITH (\n" + 19 | " 'connector.type' = 'kafka',\n" + 20 | " 'connector.version' = '0.10',\n" + 21 | " 'connector.topic' = 'flink_orders3',\n" + 22 | " 'connector.properties.zookeeper.connect' = 
'localhost:2181',\n" + 23 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 24 | " 'connector.properties.group.id' = 'testGroup4',\n" + 25 | " 'connector.startup-mode' = 'earliest-offset',\n" + 26 | " 'format.type' = 'json',\n" + 27 | " 'format.derive-schema' = 'true'\n" + 28 | ")\n"; 29 | 30 | private static String mysqlSinkDDL = "CREATE TABLE test_mysql_2 (\n" + 31 | "vid string,\n" + 32 | "rss BIGINT,\n" + 33 | "start_time string\n" + 34 | ") with ( \n" + 35 | " 'connector.type' = 'jdbc',\n" + 36 | " 'connector.url' = 'jdbc:mysql://localhost:3306/test',\n" + 37 | " 'connector.username' = 'root'," + 38 | " 'connector.table' = 'task_flink_table_3',\n" + 39 | " 'connector.write.flush.max-rows' = '100'\n" + 40 | ")"; 41 | 42 | private static String query = "INSERT INTO test_mysql_2\n" + 43 | " SELECT order_id,rss, start_time FROM(" + 44 | " SELECT order_id,rss, start_time FROM (\n" + 45 | " SELECT order_id,rss, start_time,\n" + 46 | " ROW_NUMBER() OVER (PARTITION BY start_time ORDER BY rss desc) AS rownum\n" + 47 | " FROM (\n" + 48 | " SELECT order_id,\n" + 49 | "DATE_FORMAT(TUMBLE_START(rowtime, INTERVAL '5' MINUTE),'yyyy-MM-dd HH:00') AS start_time,\n" + 50 | "SUM(amount) AS rss\n" + 51 | "FROM user_log\n" + 52 | "GROUP BY order_id, TUMBLE(rowtime, INTERVAL '5' MINUTE)\n" + 53 | " )\n" + 54 | ")\n" + 55 | "WHERE rownum <= 10" 56 | +") group by order_id,rss, start_time\n"; 57 | 58 | public static void main(String[] args) throws Exception { 59 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 60 | env.setParallelism(1); 61 | 62 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 63 | .useBlinkPlanner() 64 | .inStreamingMode() 65 | .build(); 66 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 67 | 68 | System.out.println(query); 69 | tableEnvironment.sqlUpdate(kafkaOrdersDDL); 70 | tableEnvironment.sqlUpdate(mysqlSinkDDL); 71 | tableEnvironment.sqlUpdate(query); 72 | // 73 | // //check the plan 74 | // System.out.println(tableEnvironment.explain(tableEnvironment.sqlQuery(query))); 75 | 76 | // tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(query), Row.class).print(); 77 | tableEnvironment.execute("reproduce_user_issue"); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue20.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 23 | import org.apache.flink.table.catalog.DataTypeFactory; 24 | import org.apache.flink.table.functions.ScalarFunction; 25 | import org.apache.flink.table.types.inference.TypeInference; 26 | 27 | import java.sql.Date; 28 | import java.time.LocalDate; 29 | 30 | public class TestUserIssue20 { 31 | public static void main(String[] args) throws Exception { 32 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 34 | environment.setParallelism(1); 35 | environment.enableCheckpointing(200); 36 | 37 | tableEnvironment.executeSql("CREATE TABLE es_table (\n" + 38 | " aggId varchar ,\n" + 39 | " pageId varchar ,\n" + 40 | " ts varchar ,\n" + 41 | " expoCnt int ,\n" + 42 | " clkCnt int\n" + 43 | ") WITH (\n" + 44 | "'connector' = 'elasticsearch-6',\n" + 45 | "'hosts' = 'http://localhost:9200',\n" + 46 | "'index' = 'usercase13',\n" + 47 | "'document-type' = '_doc',\n" + 48 | "'document-id.key-delimiter' = '$',\n" + 49 | "'sink.bulk-flush.interval' = '1000',\n" + 50 | "'format' = 'json'\n" + 51 | ")"); 52 | //tableEnvironment.executeSql("select SPLIT_INDEX(deviceId, ';', 0) from test_tbl").print(); 53 | tableEnvironment.executeSql("select * from es_table").print(); 54 | 55 | } 56 | 57 | public static class Int2DateFunc extends ScalarFunction { 58 | 59 | public Date eval(int epochDay) { 60 | return Date.valueOf(LocalDate.ofEpochDay(epochDay)); 61 | } 62 | 63 | @Override 64 | public TypeInference getTypeInference(DataTypeFactory typeFactory) { 65 | return super.getTypeInference(typeFactory); 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue21.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.Table; 23 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 24 | import org.apache.flink.table.catalog.Catalog; 25 | 26 | import java.util.Arrays; 27 | 28 | public class TestUserIssue21 { 29 | public static void main(String[] args) throws Exception { 30 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 32 | 33 | String inTablePath = "CREATE TABLE datagen ( " + 34 | " id INT, " + 35 | " total string, " + 36 | " ts AS localtimestamp, " + 37 | " WATERMARK FOR ts AS ts " + 38 | ") WITH ( " + 39 | " 'connector' = 'datagen', " + 40 | " 'rows-per-second'='5', " + 41 | " 'fields.id.min'='1', " + 42 | " 'fields.id.max'='10', " + 43 | " 'fields.total.length'='10' " + 44 | ")"; 45 | // tableEnvironment 46 | tableEnvironment.executeSql(inTablePath); 47 | 48 | Table table = tableEnvironment.sqlQuery("select id, total, 12 as col_1 from datagen"); 49 | tableEnvironment.createTemporaryView("table1", table); 50 | Arrays.stream(tableEnvironment.listTables()).forEach(t -> System.out.println(t)); 51 | 52 | Catalog catalog = tableEnvironment.getCatalog(tableEnvironment.getCurrentCatalog()).get(); 53 | catalog.listTables(tableEnvironment.getCurrentDatabase()).stream().forEach(t -> System.out.println(t)); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue3.java: -------------------------------------------------------------------------------- 1 | package usercase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestUserIssue3 { 9 | 10 | private static String kafkaSourceDDL = "create table json_table(" + 11 | " w_es BIGINT," + 12 | " w_type STRING," + 13 | " w_isDdl BOOLEAN," + 14 | " w_data ARRAY>," + 15 | " w_ts TIMESTAMP(3)," + 16 | " w_table STRING" + 17 | ") WITH (\n" + 18 | " 'connector.type' = 'kafka',\n" + 19 | " 'connector.version' = '0.10',\n" + 20 | " 'connector.topic' = 'json-test1',\n" + 21 | " 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" + 22 | " 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" + 23 | " 'connector.properties.group.id' = 'test-jdb',\n" + 24 | " 'connector.startup-mode' = 'earliest-offset',\n" + 25 | " 'format.type' = 'json',\n" + 26 | " 'format.derive-schema' = 'true'\n" + 27 | ")\n"; 28 | private static String csvSinkDDL = "create table csv(" + 29 | " w_ts TIMESTAMP(3)," + 30 | " city_id VARCHAR," + 31 | " pay_info VARCHAR," + 32 | " w_type STRING" + 33 | ") with (" + 34 | " 'connector.type' = 'filesystem',\n" + 35 | " 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/test_codegen.csv',\n" + 36 | " 'format.type' = 'csv')"; 37 | public static void main(String[] args) throws Exception { 38 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 39 | env.setParallelism(1); 40 | 41 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 42 | .useBlinkPlanner() 43 | .inStreamingMode() 44 | .build(); 45 | 
StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 46 | tableEnvironment.executeSql(kafkaSourceDDL); 47 | tableEnvironment.executeSql(csvSinkDDL); 48 | String querySQL = "insert into csv select w_ts," + 49 | " 'test' as city_id, " + 50 | " w_data[cast(w_es/1000 as INT) - 1589870637 + 1].pay_info," + 51 | " w_type " + 52 | "from json_table"; 53 | 54 | tableEnvironment.sqlUpdate(querySQL); 55 | tableEnvironment.execute("test"); 56 | // tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 57 | // tableEnvironment.execute("reproduce_user_issue"); 58 | } 59 | 60 | //@Test 61 | // public void testArray() throws Exception { 62 | // String jsonStr = "{" + 63 | // "\"w_es\":1589870637000," + 64 | // "\"w_type\":\"INSERT\"," + 65 | // "\"w_isDdl\":false," + 66 | // "\"w_data\":[" + 67 | // "{\"pay_info\":\"channelId=82&onlineFee=89.0&outTradeNo=0&payId=0&payType=02&rechargeId=4&totalFee=89.0&tradeStatus=success&userId=32590183789575&sign=00\"," + 68 | // "\"online_fee\":\"89.0\"," + 69 | // "\"sign\":\"00\"," + 70 | // "\"account_pay_fee\":\"0.0\"}]," + 71 | // "\"w_ts\":\"2020-05-20T13:58:37.131Z\"," + 72 | // "\"w_table\":\"cccc111\"}"; 73 | // System.out.println(jsonStr); 74 | // DataType rowType = ROW( 75 | // FIELD("w_es", DataTypes.BIGINT()), 76 | // FIELD("w_type", DataTypes.STRING()), 77 | // FIELD("w_isDdl", DataTypes.BOOLEAN()), 78 | // FIELD("w_data", ARRAY(ROW( 79 | // FIELD("pay_info", DataTypes.STRING()), 80 | // FIELD("online_fee", DataTypes.DECIMAL(38, 4)), 81 | // FIELD("sign", DataTypes.STRING()), 82 | // FIELD("account_pay_fee", DataTypes.DECIMAL(38, 4)) 83 | // ))), 84 | // FIELD("w_ts", DataTypes.TIMESTAMP()), 85 | // FIELD("w_table", DataTypes.STRING())); 86 | // JsonRowDeserializationSchema deserializationSchema = new JsonRowDeserializationSchema.Builder( 87 | // (TypeInformation) TypeConversions.fromDataTypeToLegacyInfo(rowType)) 88 | // .build(); 89 | // Row row = deserializationSchema.deserialize(jsonStr.getBytes()); 90 | // System.out.println(row); 91 | // } 92 | } 93 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue4.java: -------------------------------------------------------------------------------- 1 | package usercase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | public class TestUserIssue4 { 9 | 10 | private static String mysqlTable = "create table tb(id string, cooper bigint, user_sex string) with(\n" + 11 | " 'connector.type' = 'jdbc',\n" + 12 | " 'connector.url' = 'jdbc:mysql://localhost:3306/test',\n" + 13 | " 'connector.username' = 'root',\n" + 14 | " 'connector.table' = 'tb'\n" + 15 | ")"; 16 | public static void main(String[] args) throws Exception { 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | env.setParallelism(1); 19 | 20 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 21 | .useBlinkPlanner() 22 | .inStreamingMode() 23 | .build(); 24 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 25 | tableEnvironment.sqlUpdate(mysqlTable); 26 | String querySQL = "select id, cooper from tb"; 27 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 28 
| 29 | tableEnvironment.execute("reproduce_user_issue"); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue5.java: -------------------------------------------------------------------------------- 1 | package usercase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.types.Row; 7 | 8 | import java.net.URL; 9 | import java.net.URLClassLoader; 10 | 11 | public class TestUserIssue5 { 12 | 13 | private static String hbaseSourceDDL = "CREATE TABLE country (\n" + 14 | " rowkey VARCHAR,\n" + 15 | " f1 ROW \n" + 16 | " " + 17 | ") WITH (\n" + 18 | " 'connector.type' = 'hbase',\n" + 19 | " 'connector.version' = '1.4.3',\n" + 20 | " 'connector.table-name' = 'country',\n" + 21 | " 'connector.zookeeper.quorum' = 'localhost:2182',\n" + 22 | " 'connector.zookeeper.znode.parent' = '/hbase' " + 23 | ")"; 24 | public static void main(String[] args) throws Exception { 25 | 26 | ClassLoader cl = ClassLoader.getSystemClassLoader(); 27 | 28 | URL[] urls = ((URLClassLoader)cl).getURLs(); 29 | 30 | for(URL url: urls){ 31 | System.out.println(url.getFile()); 32 | } 33 | 34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 35 | env.setParallelism(1); 36 | 37 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 38 | .useBlinkPlanner() 39 | .inStreamingMode() 40 | .build(); 41 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 42 | tableEnvironment.sqlUpdate(hbaseSourceDDL); 43 | 44 | String querySQL = "select * from country\n"; 45 | 46 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery(querySQL), Row.class).print(); 47 | 48 | tableEnvironment.execute("read_hbase_sql"); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue6.java: -------------------------------------------------------------------------------- 1 | package usercase; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | import org.apache.flink.table.descriptors.Json; 7 | import org.apache.flink.table.descriptors.Kafka; 8 | import org.apache.flink.table.descriptors.Schema; 9 | import org.apache.flink.types.Row; 10 | 11 | import static org.apache.flink.table.api.DataTypes.FIELD; 12 | import static org.apache.flink.table.api.DataTypes.INT; 13 | import static org.apache.flink.table.api.DataTypes.ROW; 14 | import static org.apache.flink.table.api.DataTypes.STRING; 15 | 16 | public class TestUserIssue6 { 17 | public static void main(String[] args) throws Exception { 18 | System.out.println("\u65E0"); 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | env.setParallelism(1); 21 | 22 | EnvironmentSettings envSettings = EnvironmentSettings.newInstance() 23 | .useBlinkPlanner() 24 | .inStreamingMode() 25 | .build(); 26 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env, envSettings); 27 | 28 | tableEnvironment.connect( 29 | new Kafka() 30 | .topic("test-json") 31 | .version("0.10") 32 | .property("bootstrap.servers", "localhost:9092") 33 | 
.property("zookeeper.connect", "localhost:2181") 34 | .property("group.id", "testGroup")) 35 | .withFormat(new Json()) 36 | .withSchema(new Schema() 37 | .field("general",STRING()) 38 | .field("data", ROW( 39 | FIELD("reference_id", STRING()), 40 | FIELD("transaction_type", INT()), 41 | FIELD("merchant_id", INT()), 42 | FIELD("status", INT()), 43 | FIELD("create_time", INT()) 44 | ) 45 | ) 46 | ) 47 | .createTemporaryTable("KafkaSource"); 48 | tableEnvironment.toAppendStream(tableEnvironment.sqlQuery("select general, reference_id, data.reference_id from KafkaSource"), Row.class) 49 | .print(); 50 | tableEnvironment.execute("case6"); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue8.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 23 | 24 | public class TestUserIssue8 { 25 | public static void main(String[] args) throws Exception { 26 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | StreamTableEnvironment tEnv = StreamTableEnvironment.create(environment); 28 | tEnv.sqlQuery("DESCRIBE fact_table"); 29 | tEnv.execute(""); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /etl-job/src/main/java/usercase/TestUserIssue9.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package usercase; 20 | 21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 22 | import org.apache.flink.table.api.Table; 23 | import org.apache.flink.table.api.Tumble; 24 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 25 | import org.apache.flink.table.descriptors.Json; 26 | import org.apache.flink.table.descriptors.Kafka; 27 | import org.apache.flink.table.descriptors.Rowtime; 28 | import org.apache.flink.table.descriptors.Schema; 29 | import org.apache.flink.types.Row; 30 | 31 | import java.sql.Timestamp; 32 | import java.time.Instant; 33 | 34 | public class TestUserIssue9 { 35 | public static void main(String[] args) throws Exception { 36 | //2020-06-29 21:12:04.471 37 | //2020-06-29 23:07:01.1245406 38 | System.out.println(Timestamp.from(Instant.ofEpochMilli( 1593443236124L))); 39 | // StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 40 | // StreamTableEnvironment tEnv = StreamTableEnvironment.create(environment); 41 | // tEnv.connect(new Kafka() 42 | // .version("0.10") 43 | // .topic("jes_topic_evtime") 44 | // .property("zookeeper.connect", "localhost:2181") 45 | // .property("bootstrap.servers", "localhost:9092") 46 | // .property("group.id", "grp1") 47 | // .startFromEarliest() 48 | // ).withFormat(new Json() 49 | // .failOnMissingField(false).deriveSchema()) 50 | // .withSchema(new Schema() 51 | // .field("acct", "STRING") 52 | // .field("evtime", "LONG") 53 | // .field("logictime","TIMESTAMP(3)") 54 | // .rowtime(new Rowtime().timestampsFromField("evtime").watermarksPeriodicBounded(5000))) 55 | // .inAppendMode().createTemporaryTable("testTableName"); 56 | // 57 | // 58 | // 59 | // Table testTab = tEnv.sqlQuery("SELECT acct, evtime, logictime FROM testTableName") 60 | // .window(Tumble.over("5.seconds").on("logictime").as("w1")) 61 | // .groupBy("w1, acct") 62 | // .select("w1.rowtime, acctno"); 63 | // 64 | // tEnv.toRetractStream(testTab, Row.class).print(); 65 | // environment.execute(); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-scripts/kafak2kafka_etl_run.sh: -------------------------------------------------------------------------------- 1 | # start kafka cluster 2 | cd /Users/bang/kafka_2.11-0.10.2.0 3 | ./bin/zookeeper-server-start.sh -daemon ./config/zookeeper.properties 4 | ./bin/kafka-server-start.sh -daemon ./config/server.properties 5 | #optional 6 | ./bin/kafka-topics.sh --create --topic json-test --zookeeper localhost:2181 --partitions 1 --replication-factor 1 7 | ./bin/kafka-console-producer.sh --topic json-test --broker-list localhost:9092 8 | 9 | ## relate command 10 | cd ~/confluent-3.2.0/ 11 | ./bin/kafka-avro-console-producer --broker-list localhost:9092 --topic t1 --property value.schema='{"type":"record","name":"myrecord","fields":[{"name":"f1","type":"string"}]}' 12 | cd kafka_2.11-0.10.2.0 13 | ./bin/kafka-topics.sh --list --zookeeper localhost:2181 14 | ./bin/kafka-console-consumer.sh --topic csv_data --bootstrap-server localhost:9092 --from-beginning 15 | 16 | # start hdfs 17 | cd /Users/bang/hadoop-2.8.5 18 | hadoop namenode -format 19 | cd /Users/bang/hadoop-2.8.5/sbin 20 | ./start-dfs.sh 21 | ./start-yarn.sh 22 | 23 | # start mysql 24 | /usr/local/opt/mysql/support-files/mysql.server start 25 | # stop mysql 26 | /usr/local/opt/mysql/support-files/mysql.server stop 27 | 28 | # start es 29 | cd /Users/bang/elasticsearch-6.3.1 30 | 
./bin/elasticsearch 31 | ## es cli: 32 | ./bin/elasticsearch-sql-cli 33 | 34 | 35 | # start hbase 36 | cd /Users/bang/hbase-1.4.3 37 | ./bin/start-hbase.sh 38 | ## use its own zookeeper whose client port is set to 2182 to avoid conflicts with the Kafka zookeeper 39 | ## : ./bin/hbase shell 40 | ## list; create 't1','f1'; scan 'gmv'; 41 | ## note: hbase may conflict with hive 42 | 43 | # start hive 44 | ## start hadoop first 45 | ./start-dfs.sh 46 | ./start-yarn.sh 47 | ## start mysql (metastore) 48 | cd /Users/bang/hive-3.1.2 49 | ## init hive schema (initialization is only required once) 50 | bin/schematool -initSchema -dbType mysql 51 | bin/hive 52 | ## start metastore service 53 | bin/hive --service metastore -p 9083 & 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2es/Kafka2AppendEs.sql: -------------------------------------------------------------------------------- 1 | ## batch 2 | create table csv( pageId VARCHAR, eventId VARCHAR, recvTime VARCHAR) with ( 'connector.type' = 'filesystem', 3 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 4 | 'format.type' = 'csv', 5 | 'format.fields.0.name' = 'pageId', 6 | 'format.fields.0.data-type' = 'STRING', 7 | 'format.fields.1.name' = 'eventId', 8 | 'format.fields.1.data-type' = 'STRING', 9 | 'format.fields.2.name' = 'recvTime', 10 | 'format.fields.2.data-type' = 'STRING') 11 | 12 | CREATE TABLE es_table ( 13 | aggId varchar , 14 | pageId varchar , 15 | ts varchar , 16 | expoCnt int , 17 | clkCnt int 18 | ) WITH ( 19 | 'connector.type' = 'elasticsearch', 20 | 'connector.version' = '6', 21 | 'connector.hosts' = 'http://localhost:9200', 22 | 'connector.index' = '66_test', 23 | 'connector.document-type' = '_doc', 24 | 'update-mode' = 'upsert', 25 | 'connector.key-delimiter' = '$', 26 | 'connector.key-null-literal' = 'n/a', 27 | 'connector.bulk-flush.interval' = '1000', 28 | 'format.type' = 'json' 29 | ) 30 | INSERT INTO es_table 31 | SELECT pageId,eventId,cast(recvTime as varchar) as ts, 1, 1 from csv 32 | 33 | 34 | ## streaming 35 | 36 | create table csv_user( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'connector.type' = 'filesystem', 37 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv', 38 | 'format.type' = 'csv', 39 | 'format.fields.0.name' = 'user_name', 40 | 'format.fields.0.data-type' = 'STRING', 41 | 'format.fields.1.name' = 'is_new', 42 | 'format.fields.1.data-type' = 'BOOLEAN', 43 | 'format.fields.2.name' = 'content', 44 | 'format.fields.2.data-type' = 'STRING') 45 | CREATE TABLE kafka_user ( 46 | user_name STRING, 47 | is_new BOOLEAN, 48 | content STRING) WITH ( 49 | 'connector.type' = 'kafka', 50 | 'connector.version' = '0.10', 51 | 'connector.topic' = 'kafka_user', 52 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 53 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 54 | 'connector.properties.group.id' = 'testGroup3', 55 | 'connector.startup-mode' = 'earliest-offset', 56 | 'format.type' = 'csv') 57 | insert into kafka_user 58 | select user_name, is_new, content from 59 | csv_user; 60 | 61 | CREATE TABLE es_user ( 62 | user_name STRING, 63 | is_new BOOLEAN, 64 | content STRING 65 | ) WITH ( 66 | 'connector.type' = 'elasticsearch', 67 | 'connector.version' = '7', 68 | 'connector.hosts' = 'http://localhost:9200', 69 | 'connector.index' = 'es_user', 70 | 'connector.document-type' = '_doc', 71
| 'update-mode' = 'upsert', 72 | 'connector.key-delimiter' = '$', 73 | 'connector.key-null-literal' = 'n/a', 74 | 'connector.bulk-flush.interval' = '1000', 75 | 'format.type' = 'json' 76 | ); 77 | insert into es_user 78 | select user_name, is_new, content from 79 | kafka_user; 80 | 81 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2es/Kafka2DynamicIndexEs.sql: -------------------------------------------------------------------------------- 1 | create table csv( pageId VARCHAR, eventId VARCHAR, recvTime TIMESTAMP(3)) with ( 'connector.type' = 'filesystem', 2 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 3 | 'format.type' = 'csv', 4 | 'format.fields.0.name' = 'pageId', 5 | 'format.fields.0.data-type' = 'STRING', 6 | 'format.fields.1.name' = 'eventId', 7 | 'format.fields.1.data-type' = 'STRING', 8 | 'format.fields.2.name' = 'recvTime', 9 | 'format.fields.2.data-type' = 'TIMESTAMP(3)') 10 | 11 | CREATE TABLE append_test ( 12 | aggId varchar , 13 | pageId varchar , 14 | ts timestamp(3) , 15 | expoCnt int , 16 | clkCnt int 17 | ) WITH ( 18 | 'connector.type' = 'elasticsearch', 19 | 'connector.version' = '6', 20 | 'connector.hosts' = 'http://localhost:9200', 21 | 'connector.index' = 'dadynamic-index-{clkCnt}', 22 | 'connector.document-type' = '_doc', 23 | 'update-mode' = 'upsert', 24 | 'connector.key-delimiter' = '$', 25 | 'connector.key-null-literal' = 'n/a', 26 | 'connector.bulk-flush.interval' = '1000', 27 | 'format.type' = 'json' 28 | ) 29 | 30 | INSERT INTO append_test 31 | SELECT pageId,eventId,recvTime ts, 1, 1 from csv 32 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2es/Kafka2UpsertEs.sql: -------------------------------------------------------------------------------- 1 | create table csv( pageId VARCHAR, eventId VARCHAR, recvTime VARCHAR) with ( 'connector.type' = 'filesystem', 2 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 3 | 'format.type' = 'csv', 4 | 'format.fields.0.name' = 'pageId', 5 | 'format.fields.0.data-type' = 'STRING', 6 | 'format.fields.1.name' = 'eventId', 7 | 'format.fields.1.data-type' = 'STRING', 8 | 'format.fields.2.name' = 'recvTime', 9 | 'format.fields.2.data-type' = 'STRING') 10 | 11 | CREATE TABLE test_upsert ( 12 | aggId varchar , 13 | pageId varchar , 14 | ts varchar , 15 | expoCnt bigint , 16 | clkCnt bigint 17 | ) WITH ( 18 | 'connector.type' = 'elasticsearch', 19 | 'connector.version' = '6', 20 | 'connector.hosts' = 'http://localhost:9200', 21 | 'connector.index' = 'flink_zhangle_pageview', 22 | 'connector.document-type' = '_doc', 23 | 'update-mode' = 'upsert', 24 | 'connector.key-delimiter' = '$', 25 | 'connector.key-null-literal' = 'n/a', 26 | 'connector.bulk-flush.interval' = '1000', 27 | 'format.type' = 'json' 28 | ) 29 | 30 | INSERT INTO test_upsert 31 | SELECT aggId, pageId, ts, 32 | count(case when eventId = 'exposure' then 1 else null end) as expoCnt, 33 | count(case when eventId = 'click' then 1 else null end) as clkCnt 34 | FROM 35 | ( 36 | SELECT 37 | 'ZL_001' as aggId, 38 | pageId, 39 | eventId, 40 | recvTime, 41 | ts2Date(recvTime) as ts 42 | from csv 43 | where eventId in ('exposure', 'click') 44 | ) as t1 45 | group by aggId, pageId, ts -------------------------------------------------------------------------------- 
/etl-job/src/main/resources/job-sql-1.10/kafka2filesystemandhive/Csv2HivePartition.sql: -------------------------------------------------------------------------------- 1 | create table test_csv( user_name VARCHAR, is_new BOOLEAN, content VARCHAR, date_col VARCHAR) with ( 2 | 'connector.type' = 'filesystem', 3 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user_part.csv', 4 | 'format.type' = 'csv') 5 | 6 | -- table user_info_partition is a hive partition table: create the hive table in hive first, then load it through HiveCatalog so that flink can insert 7 | -- data into the hive table. the hive create table command is: 8 | -- create table user_info_partition(user_name string, is_new boolean, content string) PARTITIONED BY (date_col string) row format delimited fields terminated by '\t'; 9 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2filesystemandhive/Csv2HiveSink.sql: -------------------------------------------------------------------------------- 1 | create table csv( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'connector.type' = 'filesystem', 2 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv', 3 | 'format.type' = 'csv') 4 | 5 | -- table user_ino_no_part is a hive table: create the hive table in hive first, then load it through HiveCatalog so that flink can insert 6 | -- data into the hive table. the hive create table command is: 7 | -- hive> create table user_ino_no_part(user_name string, is_new boolean, content string) row format delimited fields terminated by '\t'; 8 | 9 | insert into user_ino_no_part select user_name, is_new, content from csv 10 | 11 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2filesystemandhive/FileSystem2FileSystem.sql: -------------------------------------------------------------------------------- 1 | create table csv( id INT, note STRING, country STRING, record_time TIMESTAMP(4), doub_val DECIMAL(6, 2)) with ( 'connector.type' = 'filesystem', 2 | 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test.csv', 3 | 'format.type' = 'csv') 4 | 5 | create table csvSink( jnlno STRING, 6 | taskid char(4), 7 | hit VARCHAR ) with ( 'connector.type' = 'filesystem', 8 | 'connector.path' = '/Users/bang/sourcecode/project/Improve/flinkstream/src/main/resources/test12312.csv', 9 | 'format.type' = 'csv') 10 | 11 | insert into csvSink select a.country,'111111qeq','false' from csv a -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2filesystemandhive/Kafka2HiveSink.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE csvData ( 2 | user_name STRING, 3 | is_new BOOLEAN, 4 | content STRING, 5 | date_col STRING) WITH ( 6 | 'connector.type' = 'kafka', 7 | 'connector.version' = '0.10', 8 | 'connector.topic' = 'csv_data', 9 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 10 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 11 | 'connector.properties.group.id' = 'testGroup-1', 12 | 'connector.startup-mode' = 'earliest-offset', 13 | 'format.type' = 'csv') 14 | 15 | -- read from kafka, and then write to hive 16 | 17 | insert into myhive.hive_test.user_info_kafka select user_name, is_new, content from csvData
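-- note: the fully qualified sink name myhive.hive_test.user_info_kafka assumes a HiveCatalog named myhive is already
-- registered (for example through the catalogs section of sql-client-defaults.yaml, or with a CREATE CATALOG statement)
-- and that the hive table already exists. a minimal sketch of that assumed setup; the hive-conf-dir path and the field
-- delimiter below are illustrative assumptions, not values taken from this project:
-- Flink SQL: CREATE CATALOG myhive WITH ('type' = 'hive', 'hive-conf-dir' = '/path/to/hive-conf', 'default-database' = 'hive_test');
-- hive> create table user_info_kafka(user_name string, is_new boolean, content string) row format delimited fields terminated by '\t';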
-------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2hbase/KafkaJoinHbaseJoinMysql2Hbase.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time 11 | ) WITH ( 12 | 'connector.type' = 'kafka', 13 | 'connector.version' = '0.10', 14 | 'connector.topic' = 'flink_orders3', 15 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 16 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 17 | 'connector.properties.group.id' = 'testGroup4', 18 | 'connector.startup-mode' = 'earliest-offset', 19 | 'format.type' = 'json', 20 | 'format.derive-schema' = 'true' 21 | ) 22 | 23 | CREATE TABLE country ( 24 | rowkey VARCHAR, 25 | f1 ROW 26 | ) WITH ( 27 | 'connector.type' = 'hbase', 28 | 'connector.version' = '1.4.3', 29 | 'connector.table-name' = 'country', 30 | 'connector.zookeeper.quorum' = 'localhost:2182', 31 | 'connector.zookeeper.znode.parent' = '/hbase' ) 32 | 33 | CREATE TABLE currency ( 34 | currency_id BIGINT, 35 | currency_name STRING, 36 | rate DOUBLE, 37 | currency_time TIMESTAMP(3), 38 | country STRING, 39 | timestamp9 TIMESTAMP(3), 40 | time9 TIME(3), 41 | gdp DOUBLE 42 | ) WITH ( 43 | 'connector.type' = 'jdbc', 44 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 45 | 'connector.username' = 'root', 'connector.table' = 'currency', 46 | 'connector.driver' = 'com.mysql.jdbc.Driver', 47 | 'connector.lookup.cache.max-rows' = '500', 48 | 'connector.lookup.cache.ttl' = '10s', 49 | 'connector.lookup.max-retries' = '3') 50 | 51 | 52 | CREATE TABLE gmv ( 53 | rowkey VARCHAR, 54 | f1 ROW 56 | ) WITH ( 57 | 'connector.type' = 'hbase', 58 | 'connector.version' = '1.4.3', 59 | 'connector.table-name' = 'gmv1', 60 | 'connector.zookeeper.quorum' = 'localhost:2182', 61 | 'connector.zookeeper.znode.parent' = '/hbase', 62 | 'connector.write.buffer-flush.max-size' = '10mb', 63 | 'connector.write.buffer-flush.max-rows' = '1000', 64 | 'connector.write.buffer-flush.interval' = '2s' ) 65 | 66 | 67 | insert into gmv select concat(log_ts,'_',item) as rowkey, 68 | ROW(log_ts, item, country_name, country_name_cn, region_name, currency, order_cnt, currency_time, gmv) as f1 from (select co.f1.country_name as country_name, co.f1.country_name_cn as country_name_cn, co.f1.region_name as region_name, co.f1.currency as currency, cast(TUMBLE_END(o.ts, INTERVAL '10' SECOND) as VARCHAR) as log_ts, 69 | o.item, COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DOUBLE) as gmv 70 | from orders as o 71 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 72 | on o.currency = c.currency_name 73 | left outer join country FOR SYSTEM_TIME AS OF o.proc_time co 74 | on c.country = co.rowkey group by o.item, c.currency_time, c.rate, co.f1.country_name, co.f1.country_name_cn, co.f1.region_name, co.f1.currency, TUMBLE(o.ts, INTERVAL '10' SECOND)) a -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2hbase/UnboundedKafkaJoinHbase2Hbase.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | 
order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time 11 | ) WITH ( 12 | 'connector.type' = 'kafka', 13 | 'connector.version' = '0.10', 14 | 'connector.topic' = 'flink_orders2', 15 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 16 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 17 | 'connector.properties.group.id' = 'testGroup3', 18 | 'connector.startup-mode' = 'earliest-offset', 19 | 'format.type' = 'json', 20 | 'format.derive-schema' = 'true' 21 | ) 22 | 23 | CREATE TABLE currency ( 24 | currency_id BIGINT, 25 | currency_name STRING, 26 | rate DOUBLE, 27 | currency_time TIMESTAMP(3), 28 | country STRING, 29 | timestamp9 TIMESTAMP(3), 30 | time9 TIME(3), 31 | gdp DECIMAL(38, 18) 32 | ) WITH ( 33 | 'connector.type' = 'jdbc', 34 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 35 | 'connector.username' = 'root', 'connector.table' = 'currency', 36 | 'connector.driver' = 'com.mysql.jdbc.Driver', 37 | 'connector.lookup.cache.max-rows' = '500', 38 | 'connector.lookup.cache.ttl' = '10s', 39 | 'connector.lookup.max-retries' = '3') 40 | 41 | CREATE TABLE country ( 42 | rowkey VARCHAR, 43 | f1 ROW 44 | ,f2 ROW) WITH ( 45 | 'connector.type' = 'hbase', 46 | 'connector.version' = '1.4.3', 47 | 'connector.table-name' = 'country', 48 | 'connector.zookeeper.quorum' = 'localhost:2182', 49 | 'connector.zookeeper.znode.parent' = '/hbase' ) 50 | 51 | CREATE TABLE gmv ( 52 | rowkey VARCHAR, 53 | f1 ROW 54 | ) WITH ( 55 | 'connector.type' = 'hbase', 56 | 'connector.version' = '1.4.3', 57 | 'connector.table-name' = 'gmv', 58 | 'connector.zookeeper.quorum' = 'localhost:2182', 59 | 'connector.zookeeper.znode.parent' = '/hbase', 60 | 'connector.write.buffer-flush.max-size' = '10mb', 61 | 'connector.write.buffer-flush.max-rows' = '1000', 62 | 'connector.write.buffer-flush.interval' = '2s' ) 63 | 64 | insert into gmv 65 | select rowkey, ROW(max(ts), max(item), max(country_name)) as f1 66 | from (select concat(cast(o.ts as VARCHAR), '_', item, '_', co.f1.country_name) as rowkey, 67 | cast(o.ts as VARCHAR) as ts, o.item as item, co.f1.country_name as country_name 68 | from orders as o 69 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 70 | on o.currency = c.currency_name 71 | left outer join country FOR SYSTEM_TIME AS OF o.proc_time co 72 | on c.country = co.rowkey 73 | ) a group by rowkey 74 | 75 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2jdbc/KafkaJoinJdbc2Jdbc.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | WATERMARK FOR order_time AS order_time 10 | ) WITH ( 11 | 'connector.type' = 'kafka', 12 | 'connector.version' = '0.10', 13 | 'connector.topic' = 'flink_orders3', 14 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 15 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 16 | 'connector.properties.group.id' = 'test-jdbc', 17 | 'connector.startup-mode' = 'earliest-offset', 18 | 'format.type' = 'json', 19 | 'format.derive-schema' = 'true' 20 | ) 21 | 22 | CREATE TABLE currency ( 23 | currency_id BIGINT, 24 | currency_name STRING, 25 | rate DOUBLE, 26 | currency_time TIMESTAMP(3), 27 | country STRING, 28 | 
timestamp9 TIMESTAMP(3), 29 | time9 TIME(3), 30 | gdp DECIMAL(38, 18) 31 | ) WITH ( 32 | 'connector.type' = 'jdbc', 33 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 34 | 'connector.username' = 'root', 'connector.table' = 'currency', 35 | 'connector.driver' = 'com.mysql.jdbc.Driver', 36 | 'connector.lookup.cache.max-rows' = '500', 37 | 'connector.lookup.cache.ttl' = '10s', 38 | 'connector.lookup.max-retries' = '3') 39 | 40 | 41 | CREATE TABLE gmv ( 42 | log_per_min STRING, 43 | item STRING, 44 | order_cnt BIGINT, 45 | currency_time TIMESTAMP(3), 46 | gmv DECIMAL(38, 18), timestamp9 TIMESTAMP(3), 47 | time9 TIME(3), 48 | gdp DECIMAL(38, 18) 49 | ) WITH ( 50 | 'connector.type' = 'jdbc', 51 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 52 | 'connector.username' = 'root', 'connector.table' = 'gmv_table', 53 | 'connector.driver' = 'com.mysql.jdbc.Driver', 54 | 'connector.write.flush.max-rows' = '3', 55 | 'connector.write.flush.interval' = '120s', 56 | 'connector.write.max-retries' = '2') 57 | 58 | insert into gmv 59 | select cast(TUMBLE_END(o.order_time, INTERVAL '10' SECOND) as VARCHAR) as log_ts, 60 | o.item, COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DECIMAL(38, 18)) as gmv, 61 | c.timestamp9, c.time9, c.gdp 62 | from orders as o 63 | join currency FOR SYSTEM_TIME AS OF o.proc_time c 64 | on o.currency = c.currency_name 65 | group by o.item, c.currency_time, c.rate, c.timestamp9, c.time9, c.gdp, TUMBLE(o.order_time, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2jdbc/UnboundedKafkaJoinJdbc2Jdbc.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time 11 | ) WITH ( 12 | 'connector.type' = 'kafka', 13 | 'connector.version' = '0.10', 14 | 'connector.topic' = 'flink_orders2', 15 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 16 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 17 | 'connector.properties.group.id' = 'testGroup3', 18 | 'connector.startup-mode' = 'earliest-offset', 19 | 'format.type' = 'json', 20 | 'format.derive-schema' = 'true' 21 | ) 22 | 23 | CREATE TABLE currency ( 24 | currency_id BIGINT, 25 | currency_name STRING, 26 | rate DOUBLE, 27 | currency_time TIMESTAMP(3), 28 | country STRING, 29 | timestamp9 TIMESTAMP(3), 30 | time9 TIME(3), 31 | gdp DECIMAL(38, 18) 32 | ) WITH ( 33 | 'connector.type' = 'jdbc', 34 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 35 | 'connector.username' = 'root', 36 | 'connector.table' = 'currency', 37 | 'connector.driver' = 'com.mysql.jdbc.Driver', 38 | 'connector.lookup.cache.max-rows' = '500', 39 | 'connector.lookup.cache.ttl' = '10s', 40 | 'connector.lookup.max-retries' = '3') 41 | CREATE TABLE gmv ( 42 | log_per_min STRING, 43 | item STRING, 44 | order_cnt BIGINT, 45 | currency_time TIMESTAMP(3), 46 | gmv DECIMAL(38, 18), timestamp9 TIMESTAMP(3), 47 | time9 TIME(3), 48 | gdp DECIMAL(38, 18) 49 | ) WITH ( 50 | 'connector.type' = 'jdbc', 51 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 52 | 'connector.username' = 'root', 53 | 'connector.table' = 'gmv', 54 | 'connector.driver' = 'com.mysql.jdbc.Driver', 55 | 'connector.write.flush.max-rows' 
= '5000', 56 | 'connector.write.flush.interval' = '2s', 57 | 'connector.write.max-retries' = '3') 58 | insert into gmv 59 | select max(log_ts), 60 | item, COUNT(order_id) as order_cnt, max(currency_time), cast(sum(amount_kg) * max(rate) as DOUBLE) as gmv, 61 | max(timestamp9), max(time9), max(gdp) 62 | from ( 63 | select cast(o.ts as VARCHAR) as log_ts, o.item as item, o.order_id as order_id, c.currency_time as currency_time, 64 | o.amount_kg as amount_kg, c.rate as rate, c.timestamp9 as timestamp9, c.time9 as time9, c.gdp as gdp 65 | from orders as o 66 | join currency FOR SYSTEM_TIME AS OF o.proc_time c 67 | on o.currency = c.currency_name 68 | ) a group by item 69 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2kafka/KafkaAvro2Kafka.sql: -------------------------------------------------------------------------------- 1 | -- first job: build avro format data from csv and write to kafka topic 2 | create table csv( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'connector.type' = 'filesystem', 3 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv', 4 | 'format.type' = 'csv') 5 | CREATE TABLE AvroTest ( 6 | user_name VARCHAR, 7 | is_new BOOLEAN, 8 | content VARCHAR) WITH ( 9 | 'connector.type' = 'kafka', 10 | 'connector.version' = '0.10', 11 | 'connector.topic' = 'avro_from_csv', 12 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 13 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 14 | 'connector.properties.group.id' = 'testGroup3', 15 | 'connector.startup-mode' = 'earliest-offset', 16 | 'format.type' = 'avro', 17 | 'format.record-class' = 'kafka.UserAvro' 18 | ) 19 | 20 | insert into AvroTest select user_name, is_new, content from csv 21 | 22 | -- second job: consume avro format data from kafka and write to another kafka topic 23 | 24 | CREATE TABLE AvroTest ( 25 | user_name VARCHAR, 26 | is_new BOOLEAN, 27 | content VARCHAR) WITH ( 28 | 'connector.type' = 'kafka', 29 | 'connector.version' = '0.10', 30 | 'connector.topic' = 'avro_from_csv', 31 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 32 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 33 | 'connector.properties.group.id' = 'testGroup4', 34 | 'connector.startup-mode' = 'earliest-offset', 35 | 'format.type' = 'avro', 36 | 'format.record-class' = 'kafka.UserAvro' 37 | ) 38 | 39 | CREATE TABLE WikipediaFeed_filtered ( 40 | user_name STRING, 41 | is_new BOOLEAN, 42 | content STRING) WITH ( 43 | 'connector.type' = 'kafka', 44 | 'connector.version' = '0.10', 45 | 'connector.topic' = 'WikipediaFeed2_filtered', 46 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 47 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 48 | 'connector.properties.group.id' = 'testGroup3', 49 | 'connector.startup-mode' = 'earliest-offset', 50 | 'format.type' = 'avro', 51 | 'format.avro-schema' = 52 | '{ 53 | "type": "record", 54 | "name": "UserAvro", 55 | "fields": [ 56 | {"name": "user_name", "type": "string"}, 57 | {"name": "is_new", "type": "boolean"}, 58 | {"name": "content", "type": "string"} 59 | ] 60 | }') 61 | 62 | insert into WikipediaFeed_filtered 63 | select user_name, is_new, content 64 | from AvroTest -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2kafka/KafkaCsv2Kafka.sql: 
-------------------------------------------------------------------------------- 1 | -- from csv data to kafka 2 | create table csv( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'connector.type' = 'filesystem', 3 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv', 4 | 'format.type' = 'csv') 5 | CREATE TABLE csvData ( 6 | user_name STRING, 7 | is_new BOOLEAN, 8 | content STRING) WITH ( 9 | 'connector.type' = 'kafka', 10 | 'connector.version' = '0.10', 11 | 'connector.topic' = 'csv_data', 12 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 13 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 14 | 'connector.properties.group.id' = 'testGroup3', 15 | 'connector.startup-mode' = 'earliest-offset', 16 | 'format.type' = 'csv') 17 | insert into csvData 18 | select user_name, is_new, content from 19 | csv 20 | 21 | -- from kafka to csv 22 | CREATE TABLE csvData ( 23 | user_name STRING, 24 | is_new BOOLEAN, 25 | content STRING) WITH ( 26 | 'connector.type' = 'kafka', 27 | 'connector.version' = '0.10', 28 | 'connector.topic' = 'csv_data', 29 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 30 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 31 | 'connector.properties.group.id' = 'testGroup4', 32 | 'connector.startup-mode' = 'earliest-offset', 33 | 'format.type' = 'csv') 34 | create table csvTest( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'connector.type' = 'filesystem', 35 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/test.csv', 36 | 'format.type' = 'csv', 37 | 'update-mode' = 'append', 38 | 'format.fields.0.name' = 'user_name', 39 | 'format.fields.0.data-type' = 'STRING', 40 | 'format.fields.1.name' = 'is_new', 41 | 'format.fields.1.data-type' = 'BOOLEAN', 42 | 'format.fields.2.name' = 'content', 43 | 'format.fields.2.data-type' = 'STRING') 44 | insert into csvTest select user_name, is_new, content from csvData 45 | 46 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2kafka/KafkaJoinJdbc2Kafka.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time 11 | ) WITH ( 12 | 'connector.type' = 'kafka', 13 | 'connector.version' = '0.10', 14 | 'connector.topic' = 'flink_orders2', 15 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 16 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 17 | 'connector.properties.group.id' = 'testGroup3', 18 | 'connector.startup-mode' = 'earliest-offset', 19 | 'format.type' = 'json', 20 | 'format.derive-schema' = 'true' 21 | ) 22 | 23 | CREATE TABLE currency ( 24 | currency_id BIGINT, 25 | currency_name STRING, 26 | rate DOUBLE, 27 | currency_time TIMESTAMP(3), 28 | country STRING, 29 | timestamp9 TIMESTAMP(3), 30 | time9 TIME(3), 31 | gdp DECIMAL(38, 18) 32 | ) WITH ( 33 | 'connector.type' = 'jdbc', 34 | 'connector.url' = 'jdbc:mysql://localhost:3306/test', 35 | 'connector.username' = 'root', 'connector.table' = 'currency', 36 | 'connector.driver' = 'com.mysql.jdbc.Driver', 37 | 'connector.lookup.cache.max-rows' = '500', 38 | 'connector.lookup.cache.ttl' = '10s', 39 | 
'connector.lookup.max-retries' = '3') 40 | CREATE TABLE gmv ( 41 | log_per_min STRING, 42 | item STRING, 43 | order_cnt BIGINT, 44 | currency_time TIMESTAMP(3), 45 | gmv DECIMAL(38, 18)) WITH ( 46 | 'connector.type' = 'kafka', 47 | 'connector.version' = '0.10', 48 | 'connector.topic' = 'gmv', 49 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 50 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 51 | 'format.type' = 'json', 52 | 'format.derive-schema' = 'true' 53 | ) 54 | insert into gmv 55 | select cast(TUMBLE_END(o.order_time, INTERVAL '10' SECOND) as VARCHAR) as log_per_min, 56 | o.item, COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DECIMAL(38, 18)) as gmv 57 | from orders as o 58 | join currency FOR SYSTEM_TIME AS OF o.proc_time c 59 | on o.currency = c.currency_name 60 | group by o.item, c.currency_time,c.rate,TUMBLE(o.order_time, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.10/kafka2kafka/kafkaJson2kafka.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount DOUBLE, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time) WITH ( 11 | 'connector.type' = 'kafka', 12 | 'connector.version' = '0.10', 13 | 'connector.topic' = 'flink_orders', 14 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 15 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 16 | 'connector.properties.group.id' = 'testGroup3', 17 | 'connector.startup-mode' = 'earliest-offset', 18 | 'format.type' = 'json', 19 | 'format.derive-schema' = 'true' 20 | ) 21 | 22 | CREATE TABLE order_cnt ( 23 | log_per_min TIMESTAMP(3), 24 | item STRING, 25 | order_cnt BIGINT, 26 | total_quality BIGINT 27 | ) WITH ( 28 | 'connector.type' = 'kafka', 29 | 'connector.version' = '0.10', 30 | 'connector.topic' = 'order_cnt', 31 | 'update-mode' = 'append', 32 | 'connector.properties.zookeeper.connect' = 'localhost:2181', 33 | 'connector.properties.bootstrap.servers' = 'localhost:9092', 34 | 'format.type' = 'json', 35 | 'format.derive-schema' = 'true' 36 | ) 37 | insert into order_cnt 38 | select TUMBLE_END(order_time, INTERVAL '10' SECOND), 39 | item, COUNT(order_id) as order_cnt, CAST(sum(amount_kg) as BIGINT) as total_quality 40 | from orders 41 | group by item, TUMBLE(order_time, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/catalog/PgcatalogE2eTest.sql: -------------------------------------------------------------------------------- 1 | # create table by pgadmin 2 | CREATE TABLE public.primitive_table(id integer); 3 | CREATE TABLE bang.primitive_table(id integer); 4 | CREATE TABLE public.primitive_table(int integer, bytea bytea, short smallint, long bigint, real real, double_precision double precision, numeric numeric(10, 5), decimal decimal(10, 1), boolean boolean, text text, char char, character character(3), character_varying character varying(20), timestamp timestamp(5), date date,time time(0), default_numeric numeric, CONSTRAINT test_pk PRIMARY KEY (short, int)); 5 | CREATE TABLE public.primitive_table(int_arr integer[], bytea_arr bytea[], short_arr smallint[], long_arr bigint[], real_arr real[], double_precision_arr 
double precision[], numeric_arr numeric(10, 5)[], numeric_arr_default numeric[], decimal_arr decimal(10,2)[], boolean_arr boolean[], text_arr text[], char_arr char[], character_arr character(3)[], character_varying_arr character varying(20)[], timestamp_arr timestamp(5)[], date_arr date[], time_arr time(0)[]); 6 | CREATE TABLE public.primitive_table(f0 smallserial, f1 serial, f2 serial2, f3 serial4, f4 serial8, f5 bigserial); 7 | CREATE TABLE public.primitive_table2(int integer, bytea bytea, short smallint, long bigint, real real, double_precision double precision, numeric numeric(10, 5), decimal decimal(10, 1), boolean boolean, text text, char char, character character(3), character_varying character varying(20), timestamp timestamp(5), date date,time time(0), default_numeric numeric, CONSTRAINT test_pk1 PRIMARY KEY (short, int)); 8 | 9 | # insert test data 10 | insert into public.t1 values (1); 11 | insert into primitive_table values (1,'2',3,4,5.5,6.6,7.7,8.8,true,'a','b','c','d','2016-06-22 19:10:25','2015-01-01','00:51:02.746572', 500); 12 | insert into array_table values ('{1,2,3}','{2,3,4}','{3,4,5}','{4,5,6}','{5.5,6.6,7.7}','{6.6,7.7,8.8}','{7.7,8.8,9.9}','{8.8,9.9,10.10}','{9.9,10.10,11.11}','{true,false,true}','{a,b,c}','{b,c,d}','{b,c,d}','{b,c,d}','{"2016-06-22 19:10:25", "2019-06-22 19:10:25"}','{"2015-01-01", "2020-01-01"}','{"00:51:02.746572", "00:59:02.746572"}'); 13 | insert into serial_table values (32767,2147483647,32767,2147483647,9223372036854775807,9223372036854775807); 14 | 15 | # test in sql-client 16 | 17 | (1) config conf/sql-client-defaults.yaml 18 | catalogs: 19 | - name: mypg 20 | type: jdbc 21 | default-database: mydb 22 | username: postgres 23 | password: postgres 24 | base-url: jdbc:postgresql://localhost/ 25 | 26 | (2) add necessary dependency to /lib 27 | flink-connector-jdbc_2.11-1.12-SNAPSHOT.jar 28 | postgresql-42.2.9.jar 29 | 30 | (3) sql-client test 31 | Flink SQL> show tables; 32 | bang.primitive_table 33 | public.primitive_arr_table 34 | public.primitive_serial_table 35 | public.primitive_table 36 | public.primitive_table2 37 | public.simple_t1 38 | 39 | # test read/write 40 | Flink SQL> insert into `public.primitive_table2` select * from `public.primitive_table`; 41 | [INFO] Submitting SQL update statement to the cluster... 42 | [INFO] Table update statement has been successfully submitted to the cluster: 43 | Job ID: aa953b785dea9903acaf4caafa50987a 44 | 45 | #check result 46 | Flink SQL> select * from `public.primitive_table2`; 47 | [INFO] Result retrieval cancelled. 48 | -- int bytea short long real double_precision numeric 49 | -- 1 [50] 3 4 5.5 6.6 7.700000000000000000 50 | 51 | -- See FLINK-17948, sql-client bug 52 | Flink SQL> select * from `public.primitive_arr_table`; 53 | [ERROR] Could not execute SQL statement. Reason: 54 | org.apache.flink.table.planner.codegen.CodeGenException: Unsupported cast from 'ARRAY' to 'ARRAY'. 
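55 |
56 | # Alternative to step (1): the same Postgres catalog can also be registered from the SQL client with a CREATE CATALOG DDL
57 | # instead of editing sql-client-defaults.yaml. A minimal sketch, assuming the catalog name, database and credentials used above:
58 | CREATE CATALOG mypg WITH (
59 |   'type' = 'jdbc',
60 |   'default-database' = 'mydb',
61 |   'username' = 'postgres',
62 |   'password' = 'postgres',
63 |   'base-url' = 'jdbc:postgresql://localhost/'
64 | );
65 | USE CATALOG mypg;
66 | show tables;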
-------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/jdbc/kafka2mysql.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE currency ( 2 | currency_id BIGINT, 3 | currency_name STRING, 4 | rate DOUBLE, 5 | currency_time TIMESTAMP(3), 6 | country STRING, 7 | timestamp9 TIMESTAMP(3), 8 | time9 TIME(3), 9 | gdp DECIMAL(38, 18) 10 | ) WITH ( 11 | 'connector' = 'jdbc', 12 | 'url' = 'jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=utf-8', 13 | 'username' = 'root', 14 | 'password' = '', 15 | 'table-name' = 'currency', 16 | 'driver' = 'com.mysql.jdbc.Driver') 17 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/jdbc/kafkajoinmysql.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | WATERMARK FOR order_time AS order_time 10 | ) WITH ( 11 | 'connector' = 'kafka', 12 | 'topic' = 'flink_orders3', 13 | 'properties.zookeeper.connect' = 'localhost:2181', 14 | 'properties.bootstrap.servers' = 'localhost:9092', 15 | 'properties.group.id' = 'testGroup3', 16 | 'scan.startup.mode' = 'earliest-offset', 17 | 'format' = 'json' 18 | ); 19 | 20 | CREATE TABLE currency ( 21 | currency_id BIGINT, 22 | currency_name STRING, 23 | rate DOUBLE, 24 | currency_time TIMESTAMP(3), 25 | country STRING, 26 | timestamp9 TIMESTAMP(3), 27 | time9 TIME(3), 28 | gdp DECIMAL(38, 18) 29 | ) WITH ( 30 | 'connector' = 'jdbc', 31 | 'url' = 'jdbc:mysql://localhost:3306/test', 32 | 'username' = 'root', 33 | 'password' = '', 34 | 'table-name' = 'currency', 35 | 'driver' = 'com.mysql.jdbc.Driver', 36 | 'lookup.cache.max-rows' = '500', 37 | 'lookup.cache.ttl' = '3s', 38 | 'lookup.max-retries' = '3'); 39 | 40 | select o.order_id, o.item, c.currency_name, c.rate from orders as o 41 | join currency FOR SYSTEM_TIME AS OF o.proc_time c 42 | on o.currency = c.currency_name; 43 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2es/Kafka2AppendEs.sql: -------------------------------------------------------------------------------- 1 | ## batch 2 | create table csv( pageId VARCHAR, eventId VARCHAR, recvTime VARCHAR) with ( 'connector' = 'filesystem', 3 | 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 4 | 'format' = 'csv') 5 | 6 | CREATE TABLE es_table ( 7 | aggId varchar , 8 | pageId varchar , 9 | ts varchar , 10 | expoCnt int , 11 | clkCnt int 12 | ) WITH ( 13 | 'connector' = 'elasticsearch-6', 14 | 'hosts' = 'http://localhost:9200', 15 | 'index' = 'usercase13', 16 | 'document-type' = '_doc', 17 | 'document-id.key-delimiter' = '$', 18 | 'sink.bulk-flush.interval' = '1000', 19 | 'format' = 'json' 20 | ) 21 | INSERT INTO es_table 22 | SELECT pageId,eventId,cast(recvTime as varchar) as ts, 1, 1 from csv 23 | 24 | 25 | ## streaming 26 | 27 | create table csv_user( user_name VARCHAR, is_new BOOLEAN, content VARCHAR) with ( 'type' = 'filesystem', 28 | 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv', 29 | 'format.type' = 'csv', 30 | 'format.fields.0.name' = 'user_name', 31 | 'format.fields.0.data-type' = 'STRING', 32 | 'format.fields.1.name' = 
'is_new', 33 | 'format.fields.1.data-type' = 'BOOLEAN', 34 | 'format.fields.2.name' = 'content', 35 | 'format.fields.2.data-type' = 'STRING') 36 | CREATE TABLE kafka_user ( 37 | user_name STRING, 38 | is_new BOOLEAN, 39 | content STRING) WITH ( 40 | 'type' = 'kafka', 41 | 'version' = '0.10', 42 | 'topic' = 'kafka_user', 43 | 'properties.zookeeper.connect' = 'localhost:2181', 44 | 'properties.bootstrap.servers' = 'localhost:9092', 45 | 'properties.group.id' = 'testGroup3', 46 | 'startup-mode' = 'earliest-offset', 47 | 'format.type' = 'csv') 48 | insert into kafka_user 49 | select user_name, is_new, content from 50 | csv_user; 51 | 52 | CREATE TABLE es_user ( 53 | user_name STRING, 54 | is_new BOOLEAN, 55 | content STRING 56 | ) WITH ( 57 | 'type' = 'elasticsearch', 58 | 'version' = '7', 59 | 'hosts' = 'http://localhost:9200', 60 | 'index' = 'es_user', 61 | 'document-type' = '_doc', 62 | 'update-mode' = 'upsert', 63 | 'key-delimiter' = '$', 64 | 'key-null-literal' = 'n/a', 65 | 'bulk-flush.interval' = '1000', 66 | 'format.type' = 'json' 67 | ); 68 | insert into es_user 69 | select user_name, is_new, content from 70 | kafka_user; 71 | 72 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2es/Kafka2DynamicIndexEs.sql: -------------------------------------------------------------------------------- 1 | create table csv1( pageId VARCHAR, eventId VARCHAR, recvTime TIMESTAMP(3)) with ( 'connector' = 'filesystem', 2 | 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 3 | 'format' = 'csv') 4 | 5 | CREATE TABLE append_test ( 6 | aggId varchar , 7 | pageId varchar , 8 | ts timestamp(3) , 9 | expoCnt int , 10 | clkCnt int 11 | ) WITH ( 12 | 'connector' = 'elasticsearch-7', 13 | 'hosts' = 'http://localhost:9200', 14 | 'index' = 'xudynamic-index-{clkCnt}', 15 | 'document-id.key-delimiter' = '$', 16 | 'sink.bulk-flush.interval' = '1000', 17 | 'format' = 'json' 18 | ); 19 | 20 | INSERT INTO append_test 21 | SELECT pageId,eventId,recvTime ts, 1, 1 from csv1; 22 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2es/Kafka2UpsertEs.sql: -------------------------------------------------------------------------------- 1 | create table csv( pageId VARCHAR, eventId VARCHAR, recvTime VARCHAR) with ( 'connector.type' = 'filesystem', 2 | 'connector.path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user3.csv', 3 | 'format.type' = 'csv', 4 | 'format.fields.0.name' = 'pageId', 5 | 'format.fields.0.data-type' = 'STRING', 6 | 'format.fields.1.name' = 'eventId', 7 | 'format.fields.1.data-type' = 'STRING', 8 | 'format.fields.2.name' = 'recvTime', 9 | 'format.fields.2.data-type' = 'STRING') 10 | 11 | CREATE TABLE test_upsert ( 12 | aggId varchar , 13 | pageId varchar , 14 | ts varchar , 15 | expoCnt bigint , 16 | clkCnt bigint 17 | ) WITH ( 18 | 'connector.type' = 'elasticsearch', 19 | 'connector.version' = '6', 20 | 'connector.hosts' = 'http://localhost:9200', 21 | 'connector.index' = 'flink_zhangle_pageview', 22 | 'connector.document-type' = '_doc', 23 | 'update-mode' = 'upsert', 24 | 'connector.key-delimiter' = '$', 25 | 'connector.key-null-literal' = 'n/a', 26 | 'connector.bulk-flush.interval' = '1000', 27 | 'format.type' = 'json' 28 | ) 29 | 30 | INSERT INTO test_upsert 31 | SELECT aggId, pageId, ts, 32 | count(case when eventId = 'exposure' then 1 else null end) as expoCnt, 33 
| count(case when eventId = 'click' then 1 else null end) as clkCnt 34 | FROM 35 | ( 36 | SELECT 37 | 'ZL_001' as aggId, 38 | pageId, 39 | eventId, 40 | recvTime, 41 | ts2Date(recvTime) as ts 42 | from csv 43 | where eventId in ('exposure', 'click') 44 | ) as t1 45 | group by aggId, pageId, ts -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2hbase/KafkaJoinHbaseJoinMysql2Hbase.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | WATERMARK FOR order_time AS order_time 10 | ) WITH ( 11 | 'connector' = 'kafka', 12 | 'topic' = 'flink_orders3', 13 | 'properties.zookeeper.connect' = 'localhost:2181', 14 | 'properties.bootstrap.servers' = 'localhost:9092', 15 | 'properties.group.id' = 'testGroup3', 16 | 'scan.startup.mode' = 'earliest-offset', 17 | 'format' = 'json' 18 | ); 19 | 20 | CREATE TABLE currency ( 21 | currency_id BIGINT, 22 | currency_name STRING, 23 | rate DOUBLE, 24 | currency_time TIMESTAMP(3), 25 | country STRING, 26 | timestamp9 TIMESTAMP(3), 27 | time9 TIME(3), 28 | gdp DECIMAL(38, 18) 29 | ) WITH ( 30 | 'connector' = 'jdbc', 31 | 'url' = 'jdbc:mysql://localhost:3306/test', 32 | 'username' = 'root', 33 | 'table-name' = 'currency', 34 | 'password' = '', 35 | 'driver' = 'com.mysql.jdbc.Driver', 36 | 'lookup.cache.max-rows' = '500', 37 | 'lookup.cache.ttl' = '10s', 38 | 'lookup.max-retries' = '3'); 39 | 40 | CREATE TABLE country ( 41 | rowkey VARCHAR, 42 | f1 ROW 43 | ,f2 ROW) WITH ( 44 | 'connector' = 'hbase-1.4', 45 | 'table-name' = 'country', 46 | 'zookeeper.quorum' = 'localhost:2182', 47 | 'zookeeper.znode.parent' = '/hbase' ); 48 | 49 | CREATE TABLE gmv ( 50 | rowkey VARCHAR, 51 | f1 ROW 53 | ) WITH ( 54 | 'connector' = 'hbase-1.4', 55 | 'table-name' = 'gmv', 56 | 'zookeeper.quorum' = 'localhost:2182', 57 | 'zookeeper.znode.parent' = '/hbase', 58 | 'sink.buffer-flush.max-size' = '10mb', 59 | 'sink.buffer-flush.max-rows' = '1000', 60 | 'sink.buffer-flush.interval' = '2s' ); 61 | 62 | insert into gmv select concat(log_ts,'_',item) as rowkey, 63 | ROW(log_ts, item, country_name, country_name_cn, region_name, currency, order_cnt, currency_time, gmv) as f1 64 | from (select co.f1.country_name as country_name, co.f1.country_name_cn as country_name_cn, co.f1.region_name as region_name, co.f1.currency as currency, cast(TUMBLE_END(o.ts, INTERVAL '10' SECOND) as VARCHAR) as log_ts, 65 | o.item, COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DOUBLE) as gmv 66 | from orders as o 67 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 68 | on o.currency = c.currency_name 69 | -- see FLINK-18072 70 | -- left outer join country FOR SYSTEM_TIME AS OF o.proc_time co 71 | -- on c.country = co.rowkey 72 | group by o.item, c.currency_time, c.rate, co.f1.country_name, co.f1.country_name_cn, co.f1.region_name, co.f1.currency, TUMBLE(o.ts, INTERVAL '10' SECOND)) a 73 | 74 | insert into gmv select concat(log_ts,'_',item) as rowkey, 75 | ROW(log_ts, item, country_name, country_name_cn, region_name, currency, order_cnt, currency_time, gmv) as f1 from ( 76 | select 'test' as country_name, 'test' as country_name_cn,'test' as region_name, 'test' as currency, cast(TUMBLE_END(o.order_time, INTERVAL '10' SECOND) as VARCHAR) as log_ts, 77 | o.item, 
COUNT(o.order_id) as order_cnt, c.currency_time, cast(sum(o.amount_kg) * c.rate as DOUBLE) as gmv 78 | from orders as o 79 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 80 | on o.currency = c.currency_name 81 | group by o.item, c.currency_time, c.rate, 'test', 'test', 'test', 'test', TUMBLE(o.order_time, INTERVAL '10' SECOND)) a 82 | 83 | 84 | -- result in hbase 85 | -- 2020-06-08 18:13:00.000_\xE9\x85\xB8\xE column=f1:region_name, timestamp=1591630452428, value=test 86 | -- 5\xA5\xB6 87 | -- 233 row(s) in 0.2560 seconds -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2hbase/UnboundedKafkaJoinHbase2Hbase.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE orders ( 2 | order_id STRING, 3 | item STRING, 4 | currency STRING, 5 | amount INT, 6 | order_time TIMESTAMP(3), 7 | proc_time as PROCTIME(), 8 | amount_kg as amount * 1000, 9 | ts as order_time + INTERVAL '1' SECOND, 10 | WATERMARK FOR order_time AS order_time 11 | ) WITH ( 12 | 'connector' = 'kafka', 13 | 'topic' = 'flink_orders2', 14 | 'properties.zookeeper.connect' = 'localhost:2181', 15 | 'properties.bootstrap.servers' = 'localhost:9092', 16 | 'properties.group.id' = 'testGroup3', 17 | 'scan.startup.mode' = 'earliest-offset', 18 | 'format' = 'json' 19 | ); 20 | 21 | CREATE TABLE currency ( 22 | currency_id BIGINT, 23 | currency_name STRING, 24 | rate DOUBLE, 25 | currency_time TIMESTAMP(3), 26 | country STRING, 27 | timestamp9 TIMESTAMP(3), 28 | time9 TIME(3), 29 | gdp DECIMAL(38, 18) 30 | ) WITH ( 31 | 'connector' = 'jdbc', 32 | 'url' = 'jdbc:mysql://localhost:3306/test', 33 | 'username' = 'root', 34 | 'table-name' = 'currency', 35 | 'password' = '', 36 | 'driver' = 'com.mysql.jdbc.Driver', 37 | 'lookup.cache.max-rows' = '500', 38 | 'lookup.cache.ttl' = '10s', 39 | 'lookup.max-retries' = '3'); 40 | 41 | CREATE TABLE country ( 42 | rowkey VARCHAR, 43 | f1 ROW 44 | ,f2 ROW) WITH ( 45 | 'connector' = 'hbase-1.4', 46 | 'table-name' = 'country', 47 | 'zookeeper.quorum' = 'localhost:2182', 48 | 'zookeeper.znode.parent' = '/hbase' ); 49 | 50 | CREATE TABLE gmv1 ( 51 | rowkey VARCHAR, 52 | f1 ROW 53 | ) WITH ( 54 | 'connector' = 'hbase-1.4', 55 | 'table-name' = 'gmv', 56 | 'zookeeper.quorum' = 'localhost:2182', 57 | 'zookeeper.znode.parent' = '/hbase', 58 | 'sink.buffer-flush.max-size' = '10mb', 59 | 'sink.buffer-flush.max-rows' = '1000', 60 | 'sink.buffer-flush.interval' = '2s' ); 61 | 62 | 63 | 64 | insert into gmv1 65 | select rowkey, ROW(max(ts), max(item), max(country_name)) as f1 66 | from (select concat(cast(o.ts as VARCHAR), '_', item, '_', co.f1.country_name) as rowkey, 67 | cast(o.ts as VARCHAR) as ts, o.item as item, co.f1.country_name as country_name 68 | from orders as o 69 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 70 | on o.currency = c.currency_name 71 | -- see FLINK-18072 72 | -- left outer join country FOR SYSTEM_TIME AS OF o.proc_time co 73 | -- on c.country = co.rowkey 74 | ) a group by rowkey 75 | 76 | 77 | insert into gmv1 78 | select rowkey, ROW(max(ts), max(item), max(country_name)) as f1 from ( 79 | select concat(cast(o.ts as VARCHAR), '_', item, '_', co.f1.country_name) as rowkey, 80 | cast(o.ts as VARCHAR) as ts, o.item as item, co.f1.country_name as country_name 81 | from orders as o 82 | left outer join currency FOR SYSTEM_TIME AS OF o.proc_time c 83 | on o.currency = c.currency_name 84 | 85 | 86 | -- see FLINK-18072 87 | -- left outer join country FOR 
SYSTEM_TIME AS OF o.proc_time co 88 | -- on c.country = co.rowkey 89 | ) a group by rowkey 90 | 91 | -- 92 | -- result in hbase: 93 | -- 2020-06-08 18:12:53.061_Apple_\xE4\xBA\ column=f1:item, timestamp=1591611172859, value=Apple 94 | -- xBA\xE6\xB0\x91\xE5\xB8\x81 95 | -- 2020-06-08 18:12:53.061_Apple_\xE4\xBA\ column=f1:log_ts, timestamp=1591611172859, value=2020-06-08 18:12:53.061 96 | -- xBA\xE6\xB0\x91\xE5\xB8\x81 97 | -- 52 row(s) in 0.0280 seconds 98 | -- 99 | -- hbase(main):026:0> scan 'gmv' 100 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.11/kafka2hbase/hbase_cdc: -------------------------------------------------------------------------------- 1 | // prepare cdc data 2 | bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic product_binlog 3 | cat ~/sourcecode/project/flink-1.11/flink/flink-formats/flink-json/src/test/resources/debezium-data-schema-exclude.txt | bin/kafka-console-producer.sh --broker-list localhost:9092 --topic product_binlog 4 | bin/kafka-console-consumer.sh --topic product_binlog --bootstrap-server localhost:9092 --from-beginning 5 | 6 | // test write to hbase 7 | CREATE TABLE product_binlog ( 8 | id INT NOT NULL, 9 | name STRING, 10 | description STRING, 11 | weight DECIMAL(10,3) 12 | ) WITH ( 13 | 'connector' = 'kafka', 14 | 'topic' = 'product_binlog', 15 | 'properties.bootstrap.servers' = 'localhost:9092', 16 | 'scan.startup.mode' = 'earliest-offset', 17 | 'format' = 'debezium-json' 18 | ); 19 | 20 | CREATE TABLE hbase_product ( 21 | id INT NOT NULL PRIMARY KEY NOT ENFORCED, 22 | f1 ROW 23 | ) WITH ( 24 | 'connector' = 'hbase-2.2', 25 | 'table-name' = 'product1', 26 | 'zookeeper.quorum' = 'localhost:2181', 27 | 'zookeeper.znode.parent' = '/hbase', 28 | 'sink.buffer-flush.max-size' = '10mb', 29 | 'sink.buffer-flush.max-rows' = '1000', 30 | 'sink.buffer-flush.interval' = '2s' ); 31 | 32 | insert into hbase_product select id, ROW(name,description) from product_binlog; -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.12/hbase2_test.sql: -------------------------------------------------------------------------------- 1 | // prepare cdc data 2 | bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic product_binlog 3 | cat ~/sourcecode/project/flink-1.11/flink/flink-formats/flink-json/src/test/resources/debezium-data-schema-exclude.txt | bin/kafka-console-producer.sh --broker-list localhost:9092 --topic product_binlog 4 | bin/kafka-console-consumer.sh --topic product_binlog --bootstrap-server localhost:9092 --from-beginning 5 | 6 | // test write to hbase 7 | CREATE TABLE product_binlog1 ( 8 | id INT NOT NULL, 9 | name STRING, 10 | description STRING, 11 | weight DECIMAL(10,3) 12 | ) WITH ( 13 | 'connector' = 'kafka', 14 | 'topic' = 'product_binlog1', 15 | 'properties.bootstrap.servers' = 'localhost:9092', 16 | 'scan.startup.mode' = 'earliest-offset', 17 | 'format' = 'debezium-json' 18 | ); 19 | 20 | CREATE TABLE hbase_product ( 21 | id INT NOT NULL PRIMARY KEY NOT ENFORCED, 22 | f1 ROW 23 | ) WITH ( 24 | 'connector' = 'hbase-2.2', 25 | 'table-name' = 'product1', 26 | 'zookeeper.quorum' = 'localhost:2182', 27 | 'zookeeper.znode.parent' = '/hbase', 28 | 'sink.buffer-flush.max-size' = '10mb', 29 | 'sink.buffer-flush.max-rows' = '1000', 30 | 'sink.buffer-flush.interval' = '2s' ); 31 | 32 | insert into hbase_product select id, 
ROW(name,description) from product_binlog1; -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.12/hive_latest_dim.sql: -------------------------------------------------------------------------------- 1 | -- build local hive environment 2 | -- $:git clone git@github.com:big-data-europe/docker-hive.git 3 | -- $:cd docker-hive 4 | -- $:docker-compose up -d 5 | 6 | -- create hive partition table 7 | create table user_info_latest(user_name string, is_new boolean, content string) 8 | PARTITIONED BY (date_col STRING) TBLPROPERTIES ( 9 | -- using default partition-name order to load the latest partition every 12h (the most recommended and convenient way) 10 | 'streaming-source.enable' = 'true', 11 | 'streaming-source.partition.include' = 'latest', 12 | 'streaming-source.monitor-interval' = '10 s', 13 | 'streaming-source.partition-order' = 'partition-name' 14 | ); 15 | 16 | -- create kafka fact table 17 | CREATE TABLE kafkaTable ( 18 | user_name STRING, 19 | is_new BOOLEAN, 20 | content STRING, 21 | date_col STRING,proctime as PROCTIME()) WITH ( 22 | 'connector' = 'kafka', 23 | 'topic' = 'kafka_user', 24 | 'properties.zookeeper.connect' = 'localhost:2181', 25 | 'properties.bootstrap.servers' = 'localhost:9092', 26 | 'properties.group.id' = 'testCsv', 27 | 'scan.startup.mode' = 'earliest-offset', 28 | 'format' = 'csv'); 29 | 30 | create table test_csv( user_name VARCHAR, is_new BOOLEAN, content VARCHAR, date_col VARCHAR) with ( 31 | 'connector.type' = 'filesystem', 32 | 'connector.path' = '/opt/user_part.csv', 33 | 'format.type' = 'csv') 34 | 35 | -- join the latest hive partition 36 | select * from kafkaTable LEFT JOIN user_info_latest 37 | for system_time as of kafkaTable.proctime as h 38 | on kafkaTable.user_name = h.user_name; 39 | -------------------------------------------------------------------------------- /etl-job/src/main/resources/job-sql-1.12/upsert-kafka.sql: -------------------------------------------------------------------------------- 1 | 2 | -- create an upsert-kafka table 3 | CREATE TABLE pageviews_per_region ( 4 | region STRING, 5 | pv BIGINT, 6 | uv BIGINT, 7 | PRIMARY KEY (region) NOT ENFORCED 8 | ) WITH ( 9 | 'connector' = 'upsert-kafka', 10 | 'topic' = 'pageviews_per_region', 11 | 'properties.bootstrap.servers' = 'localhost:9092', 12 | 'key.format' = 'json', 13 | 'value.format' = 'json' 14 | ); 15 | 16 | -- write test data to upsert-kafka table 17 | insert into pageviews_per_region values('test1', 100, 20); 18 | insert into pageviews_per_region values('test2', 200, 20); 19 | insert into pageviews_per_region values('test1', 101, 20); 20 | 21 | 22 | -- check data has been writen into kafka 23 | ./bin/kafka-console-consumer.sh --topic pageviews_per_region --bootstrap-server localhost:9092 --from-beginning --property print.key=true --property key.separator="-" 24 | -- {"region":"test1"}-{"region":"test1","pv":100,"uv":20} 25 | -- {"region":"test2"}-{"region":"test2","pv":200,"uv":200} 26 | -- {"region":"test1"}-{"region":"test1","pv":101,"uv":20} 27 | 28 | -- Read upsert kafka in sql client, the key {"region":"test1"} should update with the new value 29 | select * from pageviews_per_region; 30 | -- region pv uv 31 | -- test2 200 200 32 | -- test1 101 20 -------------------------------------------------------------------------------- /etl-job/src/main/resources/pictures/CURRRENT_TIMESTAMP.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/etl-job/src/main/resources/pictures/CURRRENT_TIMESTAMP.png -------------------------------------------------------------------------------- /etl-job/src/main/resources/readme.md: -------------------------------------------------------------------------------- 1 | We can use flink-sql-client to run all of these SQL tests. 2 | 3 | * (1) Add the necessary connector jars to the lib directory of the Flink installation, e.g. if you want to test reading from Kafka and writing to Elasticsearch, 4 | please add flink-sql-connector-kafka.jar and flink-sql-connector-elasticsearch.jar to lib. 5 | 6 | * (2) Set up the necessary components like a Kafka cluster/Elasticsearch cluster/MySQL/HBase. 7 | 8 | * (3) Start the Flink cluster and start the sql-client. 9 | 10 | * (4) Submit the related SQL to the sql-client to run the test. -------------------------------------------------------------------------------- /flink-demo/flink-jdbc-demo/docker-compose-flink-demo.yaml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | zookeeper: 4 | image: wurstmeister/zookeeper:3.4.6 5 | ports: 6 | - "2181:2181" 7 | kafka: 8 | image: wurstmeister/kafka:2.12-2.2.1 9 | ports: 10 | - "9092:9092" 11 | - "9094:9094" 12 | depends_on: 13 | - zookeeper 14 | environment: 15 | - KAFKA_ADVERTISED_LISTENERS=INSIDE://:9094,OUTSIDE://localhost:9092 16 | - KAFKA_LISTENERS=INSIDE://:9094,OUTSIDE://:9092 17 | - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT 18 | - KAFKA_INTER_BROKER_LISTENER_NAME=INSIDE 19 | - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 20 | volumes: 21 | - /var/run/docker.sock:/var/run/docker.sock 22 | mysql: 23 | image: debezium/example-mysql:1.1 24 | ports: 25 | - 3306:3306 26 | environment: 27 | - MYSQL_ROOT_PASSWORD=debezium 28 | - MYSQL_USER=mysqluser 29 | - MYSQL_PASSWORD=mysqlpw 30 | postgres: 31 | image: debezium/example-postgres:1.1 32 | ports: 33 | - 5432:5432 34 | environment: 35 | - POSTGRES_USER=postgres 36 | - POSTGRES_PASSWORD=postgres 37 | connect: 38 | image: debezium/connect:1.1 39 | ports: 40 | - 8083:8083 41 | depends_on: 42 | - kafka 43 | - mysql 44 | environment: 45 | - BOOTSTRAP_SERVERS=kafka:9094 46 | - GROUP_ID=1 47 | - CONFIG_STORAGE_TOPIC=my_connect_configs 48 | - OFFSET_STORAGE_TOPIC=my_connect_offsets 49 | - STATUS_STORAGE_TOPIC=my_connect_statuses 50 | -------------------------------------------------------------------------------- /flink-demo/flink-jdbc-demo/flink-demo-udf.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leonardBang/flink-sql-etl/d19f81a0c0c831d124d0a6f29767b2364b50a457/flink-demo/flink-jdbc-demo/flink-demo-udf.jar -------------------------------------------------------------------------------- /flink-demo/flink-temporal-join-demo/register-mysql.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mysql-inventory-connector", 3 | "config": { 4 | "connector.class": "io.debezium.connector.mysql.MySqlConnector", 5 | "tasks.max": "1", 6 | "tombstones.on.delete":"false", 7 | "database.hostname": "mysql", 8 | "database.port": "3306", 9 | "database.user": "debezium", 10 | "database.password": "dbz", 11 | "database.server.id": "184054", 12 | "database.server.name": "dbserver1", 13 | "database.whitelist": "inventory", 14 | "database.history.kafka.bootstrap.servers": "kafka:9094", 15 | "database.history.kafka.topic": "schema-changes.inventory" 16 | } 
17 | } 18 | -------------------------------------------------------------------------------- /flink-demo/flink-temporal-join-demo/temporal-join-versioned-table.yaml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | zookeeper: 4 | image: wurstmeister/zookeeper:3.4.6 5 | ports: 6 | - "2181:2181" 7 | kafka: 8 | image: wurstmeister/kafka:2.12-2.2.1 9 | ports: 10 | - "9092:9092" 11 | - "9094:9094" 12 | depends_on: 13 | - zookeeper 14 | environment: 15 | - KAFKA_ADVERTISED_LISTENERS=INSIDE://:9094,OUTSIDE://localhost:9092 16 | - KAFKA_LISTENERS=INSIDE://:9094,OUTSIDE://:9092 17 | - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT 18 | - KAFKA_INTER_BROKER_LISTENER_NAME=INSIDE 19 | - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 20 | volumes: 21 | - /var/run/docker.sock:/var/run/docker.sock 22 | mysql: 23 | image: debezium/example-mysql:1.1 24 | ports: 25 | - 3306:3306 26 | environment: 27 | - MYSQL_ROOT_PASSWORD=debezium 28 | - MYSQL_USER=mysqluser 29 | - MYSQL_PASSWORD=mysqlpw 30 | connect: 31 | image: debezium/connect:1.1 32 | ports: 33 | - 8083:8083 34 | depends_on: 35 | - kafka 36 | - mysql 37 | environment: 38 | - BOOTSTRAP_SERVERS=kafka:9094 39 | - GROUP_ID=1 40 | - CONFIG_STORAGE_TOPIC=my_connect_configs 41 | - OFFSET_STORAGE_TOPIC=my_connect_offsets 42 | - STATUS_STORAGE_TOPIC=my_connect_statuses 43 | -------------------------------------------------------------------------------- /flink-demo/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | flink-sql-etl 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | flink-demo 13 | 14 | 15 | org.apache.flink 16 | flink-table-common 17 | ${flink.version} 18 | provided 19 | 20 | 21 | 22 | 23 | 24 | 25 | org.apache.maven.plugins 26 | maven-jar-plugin 27 | 3.0.0 28 | 29 | flink-demo-udf 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /flink-demo/src/main/java/udf/Int2DateUDF.java: -------------------------------------------------------------------------------- 1 | package udf;/* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import org.apache.flink.table.catalog.DataTypeFactory; 20 | import org.apache.flink.table.functions.ScalarFunction; 21 | import org.apache.flink.table.types.inference.TypeInference; 22 | 23 | import java.sql.Date; 24 | import java.time.LocalDate; 25 | 26 | public class Int2DateUDF extends ScalarFunction { 27 | 28 | public Date eval(int epochDay) { 29 | return Date.valueOf(LocalDate.ofEpochDay(epochDay)); 30 | } 31 | 32 | @Override 33 | public TypeInference getTypeInference(DataTypeFactory typeFactory) { 34 | return super.getTypeInference(typeFactory); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | 8 | 9 | 1.12-vvr-3.0.0-SNAPSHOT 10 | 2.11 11 | 2.8.3 12 | 3.1.0 13 | 14 | 15 | org.example 16 | flink-sql-etl 17 | pom 18 | 1.0-SNAPSHOT 19 | 20 | data-generator 21 | etl-job 22 | sql-avro 23 | flink-demo 24 | state-process 25 | 26 | 27 | 28 | 29 | org.scala-lang 30 | scala-library 31 | 2.11.12 32 | 33 | 34 | 35 | 36 | 37 | confluent 38 | https://mvnrepository.com/artifact/io.confluent/kafka-avro-serializer 39 | 40 | 41 | confluent1 42 | http://packages.confluent.io/maven/ 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /sql-avro/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | flink-sql-etl 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | sql-avro 13 | 14 | 15 | 16 | 17 | 18 | org.apache.flink 19 | flink-table-common 20 | ${flink.version} 21 | 22 | 23 | org.apache.flink 24 | flink-table-planner-blink_${scala.binary.version} 25 | ${flink.version} 26 | 27 | 28 | org.apache.flink 29 | flink-clients_${scala.binary.version} 30 | ${flink.version} 31 | 32 | 33 | 34 | org.apache.flink 35 | flink-avro 36 | ${flink.version} 37 | 38 | 39 | org.apache.avro 40 | avro 41 | 1.8.2 42 | 43 | 44 | 45 | org.apache.flink 46 | flink-csv 47 | ${flink.version} 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /sql-avro/src/main/java/TestUserIssue12.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 20 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 21 | import org.apache.flink.types.Row; 22 | import org.apache.flink.util.CloseableIterator; 23 | 24 | public class TestUserIssue12 { 25 | public static void main(String[] args) throws Exception { 26 | StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(environment); 28 | environment.setParallelism(1); 29 | 30 | //construct some test data with avro format 31 | //writeTestAvroData(tableEnvironment); 32 | 33 | tableEnvironment.executeSql("CREATE TABLE people (\n" + 34 | " name String," + 35 | " status Boolean," + 36 | " note STRING" + 37 | ") WITH (\n" + 38 | " 'connector' = 'filesystem',\n" + 39 | " 'path' = 'file:///Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.avro',\n" + 40 | " 'format' = 'avro'\n" + 41 | ")"); 42 | System.out.println("CREATE TABLE people (\n" + 43 | " name String," + 44 | " status Boolean," + 45 | " note STRING" + 46 | ") WITH (\n" + 47 | " 'connector' = 'filesystem',\n" + 48 | " 'path' = 'file:///Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.avro',\n" + 49 | " 'format' = 'avro'\n" + 50 | ")"); 51 | 52 | CloseableIterator result = tableEnvironment.executeSql("select * from people").collect(); 53 | while (result.hasNext()) { 54 | System.out.println(result.next()); 55 | } 56 | } 57 | 58 | private static void writeTestAvroData(StreamTableEnvironment tableEnvironment) throws Exception { 59 | String csvSourceDDL = "create table csv(" + 60 | " name String," + 61 | " status Boolean," + 62 | " note STRING" + 63 | ") with (" + 64 | " 'connector' = 'filesystem',\n" + 65 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.csv',\n" + 66 | " 'format' = 'csv'" + 67 | ")"; 68 | String csvSink = "create table csvSink(" + 69 | " name String," + 70 | " status Boolean," + 71 | " note STRING" + 72 | ") with (" + 73 | " 'connector' = 'filesystem',\n" + 74 | " 'path' = '/Users/bang/sourcecode/project/flink-sql-etl/data-generator/src/main/resources/user.avro',\n" + 75 | " 'format' = 'avro'" + 76 | ")"; 77 | tableEnvironment.executeSql(csvSourceDDL); 78 | tableEnvironment.executeSql(csvSink); 79 | tableEnvironment.executeSql("insert into csvSink select * from csv").await(); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /state-process/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | flink-sql-etl 7 | org.example 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | state-process 13 | 14 | 15 | 16 | org.apache.flink 17 | flink-state-processor-api_2.11 18 | ${flink.version} 19 | 20 | 21 | 22 | org.apache.flink 23 | flink-java 24 | ${flink.version} 25 | 26 | 27 | org.apache.flink 28 | flink-streaming-java_${scala.binary.version} 29 | ${flink.version} 30 | 31 | 32 | org.apache.flink 33 | flink-runtime_2.11 34 | ${flink.version} 35 | 36 | 37 | org.apache.flink 38 | flink-clients_2.11 39 | ${flink.version} 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /state-process/src/main/java/state/CdcSourceStateAnalysis.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache 
Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package state; 20 | 21 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 22 | import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo; 23 | import org.apache.flink.api.java.DataSet; 24 | import org.apache.flink.api.java.ExecutionEnvironment; 25 | import org.apache.flink.configuration.CheckpointingOptions; 26 | import org.apache.flink.configuration.Configuration; 27 | import org.apache.flink.runtime.state.StateBackendLoader; 28 | import org.apache.flink.state.api.ExistingSavepoint; 29 | import org.apache.flink.state.api.Savepoint; 30 | 31 | import java.nio.charset.StandardCharsets; 32 | 33 | public class CdcSourceStateAnalysis { 34 | 35 | public static void main(String[] args) throws Exception { 36 | ExecutionEnvironment bEnv = ExecutionEnvironment.getExecutionEnvironment(); 37 | bEnv.setParallelism(1); 38 | 39 | Configuration configuration = new Configuration(); 40 | configuration.setString(CheckpointingOptions.STATE_BACKEND.key(), "com.alibaba.flink.statebackend.GeminiStateBackendFactory"); 41 | ExistingSavepoint savepoint = Savepoint.load(bEnv, "/Users/bang/flink-cdc-debug", 42 | StateBackendLoader.loadStateBackendFromConfig(configuration, Thread.currentThread().getContextClassLoader(), null)); 43 | 44 | 45 | DataSet offsetStat = savepoint.readUnionState("6cdc5bb954874d922eaee11a8e7b5dd5", "offset-states", PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO); 46 | System.out.println(new String(offsetStat.collect().get(0), StandardCharsets.UTF_8) ); 47 | DataSet historyRecords = savepoint.readUnionState("6cdc5bb954874d922eaee11a8e7b5dd5", "history-records-states", BasicTypeInfo.STRING_TYPE_INFO); 48 | historyRecords.print(); 49 | 50 | bEnv.execute(""); 51 | } 52 | } 53 | --------------------------------------------------------------------------------
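A note on state-process/src/main/java/state/CdcSourceStateAnalysis.java above: the first argument passed to readUnionState is the uid of the CDC source operator in the job that took the savepoint. A minimal sketch of the same analysis under simpler, assumed conditions (a source uid of "cdc-source", the default memory state backend, and an illustrative savepoint path; none of these names come from this repository):

import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.runtime.state.memory.MemoryStateBackend;
import org.apache.flink.state.api.ExistingSavepoint;
import org.apache.flink.state.api.Savepoint;

import java.nio.charset.StandardCharsets;

public class CdcSourceStateAnalysisSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment bEnv = ExecutionEnvironment.getExecutionEnvironment();
        bEnv.setParallelism(1);

        // Load the savepoint with the state backend the original job was configured with
        // (the default memory backend is assumed here instead of a custom backend factory).
        ExistingSavepoint savepoint = Savepoint.load(bEnv, "/path/to/savepoint", new MemoryStateBackend());

        // "offset-states" is the union list state written by the Debezium-based CDC source;
        // "cdc-source" is an assumed operator uid set on that source via uid("cdc-source").
        DataSet<byte[]> offsetState = savepoint.readUnionState(
                "cdc-source", "offset-states", PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO);

        // collect() triggers execution of the batch job, so no extra bEnv.execute() call is needed.
        for (byte[] bytes : offsetState.collect()) {
            System.out.println(new String(bytes, StandardCharsets.UTF_8));
        }
    }
}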