├── .gitignore ├── DeplyREADME.md ├── FLINKSQL.md ├── FlinkOnYarnDeploy.md ├── LICENSE ├── README.md ├── data ├── data.txt └── t_order.sql ├── pom.xml └── src └── main ├── TestResources ├── saic_cdh6_dev │ ├── core-site.xml │ ├── hdfs-site.xml │ └── hive-site.xml ├── saic_dev │ ├── core-site.xml │ ├── hdfs-site.xml │ ├── hive-site.xml │ ├── mapred-site.xml │ └── yarn-site.xml └── weekend │ ├── core-site.xml │ ├── hdfs-site.xml │ ├── hive-site.xml │ ├── mapred-site.xml │ └── yarn-site.xml ├── resources ├── core-site.xml ├── env.properties ├── hdfs-site.xml ├── hive-site.xml └── log4j.properties └── scala ├── com └── yyb │ └── flink10 │ ├── DataSet │ ├── JDBC │ │ ├── ReadFromJDBCInputFormat.scala │ │ └── WriteToMysqlByOutputformat.scala │ ├── State │ │ └── StateOfCountWindowAverage.java │ ├── WordCount.scala │ └── kafka │ │ └── SendData2KafkaByKafkaOutputFormat.scala │ ├── DataStream │ ├── ProcessFunction │ │ ├── KeyedProcessFunctionDemo.java │ │ └── KeyedProcessFunctionOnNOKeyStreamDemo.java │ ├── State │ │ └── StateOfCountWindowAverage.java │ ├── data │ │ └── WordCountData.java │ ├── kafka │ │ ├── EventTimeDemo.java │ │ ├── SendData2KafkaByKafkaConnector.scala │ │ ├── SendData2KafkaByKafkaConnectorBrowse.scala │ │ └── SendData2KafkaByKafkaConnectorProduct.scala │ ├── parquet │ │ └── WriteParquetWordCount.scala │ └── sink │ │ ├── JDBC │ │ ├── ReadFromInputFormat.scala │ │ └── WriteToMysqlByJDBCOutputformat.scala │ │ └── StreamingFileSink │ │ ├── BulkEncodedSink │ │ ├── WordCountFileSourceStreamFileSinkOfParquet.scala │ │ ├── WordCountFileSourceStreamFileSinkOfParquetToHDFS.scala │ │ └── WordCountFileSourceStreamFileSinkOfSequence.scala │ │ └── RowEncodedSink │ │ ├── WordCountElementsSourceStreamFileSink.scala │ │ ├── WordCountElementsSourceStreamFileSinkJava.java │ │ └── WordCountFileSourceStreamFileSink.scala │ ├── OutputFormat │ └── KafkaOutputFormat.java │ ├── commonEntity │ ├── Current1.java │ ├── Current2.java │ ├── Pi.java │ ├── ProductInfo.java │ ├── Rate.java │ ├── Rate2.java │ └── UserBrowseLog.java │ ├── sink │ ├── KafkaBatchTableSink.java │ ├── ParquetSinkFunction.scala │ └── ParquetWriterSink.scala │ ├── table │ ├── blink │ │ ├── batch │ │ │ ├── BatchQuery.scala │ │ │ ├── BlinkHiveBatchDemo.scala │ │ │ ├── JDBC │ │ │ │ ├── BlinkBatchReadFromJDBCTableSource.scala │ │ │ │ └── BlinkBatchWriteToJDBCTableSink.scala │ │ │ ├── hive │ │ │ │ └── Fromkafka2HiveUseCatalog.java │ │ │ └── kafka │ │ │ │ └── WriteJsonDataByKafkaConnector.java │ │ └── stream │ │ │ ├── FileSystem │ │ │ ├── ReadFromKafkaConnectorWriteToLocalParquetFileJava.java │ │ │ ├── ReadFromKafkaConnectorWriteToLocalParquetFilePiJava.java │ │ │ └── ReadFromKafkaConnectorWriteToLocalTextFileJava.java │ │ │ ├── JDBC │ │ │ ├── ReadDataFromJDBCTableSource.scala │ │ │ ├── WriteDataByJDBCTableSink.scala │ │ │ └── WriteDataByJDBCTableUpsertSink.scala │ │ │ ├── StreamQuery.scala │ │ │ ├── TemporalTable │ │ │ └── MysqlTemporalTable.java │ │ │ ├── elasticsearch │ │ │ ├── WriteData2EsByConnectorTestEnv.java │ │ │ ├── WriteData2EsBySink.java │ │ │ ├── WriteData2EsBySinkTestEnv.java │ │ │ └── WriteData2EsByTableDesc.java │ │ │ ├── hive │ │ │ ├── Fromkafka2HiveUseCatalog.java │ │ │ ├── WriteData2HiveJavaReadFromkafkaTableSource.java │ │ │ └── WriteData2HiveReadFromkafkaTableSource.scala │ │ │ ├── join │ │ │ └── temporaltable │ │ │ │ ├── JoinWithKafkaConsumerTeporalTableFunction.java │ │ │ │ ├── JoinWithKafkaTeporalTableFunction.java │ │ │ │ ├── JoinWithLookupFunctionCheckpoint.java │ │ │ │ ├── JoinWithTeporalTableFunction.java │ │ │ │ ├── 
JoinWithTeporalTableFunctionWithJDBCConnection.java │ │ │ │ ├── TemporalTableDemo.java │ │ │ │ ├── TemporalTableFunction.java │ │ │ │ └── TemporalTableFunctionDemo.java │ │ │ └── kafka │ │ │ ├── EventTimeDemo.java │ │ │ ├── ReadDataFromKafkaConnector.scala │ │ │ ├── ReadDataFromKafkaConnectorJava.java │ │ │ ├── ReadDataFromKafkaSource.scala │ │ │ ├── ReadDataFromKafkaSourceJava.java │ │ │ ├── WriteToKafkaByKafkaConnectorOfOrder.java │ │ │ ├── WriteToKafkaByKafkaConnectorOfRates.java │ │ │ └── watermark │ │ │ └── UseProcesstimeAsWatermark.java │ └── flink │ │ ├── batch │ │ ├── BatchQuery.scala │ │ ├── BatchReadFromParquetQuery.scala │ │ ├── BatchReadFromSequenceQuery.scala │ │ ├── JDBC │ │ │ ├── BatchJDBCReadByInputformat2TableSource.scala │ │ │ ├── BatchJobReadFromJDBCTableSource.scala │ │ │ └── WriteJDBCByTableSink.scala │ │ └── kafka │ │ │ └── SendData2KafkaByKafkaBatchSink.scala │ │ └── stream │ │ ├── JDBC │ │ ├── InsetMode │ │ │ ├── AppendOnly.java │ │ │ ├── RetractStream.java │ │ │ └── UpsertStream.java │ │ ├── StreamJDBCReadByInputformat2TableSource.scala │ │ ├── StreamJobReadFromJDBCTableSource.scala │ │ ├── WriteDataByJDBCTableUpsertSink.scala │ │ ├── WriteDataByTableSink.scala │ │ └── WriteMysqlByJDBCConnectorUpsertMode.java │ │ ├── StreamQuery.scala │ │ └── kafka │ │ └── SendData2KafkaByKafkaConnector.scala │ ├── util │ ├── ParquetAvroWritersSelf.java │ └── RecordTypeInfo.java │ └── util1 │ ├── Demo.java │ └── GeneratorClassByASM.java ├── flink └── api │ └── java │ ├── RecordAvroTypeInfo.java │ ├── Tuple0.java │ ├── Tuple1.java │ └── Tuple2.java └── org └── apache └── flink └── streaming └── connectors └── Elasticsearch7UpsertTableSinkPlus.java /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | *.iml 4 | xxx.text/ 5 | metastore_db/ 6 | *.log 7 | derby.log 8 | dependency-reduced-pom.xml 9 | .DS_Store -------------------------------------------------------------------------------- /DeplyREADME.md: -------------------------------------------------------------------------------- 1 | # Deploy 2 | ## StandAlone 3 | bin/flink run --class com.... -classpath xxx xxx.jar 4 | ## Flink On Yarn 5 | bin/flink run -m yarn-cluster --class xxx --classpath xxx xxx.jar -------------------------------------------------------------------------------- /FlinkOnYarnDeploy.md: -------------------------------------------------------------------------------- 1 | #FlinkOnYarkDeploy -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 
14 | -------------------------------------------------------------------------------- /data/data.txt: -------------------------------------------------------------------------------- 1 | "To be, or not to be,--that is the question:--", 2 | "Whether 'tis nobler in the mind to suffer", 3 | "The slings and arrows of outrageous fortune", 4 | "Or to take arms against a sea of troubles,", 5 | "And by opposing end them?--To die,--to sleep,--", 6 | "No more; and by a sleep to say we end", 7 | "The heartache, and the thousand natural shocks", 8 | "That flesh is heir to,--'tis a consummation", 9 | "Devoutly to be wish'd. To die,--to sleep;--", 10 | "To sleep! perchance to dream:--ay, there's the rub;", 11 | "For in that sleep of death what dreams may come,", 12 | "When we have shuffled off this mortal coil,", 13 | "Must give us pause: there's the respect", 14 | "That makes calamity of so long life;", 15 | "For who would bear the whips and scorns of time,", 16 | "The oppressor's wrong, the proud man's contumely,", 17 | "The pangs of despis'd love, the law's delay,", 18 | "The insolence of office, and the spurns", 19 | "That patient merit of the unworthy takes,", 20 | "When he himself might his quietus make", 21 | "With a bare bodkin? who would these fardels bear,", 22 | "To grunt and sweat under a weary life,", 23 | "But that the dread of something after death,--", 24 | "The undiscover'd country, from whose bourn", 25 | "No traveller returns,--puzzles the will,", 26 | "And makes us rather bear those ills we have", 27 | "Than fly to others that we know not of?", 28 | "Thus conscience does make cowards of us all;", 29 | "And thus the native hue of resolution", 30 | "Is sicklied o'er with the pale cast of thought;", 31 | "And enterprises of great pith and moment,", 32 | "With this regard, their currents turn awry,", 33 | "And lose the name of action.--Soft you now!", 34 | "The fair Ophelia!--Nymph, in thy orisons", 35 | "Be all my sins remember'd." 
-------------------------------------------------------------------------------- /data/t_order.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Navicat Premium Data Transfer 3 | 4 | Source Server : persona-test-new 5 | Source Server Type : MySQL 6 | Source Server Version : 50721 7 | Source Host : 172.16.11.82 8 | Source Database : persona 9 | 10 | Target Server Type : MySQL 11 | Target Server Version : 50721 12 | File Encoding : utf-8 13 | 14 | Date: 04/26/2020 10:15:29 AM 15 | */ 16 | 17 | SET NAMES utf8; 18 | SET FOREIGN_KEY_CHECKS = 0; 19 | 20 | -- ---------------------------- 21 | -- Table structure for `t_order` 22 | -- ---------------------------- 23 | DROP TABLE IF EXISTS `t_order`; 24 | CREATE TABLE `t_order` ( 25 | `id` int(11) DEFAULT NULL, 26 | `name` varchar(50) DEFAULT NULL, 27 | `time` date DEFAULT NULL 28 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 29 | 30 | -- ---------------------------- 31 | -- Records of `t_order` 32 | -- ---------------------------- 33 | BEGIN; 34 | INSERT INTO `t_order` VALUES ('1001', '用券', '2019-04-01'), ('1002', '不用券', '2019-05-10'), ('1001', '不用券', '2019-05-01'), ('1003', '不用券', '2019-04-12'), ('1001', '不用券', '2019-05-11'), ('1002', '用券', '2019-05-30'), ('1001', '不用券', '2019-05-22'), ('1003', '用券', '2019-05-24'); 35 | COMMIT; 36 | 37 | SET FOREIGN_KEY_CHECKS = 1; 38 | -------------------------------------------------------------------------------- /src/main/TestResources/saic_cdh6_dev/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | fs.defaultFS 7 | hdfs://nameservice1 8 | 9 | 10 | fs.trash.interval 11 | 1 12 | 13 | 14 | io.compression.codecs 15 | org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec 16 | 17 | 18 | hadoop.security.authentication 19 | simple 20 | 21 | 22 | hadoop.security.authorization 23 | false 24 | 25 | 26 | hadoop.rpc.protection 27 | authentication 28 | 29 | 30 | hadoop.security.auth_to_local 31 | DEFAULT 32 | 33 | 34 | hadoop.proxyuser.oozie.hosts 35 | * 36 | 37 | 38 | hadoop.proxyuser.oozie.groups 39 | * 40 | 41 | 42 | hadoop.proxyuser.flume.hosts 43 | * 44 | 45 | 46 | hadoop.proxyuser.flume.groups 47 | * 48 | 49 | 50 | hadoop.proxyuser.HTTP.hosts 51 | * 52 | 53 | 54 | hadoop.proxyuser.HTTP.groups 55 | * 56 | 57 | 58 | hadoop.proxyuser.hive.hosts 59 | * 60 | 61 | 62 | hadoop.proxyuser.hive.groups 63 | * 64 | 65 | 66 | hadoop.proxyuser.hue.hosts 67 | * 68 | 69 | 70 | hadoop.proxyuser.hue.groups 71 | * 72 | 73 | 74 | hadoop.proxyuser.httpfs.hosts 75 | * 76 | 77 | 78 | hadoop.proxyuser.httpfs.groups 79 | * 80 | 81 | 82 | hadoop.proxyuser.hdfs.groups 83 | * 84 | 85 | 86 | hadoop.proxyuser.hdfs.hosts 87 | * 88 | 89 | 90 | hadoop.proxyuser.yarn.hosts 91 | * 92 | 93 | 94 | hadoop.proxyuser.yarn.groups 95 | * 96 | 97 | 98 | hadoop.security.group.mapping 99 | org.apache.hadoop.security.ShellBasedUnixGroupsMapping 100 | 101 | 102 | hadoop.security.instrumentation.requires.admin 103 | false 104 | 105 | 106 | net.topology.script.file.name 107 | /etc/hadoop/conf.cloudera.yarn/topology.py 108 | 109 | 110 | io.file.buffer.size 111 | 65536 112 | 113 | 114 | hadoop.ssl.enabled 115 | false 116 | 117 | 118 | hadoop.ssl.require.client.cert 119 | false 120 | true 121 | 122 
| 123 | hadoop.ssl.keystores.factory.class 124 | org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory 125 | true 126 | 127 | 128 | hadoop.ssl.server.conf 129 | ssl-server.xml 130 | true 131 | 132 | 133 | hadoop.ssl.client.conf 134 | ssl-client.xml 135 | true 136 | 137 | 138 | fs.protected.directories 139 | /testdir 140 | 141 | 142 | ipc.maximum.data.length 143 | 268435456 144 | 145 | 146 | -------------------------------------------------------------------------------- /src/main/TestResources/saic_cdh6_dev/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | dfs.client.failover.proxy.provider.nameservice1 8 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 9 | 10 | 11 | dfs.ha.automatic-failover.enabled.nameservice1 12 | true 13 | 14 | 15 | ha.zookeeper.quorum 16 | njdev-nn01.nj:2181,njdev-nn02.nj:2181,njdev-nn03.nj:2181 17 | 18 | 19 | dfs.ha.namenodes.nameservice1 20 | namenode82,namenode89 21 | 22 | 23 | dfs.namenode.rpc-address.nameservice1.namenode82 24 | njdev-nn01.nj:8020 25 | 26 | 27 | dfs.namenode.servicerpc-address.nameservice1.namenode82 28 | njdev-nn01.nj:8022 29 | 30 | 31 | dfs.namenode.http-address.nameservice1.namenode82 32 | njdev-nn01.nj:9870 33 | 34 | 35 | dfs.namenode.https-address.nameservice1.namenode82 36 | njdev-nn01.nj:9871 37 | 38 | 39 | dfs.namenode.rpc-address.nameservice1.namenode89 40 | njdev-nn02.nj:8020 41 | 42 | 43 | dfs.namenode.servicerpc-address.nameservice1.namenode89 44 | njdev-nn02.nj:8022 45 | 46 | 47 | dfs.namenode.http-address.nameservice1.namenode89 48 | njdev-nn02.nj:9870 49 | 50 | 51 | dfs.namenode.https-address.nameservice1.namenode89 52 | njdev-nn02.nj:9871 53 | 54 | 55 | dfs.replication 56 | 2 57 | 58 | 59 | dfs.blocksize 60 | 134217728 61 | 62 | 63 | dfs.client.use.datanode.hostname 64 | false 65 | 66 | 67 | fs.permissions.umask-mode 68 | 022 69 | 70 | 71 | dfs.client.block.write.locateFollowingBlock.retries 72 | 7 73 | 74 | 75 | dfs.namenode.acls.enabled 76 | true 77 | 78 | 79 | dfs.client.read.shortcircuit 80 | true 81 | 82 | 83 | dfs.domain.socket.path 84 | /var/run/hdfs-sockets/dn 85 | 86 | 87 | dfs.client.read.shortcircuit.skip.checksum 88 | false 89 | 90 | 91 | dfs.client.domain.socket.data.traffic 92 | false 93 | 94 | 95 | dfs.datanode.hdfs-blocks-metadata.enabled 96 | true 97 | 98 | 99 | dfs.nameservices 100 | nameservice1,njdev-cdh5 101 | 102 | 103 | dfs.ha.namenodes.njdev-cdh5 104 | nn1,nn2 105 | 106 | 107 | dfs.namenode.rpc-address.njdev-cdh5.nn1 108 | 172.16.10.148:8020 109 | 110 | 111 | dfs.namenode.rpc-address.njdev-cdh5.nn2 112 | 172.16.10.149:8020 113 | 114 | 115 | dfs.namenode.http-address.njdev-cdh5.nn1 116 | 172.16.10.148:50070 117 | 118 | 119 | dfs.namenode.http-address.njdev-cdh5.nn2 120 | 172.16.10.149:50070 121 | 122 | 123 | dfs.client.failover.proxy.provider.njdev-cdh5 124 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 125 | 126 | 127 | -------------------------------------------------------------------------------- /src/main/TestResources/saic_dev/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | fs.defaultFS 7 | hdfs://nameservice1 8 | 9 | 10 | fs.trash.interval 11 | 1 12 | 13 | 14 | io.compression.codecs 15 | 
org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec 16 | 17 | 18 | hadoop.security.authentication 19 | simple 20 | 21 | 22 | hadoop.security.authorization 23 | false 24 | 25 | 26 | hadoop.rpc.protection 27 | authentication 28 | 29 | 30 | hadoop.security.auth_to_local 31 | DEFAULT 32 | 33 | 34 | hadoop.proxyuser.oozie.hosts 35 | * 36 | 37 | 38 | hadoop.proxyuser.oozie.groups 39 | * 40 | 41 | 42 | hadoop.proxyuser.mapred.hosts 43 | * 44 | 45 | 46 | hadoop.proxyuser.mapred.groups 47 | * 48 | 49 | 50 | hadoop.proxyuser.flume.hosts 51 | * 52 | 53 | 54 | hadoop.proxyuser.flume.groups 55 | * 56 | 57 | 58 | hadoop.proxyuser.HTTP.hosts 59 | * 60 | 61 | 62 | hadoop.proxyuser.HTTP.groups 63 | * 64 | 65 | 66 | hadoop.proxyuser.hive.hosts 67 | * 68 | 69 | 70 | hadoop.proxyuser.hive.groups 71 | * 72 | 73 | 74 | hadoop.proxyuser.hue.hosts 75 | * 76 | 77 | 78 | hadoop.proxyuser.hue.groups 79 | * 80 | 81 | 82 | hadoop.proxyuser.httpfs.hosts 83 | * 84 | 85 | 86 | hadoop.proxyuser.httpfs.groups 87 | * 88 | 89 | 90 | hadoop.proxyuser.hdfs.groups 91 | * 92 | 93 | 94 | hadoop.proxyuser.hdfs.hosts 95 | * 96 | 97 | 98 | hadoop.proxyuser.yarn.hosts 99 | * 100 | 101 | 102 | hadoop.proxyuser.yarn.groups 103 | * 104 | 105 | 106 | hadoop.security.group.mapping 107 | org.apache.hadoop.security.ShellBasedUnixGroupsMapping 108 | 109 | 110 | hadoop.security.instrumentation.requires.admin 111 | false 112 | 113 | 114 | net.topology.script.file.name 115 | /etc/hadoop/conf.cloudera.yarn/topology.py 116 | 117 | 118 | io.file.buffer.size 119 | 65536 120 | 121 | 122 | hadoop.ssl.enabled 123 | false 124 | 125 | 126 | hadoop.ssl.require.client.cert 127 | false 128 | true 129 | 130 | 131 | hadoop.ssl.keystores.factory.class 132 | org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory 133 | true 134 | 135 | 136 | hadoop.ssl.server.conf 137 | ssl-server.xml 138 | true 139 | 140 | 141 | hadoop.ssl.client.conf 142 | ssl-client.xml 143 | true 144 | 145 | 146 | -------------------------------------------------------------------------------- /src/main/TestResources/saic_dev/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | dfs.nameservices 7 | nameservice1 8 | 9 | 10 | dfs.client.failover.proxy.provider.nameservice1 11 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 12 | 13 | 14 | dfs.ha.automatic-failover.enabled.nameservice1 15 | true 16 | 17 | 18 | ha.zookeeper.quorum 19 | njdev-cdh5-dn01.nj:2181,njdev-cdh5-nn01.nj:2181,njdev-cdh5-nn02.nj:2181 20 | 21 | 22 | dfs.ha.namenodes.nameservice1 23 | namenode98,namenode62 24 | 25 | 26 | dfs.namenode.rpc-address.nameservice1.namenode98 27 | njdev-cdh5-nn01.nj:8020 28 | 29 | 30 | dfs.namenode.servicerpc-address.nameservice1.namenode98 31 | njdev-cdh5-nn01.nj:8022 32 | 33 | 34 | dfs.namenode.http-address.nameservice1.namenode98 35 | njdev-cdh5-nn01.nj:50070 36 | 37 | 38 | dfs.namenode.https-address.nameservice1.namenode98 39 | njdev-cdh5-nn01.nj:50470 40 | 41 | 42 | dfs.namenode.rpc-address.nameservice1.namenode62 43 | njdev-cdh5-nn02.nj:8020 44 | 45 | 46 | dfs.namenode.servicerpc-address.nameservice1.namenode62 47 | njdev-cdh5-nn02.nj:8022 48 | 49 | 50 | dfs.namenode.http-address.nameservice1.namenode62 51 | njdev-cdh5-nn02.nj:50070 52 | 53 | 54 | dfs.namenode.https-address.nameservice1.namenode62 55 | 
njdev-cdh5-nn02.nj:50470 56 | 57 | 58 | dfs.replication 59 | 2 60 | 61 | 62 | dfs.blocksize 63 | 134217728 64 | 65 | 66 | dfs.client.use.datanode.hostname 67 | false 68 | 69 | 70 | fs.permissions.umask-mode 71 | 022 72 | 73 | 74 | dfs.namenode.acls.enabled 75 | false 76 | 77 | 78 | dfs.client.use.legacy.blockreader 79 | false 80 | 81 | 82 | dfs.client.read.shortcircuit 83 | true 84 | 85 | 86 | dfs.domain.socket.path 87 | /var/run/hdfs-sockets/dn 88 | 89 | 90 | dfs.client.read.shortcircuit.skip.checksum 91 | false 92 | 93 | 94 | dfs.client.domain.socket.data.traffic 95 | false 96 | 97 | 98 | dfs.datanode.hdfs-blocks-metadata.enabled 99 | true 100 | 101 | 102 | -------------------------------------------------------------------------------- /src/main/TestResources/weekend/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | fs.defaultFS 22 | hdfs://ns1 23 | 24 | 25 | ha.zookeeper.quorum 26 | weekend110:2181,weekend111:2181,weekend112:2181 27 | 28 | 29 | io.file.buffer.size 30 | 131072 31 | 32 | 33 | 34 | hadoop.tmp.dir 35 | /app/hadoop-2.9.2/tmp 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/main/TestResources/weekend/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | dfs.nameservices 23 | ns1 24 | 25 | 26 | 27 | dfs.ha.namenodes.ns1 28 | nn1,nn2 29 | 30 | 31 | 32 | dfs.namenode.rpc-address.ns1.nn1 33 | weekend110:9000 34 | 35 | 36 | 37 | dfs.namenode.http-address.ns1.nn1 38 | weekend110:50070 39 | 40 | 41 | 42 | dfs.namenode.rpc-address.ns1.nn2 43 | weekend111:9000 44 | 45 | 46 | 47 | dfs.namenode.http-address.ns1.nn2 48 | weekend111:50070 49 | 50 | 51 | 52 | dfs.namenode.shared.edits.dir 53 | qjournal://weekend110:8485;weekend111:8485;weekend112:8485/ns1 54 | 55 | 56 | 57 | dfs.journalnode.edits.dir 58 | /app/hadoop-2.9.2/hdfs/journal 59 | 60 | 61 | dfs.namenode.name.dir 62 | /app/hadoop-2.9.2/hdfs/name 63 | 64 | 65 | dfs.datanode.data.dir 66 | /app/hadoop-2.9.2/hdfs/data 67 | 68 | 69 | 70 | dfs.ha.automatic-failover.enabled 71 | true 72 | 73 | 74 | 75 | dfs.client.failover.proxy.provider.ns1 76 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 77 | 78 | 79 | 80 | dfs.ha.fencing.methods 81 | 82 | sshfence 83 | shell(/bin/true) 84 | 85 | 86 | 87 | 88 | dfs.ha.fencing.ssh.private-key-files 89 | /root/.ssh/id_rsa 90 | 91 | 92 | 93 | dfs.ha.fencing.ssh.connect-timeout 94 | 30000 95 | 96 | 97 | -------------------------------------------------------------------------------- /src/main/TestResources/weekend/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | mapreduce.framework.name 22 | yarn 23 | 24 | 25 | 26 | mapreduce.jobhistory.address 27 | weekend111:10020 28 | 29 | 30 | 31 | mapreduce.jobhistory.webapp.address 32 | weekend111:19888 33 | 34 | 35 | 36 | yarn.app.mapreduce.am.staging-dir 37 | /history 38 | 39 | 40 | 41 | mapreduce.map.log.level 42 | INFO 43 | 44 | 45 | mapreduce.reduce.log.level 46 | INFO 47 | 48 | 49 | -------------------------------------------------------------------------------- /src/main/TestResources/weekend/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | yarn.resourcemanager.ha.enabled 19 | true 20 | 21 | 22 | 23 | yarn.resourcemanager.cluster-id 24 | yrc 
25 | 26 | 27 | 28 | yarn.resourcemanager.ha.rm-ids 29 | rm1,rm2 30 | 31 | 32 | 33 | yarn.resourcemanager.hostname.rm1 34 | weekend111 35 | 36 | 37 | yarn.resourcemanager.hostname.rm2 38 | weekend112 39 | 40 | 41 | yarn.resourcemanager.webapp.address.rm1 42 | weekend111:8088 43 | 44 | 45 | yarn.resourcemanager.webapp.address.rm2 46 | weekend112:8088 47 | 48 | 49 | 50 | yarn.resourcemanager.zk-address 51 | weekend110:2181,weekend111:2181,weekend112:2181 52 | 53 | 54 | yarn.nodemanager.aux-services 55 | mapreduce_shuffle 56 | 57 | 58 | yarn.nodemanager.vmem-check-enabled 59 | false 60 | Whether virtual memory limits will be enforced for containers 61 | 62 | 63 | yarn.nodemanager.vmem-pmem-ratio 64 | 4 65 | Ratio between virtual memory to physical memory when setting memory limits for containers 66 | 67 | 68 | -------------------------------------------------------------------------------- /src/main/resources/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | fs.defaultFS 7 | hdfs://nameservice1 8 | 9 | 10 | fs.trash.interval 11 | 1 12 | 13 | 14 | io.compression.codecs 15 | org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec 16 | 17 | 18 | hadoop.security.authentication 19 | simple 20 | 21 | 22 | hadoop.security.authorization 23 | false 24 | 25 | 26 | hadoop.rpc.protection 27 | authentication 28 | 29 | 30 | hadoop.security.auth_to_local 31 | DEFAULT 32 | 33 | 34 | hadoop.proxyuser.oozie.hosts 35 | * 36 | 37 | 38 | hadoop.proxyuser.oozie.groups 39 | * 40 | 41 | 42 | hadoop.proxyuser.flume.hosts 43 | * 44 | 45 | 46 | hadoop.proxyuser.flume.groups 47 | * 48 | 49 | 50 | hadoop.proxyuser.HTTP.hosts 51 | * 52 | 53 | 54 | hadoop.proxyuser.HTTP.groups 55 | * 56 | 57 | 58 | hadoop.proxyuser.hive.hosts 59 | * 60 | 61 | 62 | hadoop.proxyuser.hive.groups 63 | * 64 | 65 | 66 | hadoop.proxyuser.hue.hosts 67 | * 68 | 69 | 70 | hadoop.proxyuser.hue.groups 71 | * 72 | 73 | 74 | hadoop.proxyuser.httpfs.hosts 75 | * 76 | 77 | 78 | hadoop.proxyuser.httpfs.groups 79 | * 80 | 81 | 82 | hadoop.proxyuser.hdfs.groups 83 | * 84 | 85 | 86 | hadoop.proxyuser.hdfs.hosts 87 | * 88 | 89 | 90 | hadoop.proxyuser.yarn.hosts 91 | * 92 | 93 | 94 | hadoop.proxyuser.yarn.groups 95 | * 96 | 97 | 98 | hadoop.security.group.mapping 99 | org.apache.hadoop.security.ShellBasedUnixGroupsMapping 100 | 101 | 102 | hadoop.security.instrumentation.requires.admin 103 | false 104 | 105 | 106 | net.topology.script.file.name 107 | /etc/hadoop/conf.cloudera.yarn/topology.py 108 | 109 | 110 | io.file.buffer.size 111 | 65536 112 | 113 | 114 | hadoop.ssl.enabled 115 | false 116 | 117 | 118 | hadoop.ssl.require.client.cert 119 | false 120 | true 121 | 122 | 123 | hadoop.ssl.keystores.factory.class 124 | org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory 125 | true 126 | 127 | 128 | hadoop.ssl.server.conf 129 | ssl-server.xml 130 | true 131 | 132 | 133 | hadoop.ssl.client.conf 134 | ssl-client.xml 135 | true 136 | 137 | 138 | fs.protected.directories 139 | /testdir 140 | 141 | 142 | ipc.maximum.data.length 143 | 268435456 144 | 145 | 146 | -------------------------------------------------------------------------------- /src/main/resources/env.properties: 
-------------------------------------------------------------------------------- 1 | env=dev 2 | path=/root/script/jars/Pi.class 3 | zookeeper.connect=${zookeeper.connect} 4 | bootstrap.servers=${bootstrap.servers} 5 | es.protocol=http 6 | es.hosts=172.16.10.89:9200,172.16.10.75:9200,172.16.10.90:9200 7 | es.username=elastic 8 | es.password=Es#172.10 9 | #es.hosts=172.16.11.104:9200,172.16.11.66:9200,172.16.11.67:9200 10 | # Authorization: Basic ZWxhc3RpYzpFcyMxNzIuMTA= -------------------------------------------------------------------------------- /src/main/resources/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | dfs.client.failover.proxy.provider.nameservice1 8 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 9 | 10 | 11 | dfs.ha.automatic-failover.enabled.nameservice1 12 | true 13 | 14 | 15 | ha.zookeeper.quorum 16 | njdev-nn01.nj:2181,njdev-nn02.nj:2181,njdev-nn03.nj:2181 17 | 18 | 19 | dfs.ha.namenodes.nameservice1 20 | namenode82,namenode89 21 | 22 | 23 | dfs.namenode.rpc-address.nameservice1.namenode82 24 | njdev-nn01.nj:8020 25 | 26 | 27 | dfs.namenode.servicerpc-address.nameservice1.namenode82 28 | njdev-nn01.nj:8022 29 | 30 | 31 | dfs.namenode.http-address.nameservice1.namenode82 32 | njdev-nn01.nj:9870 33 | 34 | 35 | dfs.namenode.https-address.nameservice1.namenode82 36 | njdev-nn01.nj:9871 37 | 38 | 39 | dfs.namenode.rpc-address.nameservice1.namenode89 40 | njdev-nn02.nj:8020 41 | 42 | 43 | dfs.namenode.servicerpc-address.nameservice1.namenode89 44 | njdev-nn02.nj:8022 45 | 46 | 47 | dfs.namenode.http-address.nameservice1.namenode89 48 | njdev-nn02.nj:9870 49 | 50 | 51 | dfs.namenode.https-address.nameservice1.namenode89 52 | njdev-nn02.nj:9871 53 | 54 | 55 | dfs.replication 56 | 2 57 | 58 | 59 | dfs.blocksize 60 | 134217728 61 | 62 | 63 | dfs.client.use.datanode.hostname 64 | false 65 | 66 | 67 | fs.permissions.umask-mode 68 | 022 69 | 70 | 71 | dfs.client.block.write.locateFollowingBlock.retries 72 | 7 73 | 74 | 75 | dfs.namenode.acls.enabled 76 | true 77 | 78 | 79 | dfs.client.read.shortcircuit 80 | true 81 | 82 | 83 | dfs.domain.socket.path 84 | /var/run/hdfs-sockets/dn 85 | 86 | 87 | dfs.client.read.shortcircuit.skip.checksum 88 | false 89 | 90 | 91 | dfs.client.domain.socket.data.traffic 92 | false 93 | 94 | 95 | dfs.datanode.hdfs-blocks-metadata.enabled 96 | true 97 | 98 | 99 | dfs.nameservices 100 | nameservice1,njdev-cdh5 101 | 102 | 103 | dfs.ha.namenodes.njdev-cdh5 104 | nn1,nn2 105 | 106 | 107 | dfs.namenode.rpc-address.njdev-cdh5.nn1 108 | 172.16.10.148:8020 109 | 110 | 111 | dfs.namenode.rpc-address.njdev-cdh5.nn2 112 | 172.16.10.149:8020 113 | 114 | 115 | dfs.namenode.http-address.njdev-cdh5.nn1 116 | 172.16.10.148:50070 117 | 118 | 119 | dfs.namenode.http-address.njdev-cdh5.nn2 120 | 172.16.10.149:50070 121 | 122 | 123 | dfs.client.failover.proxy.provider.njdev-cdh5 124 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 125 | 126 | 127 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. 
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | 22 | 23 | log4j.appender.console=org.apache.log4j.ConsoleAppender 24 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 26 | 27 | log4j.logger.org.apache.flink=INFO -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataSet/JDBC/ReadFromJDBCInputFormat.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataSet.JDBC 2 | 3 | import java.sql.Date 4 | 5 | import org.apache.flink.api.common.typeinfo.TypeInformation 6 | import org.apache.flink.api.java.io.jdbc.JDBCInputFormat 7 | import org.apache.flink.api.java.typeutils.RowTypeInfo 8 | import org.apache.flink.api.scala.ExecutionEnvironment 9 | import org.apache.flink.api.scala._ 10 | import org.apache.flink.api.scala.typeutils.Types 11 | import org.apache.flink.types.Row 12 | 13 | /** 14 | * @Author yyb 15 | * @Description 16 | * @Date Create in 2020-04-26 17 | * @Time 09:52 18 | */ 19 | object ReadFromJDBCInputFormat { 20 | def main(args: Array[String]): Unit = { 21 | val env = ExecutionEnvironment.getExecutionEnvironment 22 | 23 | val types = Array[TypeInformation[_]](Types.INT, Types.STRING, Types.SQL_DATE) 24 | val fields = Array[String]("id", "name", "time") 25 | val typeInfo = new RowTypeInfo(types, fields) 26 | 27 | val jdbcInformat: JDBCInputFormat = JDBCInputFormat.buildJDBCInputFormat() 28 | .setAutoCommit(false) 29 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 30 | .setDrivername("com.mysql.jdbc.Driver") 31 | .setUsername("root") 32 | .setPassword("111111") 33 | .setQuery("select * from t_order") 34 | .setRowTypeInfo(typeInfo) 35 | .finish() 36 | 37 | val AUX_TABLE: DataSet[Row] = env.createInput(jdbcInformat) 38 | 39 | AUX_TABLE.print() 40 | 41 | val orderDataSet: DataSet[Order] = AUX_TABLE.map(x => Order(x.getField(0).asInstanceOf[Int], x.getField(1).asInstanceOf[String], x.getField(2).asInstanceOf[Date])) 42 | orderDataSet.print() 43 | 44 | // env.execute("ReadFromJDBCInputFormat") 45 | 46 | } 47 | 48 | case class Order(id:Int, name:String, time:Date) 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataSet/JDBC/WriteToMysqlByOutputformat.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataSet.JDBC 2 | 3 | import com.yyb.flink10.DataStream.data.WordCountData 4 | import com.yyb.flink10.DataStream.parquet.WriteParquetWordCount.WORDCOUNT 5 | import 
org.apache.flink.api.java.io.jdbc.JDBCOutputFormat 6 | import org.apache.flink.api.scala.ExecutionEnvironment 7 | import org.apache.flink.api.scala._ 8 | import org.apache.flink.types.Row 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-04-26 14 | * @Time 10:32 15 | */ 16 | object WriteToMysqlByOutputformat { 17 | def main(args: Array[String]): Unit = { 18 | val env = ExecutionEnvironment.getExecutionEnvironment 19 | 20 | val text = env.fromElements(WordCountData.WORDS: _*) 21 | val counts: DataSet[WORDCOUNT] = text.flatMap(_.toLowerCase.split("\\W+")) 22 | .filter(_.nonEmpty) 23 | .map(WORDCOUNT(_, 1)) 24 | .groupBy(0) 25 | .sum(1) 26 | val countRecord: DataSet[Row] = counts.map(x => Row.of(x.word, x.count.asInstanceOf[Integer])) 27 | 28 | val mysqlOutput: JDBCOutputFormat = JDBCOutputFormat.buildJDBCOutputFormat() 29 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 30 | .setDrivername("com.mysql.jdbc.Driver") 31 | .setUsername("root") 32 | .setPassword("111111") 33 | .setQuery("insert into wordcount (word, count) values(?, ?)") //注意这里是 mysql 的插入语句 34 | .setSqlTypes(Array(java.sql.Types.VARCHAR, java.sql.Types.INTEGER)) //这里是每行数据的 类型 35 | .finish() 36 | 37 | countRecord.output(mysqlOutput) 38 | 39 | 40 | env.execute("WriteToMysqlByOutputformat") 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataSet/WordCount.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataSet 2 | 3 | import org.apache.flink.api.scala.ExecutionEnvironment 4 | import org.apache.flink.api.scala._ 5 | 6 | /** 7 | * @Author yyb 8 | * @Description 9 | * @Date Create in 2020-04-15 10 | * @Time 17:00 11 | */ 12 | object WordCount { 13 | def main(args: Array[String]): Unit = { 14 | val env = ExecutionEnvironment.getExecutionEnvironment 15 | 16 | val filePtah = "/Users/yyb/Downloads/1.txt" 17 | val filepathtosave = "/Users/yyb/Downloads/1_rs.csv" 18 | val text = env.readTextFile(filePtah) 19 | val wordCounts = text.flatMap(_.toLowerCase.split("\\W+") filter { _.nonEmpty}) 20 | .map((_, 1)) 21 | .groupBy(0) 22 | .sum(1) 23 | wordCounts.setParallelism(1).print() 24 | wordCounts.setParallelism(1).writeAsCsv(filepathtosave) 25 | 26 | 27 | env.execute("WordCount") 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataSet/kafka/SendData2KafkaByKafkaOutputFormat.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataSet.kafka 2 | 3 | import java.io.InputStream 4 | import java.util.Properties 5 | 6 | import com.yyb.flink10.OutputFormat.KafkaOutputFormat 7 | import com.yyb.flink10.sink.KafkaBatchTableSink 8 | import org.apache.flink.api.scala._ 9 | import org.apache.flink.formats.json.JsonRowSerializationSchema 10 | import org.apache.flink.kafka011.shaded.org.apache.kafka.clients.producer.ProducerRecord 11 | import org.apache.flink.kafka011.shaded.org.apache.kafka.common.serialization.StringSerializer 12 | import org.apache.flink.streaming.connectors.kafka.internal.FlinkKafkaProducer 13 | import org.apache.flink.table.api.scala.BatchTableEnvironment 14 | import org.apache.flink.table.api.{DataTypes, Table, TableSchema} 15 | import org.apache.flink.table.descriptors.{Json, Kafka, Schema} 16 | 17 | 18 | /** 19 | * @Author yyb 20 | * @Description 21 | * @Date Create in 2020-07-28 22 
| * @Time 16:12 23 | */ 24 | object SendData2KafkaByKafkaOutputFormat { 25 | def main(args: Array[String]): Unit = { 26 | val env = ExecutionEnvironment.getExecutionEnvironment 27 | // val blinkTableEnv = BatchTableEnvironment.create(env) 28 | val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties") 29 | val prop: Properties = new Properties() 30 | prop.load(in_env) 31 | println(prop.getProperty("zookeeper.connect")) 32 | 33 | 34 | 35 | val kafka = new Kafka 36 | kafka.version("0.11") 37 | .topic("eventsource_yhj") 38 | .property("zookeeper.connect", prop.getProperty("zookeeper.connect")) 39 | .property("bootstrap.servers", prop.getProperty("bootstrap.servers")). 40 | property("group.id", "yyb_dev") 41 | .startFromLatest 42 | 43 | 44 | val schema = new Schema 45 | val tableSchema1 = TableSchema.builder 46 | .field("amount", DataTypes.INT) 47 | .field("currency", DataTypes.STRING).build 48 | schema.schema(tableSchema1) 49 | 50 | // val tableSource = blinkTableEnv.connect(kafka) 51 | // .withFormat(new Json().failOnMissingField(true)) 52 | // .withSchema(schema) 53 | // tableSource.createTemporaryTable("Orders_tmp") 54 | 55 | val schemaString = new JsonRowSerializationSchema.Builder(tableSchema1.toRowType) 56 | val kafkaProp = new Properties(); 57 | kafkaProp.put("key.serializer", classOf[StringSerializer]) 58 | kafkaProp.put("value.serializer", classOf[StringSerializer]) 59 | kafkaProp.put("zookeeper.connect", prop.getProperty("zookeeper.connect")) 60 | kafkaProp.put("bootstrap.servers", prop.getProperty("bootstrap.servers")) 61 | kafkaProp.put("topic", "eventsource_yhj") 62 | 63 | val kafkaProducer = new FlinkKafkaProducer[String, String](kafkaProp) 64 | val data = Array(Current(1, "Euro")) 65 | 66 | val dataDS = env.fromCollection(data) 67 | 68 | val datasOfRecord: DataSet[ProducerRecord[String, String]] = dataDS.map(x => { 69 | val record: ProducerRecord[String, String] = new ProducerRecord[String, String]("eventsource_yhj", x.toString) 70 | record 71 | }) 72 | 73 | 74 | 75 | 76 | /** 77 | * 这里的 发送数据 到 kafka是 先 collect 到 driver 才 发送的,所以不是 分布式的处理方法 78 | * 需要调优 79 | */ 80 | // datasOfRecord.collect().foreach(kafkaProducer.send(_)) 81 | // kafkaProducer.flush() 82 | 83 | /** 84 | * 这里使用的是 dataset 的 kafkaOutputFormat 85 | */ 86 | val kafkaOutputFormat = new KafkaOutputFormat(kafkaProp); 87 | dataDS.map(x => x.toString).output(kafkaOutputFormat) 88 | 89 | // val dataTable: Table = blinkTableEnv.fromDataSet(dataDS.map(_.toString)) 90 | 91 | // blinkTableEnv.registerTable("dataSource", dataTable) 92 | 93 | // val kafkaBatchTableSink = new KafkaBatchTableSink(kafkaOutputFormat); 94 | // blinkTableEnv.registerTableSink("kafkaBatchTableSink", kafkaBatchTableSink) 95 | 96 | var sql = 97 | """ 98 | |insert into kafkaBatchTableSink select * from dataSource 99 | |""".stripMargin 100 | //因为 kafka 是 无界的, 所以不能使用 batch 模式 的 kafkatablesink 101 | //BatchTableSink or OutputFormatTableSink required to emit batch Table. 
102 | // blinkTableEnv.sqlUpdate(sql) 103 | 104 | // dataTable.insertInto("Orders_tmp") 105 | 106 | 107 | env.execute("SendData2KafkaByKafkaConnector") 108 | } 109 | 110 | case class Current(amount:Int, currency:String){ 111 | override def toString: String = { 112 | s"""{"amount":"${amount}",currency:"${currency}"}""".stripMargin 113 | } 114 | 115 | def toBytes(): Array[Byte] ={ 116 | toString.getBytes() 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/State/StateOfCountWindowAverage.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.State; 2 | 3 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 4 | import org.apache.flink.api.common.state.ValueState; 5 | import org.apache.flink.api.common.state.ValueStateDescriptor; 6 | import org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | import org.apache.flink.api.java.tuple.Tuple2; 9 | import org.apache.flink.api.java.tuple.Tuple3; 10 | import org.apache.flink.configuration.Configuration; 11 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 12 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 14 | import org.apache.flink.util.Collector; 15 | 16 | /** 17 | * @Author yyb 18 | * @Description 19 | * @Date Create in 2020-08-03 20 | * @Time 14:52 21 | */ 22 | public class StateOfCountWindowAverage { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | DataStreamSource> soure = env.fromElements( 27 | Tuple2.of(1L, 3L), Tuple2.of(1L, 5L), Tuple2.of(1L, 7L), 28 | Tuple2.of(1L, 4L), Tuple2.of(1L, 2L), Tuple2.of(2L, 2L), 29 | Tuple2.of(2L, 4L) 30 | 31 | ); 32 | 33 | // soure.keyBy(0).flatMap(new CountWindowAverage()) 34 | // .print(); 35 | 36 | SingleOutputStreamOperator> rs1 = soure.keyBy(0).flatMap(new GroupSum()); 37 | rs1.print(); 38 | 39 | 40 | env.execute("StateOfCountWindowAverage"); 41 | 42 | 43 | } 44 | 45 | /** 46 | * 为什么要用 flatMap ,flatMap 有 抽取一层数据的意思,这个 把 2 个元素 计算 输出了 1个元素 47 | */ 48 | static class CountWindowAverage extends RichFlatMapFunction, Tuple2>{ 49 | private transient ValueState> sum; 50 | 51 | @Override 52 | public void flatMap(Tuple2 value, Collector> out) throws Exception { 53 | Tuple2 currentSum = sum.value(); 54 | currentSum.f0 = currentSum.f0 + 1; 55 | currentSum.f1 = currentSum.f1 + value.f1; 56 | sum.update(currentSum); 57 | if(currentSum.f0 >=2){ //这里遇到 2 个 一组的元素 求均值后 ,输出 ,清空 sum;如果需要求这一组的 平均值,则不需要这里 58 | out.collect(new Tuple2<>(value.f0, currentSum.f1 / currentSum.f0)); 59 | sum.clear(); 60 | } 61 | } 62 | 63 | @Override 64 | public void open(Configuration parameters) throws Exception { 65 | //初始化 和 get sum ValueState 66 | ValueStateDescriptor> descriptor = 67 | new ValueStateDescriptor<>( 68 | "average", // the state name 69 | TypeInformation.of(new TypeHint>() {}), // type information 70 | Tuple2.of(0L, 0L)); // default value of the state, if nothing was set 71 | sum = getRuntimeContext().getState(descriptor); 72 | } 73 | } 74 | 75 | static class GroupSum extends RichFlatMapFunction, Tuple3>{ 76 | //这里好输出的类型对应 77 | private transient ValueState> sum; 78 | 79 | //第一个字段 输入的类型 80 | //第二个字段 输出的类型 81 | @Override 82 | public void 
flatMap(Tuple2 value, Collector> out) throws Exception { 83 | Tuple3 currentSum = sum.value(); 84 | currentSum.f0 = value.f0; 85 | currentSum.f1 = currentSum.f0 + 1; 86 | currentSum.f2 = currentSum.f2 + value.f1; 87 | sum.update(currentSum); 88 | out.collect(Tuple3.of(currentSum.f0, currentSum.f1, currentSum.f2)); //注意这里每一条数据都会返回出去,所以这个不适合 RichFlatMapFunction 做 聚合的 操作的 89 | } 90 | 91 | @Override 92 | public void open(Configuration parameters) throws Exception { 93 | ValueStateDescriptor> descriptor = new ValueStateDescriptor<>( 94 | "sum", 95 | TypeInformation.of(new TypeHint>() { //和输出类型对应 96 | }), 97 | Tuple3.of(0L, 0L, 0L) 98 | ); 99 | 100 | sum = getRuntimeContext().getState(descriptor); 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/data/WordCountData.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.data; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-04-15 7 | * @Time 21:52 8 | */ 9 | public class WordCountData { 10 | public static final String[] WORDS = new String[] { 11 | "To be, or not to be,--that is the question:--", 12 | "Whether 'tis nobler in the mind to suffer", 13 | "The slings and arrows of outrageous fortune", 14 | "Or to take arms against a sea of troubles,", 15 | "And by opposing end them?--To die,--to sleep,--", 16 | "No more; and by a sleep to say we end", 17 | "The heartache, and the thousand natural shocks", 18 | "That flesh is heir to,--'tis a consummation", 19 | "Devoutly to be wish'd. To die,--to sleep;--", 20 | "To sleep! perchance to dream:--ay, there's the rub;", 21 | "For in that sleep of death what dreams may come,", 22 | "When we have shuffled off this mortal coil,", 23 | "Must give us pause: there's the respect", 24 | "That makes calamity of so long life;", 25 | "For who would bear the whips and scorns of time,", 26 | "The oppressor's wrong, the proud man's contumely,", 27 | "The pangs of despis'd love, the law's delay,", 28 | "The insolence of office, and the spurns", 29 | "That patient merit of the unworthy takes,", 30 | "When he himself might his quietus make", 31 | "With a bare bodkin? who would these fardels bear,", 32 | "To grunt and sweat under a weary life,", 33 | "But that the dread of something after death,--", 34 | "The undiscover'd country, from whose bourn", 35 | "No traveller returns,--puzzles the will,", 36 | "And makes us rather bear those ills we have", 37 | "Than fly to others that we know not of?", 38 | "Thus conscience does make cowards of us all;", 39 | "And thus the native hue of resolution", 40 | "Is sicklied o'er with the pale cast of thought;", 41 | "And enterprises of great pith and moment,", 42 | "With this regard, their currents turn awry,", 43 | "And lose the name of action.--Soft you now!", 44 | "The fair Ophelia!--Nymph, in thy orisons", 45 | "Be all my sins remember'd." 
46 | }; 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/kafka/EventTimeDemo.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.kafka; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.yyb.flink10.commonEntity.Current1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.streaming.api.CheckpointingMode; 7 | import org.apache.flink.streaming.api.TimeCharacteristic; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.streaming.api.functions.ProcessFunction; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 14 | import org.apache.flink.table.api.EnvironmentSettings; 15 | import org.apache.flink.table.api.Table; 16 | import org.apache.flink.table.api.java.StreamTableEnvironment; 17 | import org.apache.flink.types.Row; 18 | import org.apache.flink.util.Collector; 19 | 20 | import java.io.InputStream; 21 | import java.util.Properties; 22 | 23 | /** 24 | * @Author yyb 25 | * @Description 26 | * @Date Create in 2020-08-10 27 | * @Time 18:04 28 | */ 29 | public class EventTimeDemo { 30 | public static void main(String[] args) throws Exception { 31 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 34 | 35 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 36 | env.enableCheckpointing(3000); 37 | env.getCheckpointConfig().setTolerableCheckpointFailureNumber(3); 38 | env.getCheckpointConfig().setMaxConcurrentCheckpoints(1); 39 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 40 | 41 | env.getConfig().setAutoWatermarkInterval(1000); 42 | 43 | InputStream in_env = ClassLoader.getSystemResourceAsStream("env.properties"); 44 | Properties prop = new Properties(); 45 | prop.load(in_env); 46 | 47 | 48 | 49 | Properties properties = new Properties(); 50 | properties.setProperty("bootstrap.servers", prop.getProperty("bootstrap.servers")); 51 | properties.setProperty("zookeeper.connect", prop.getProperty("zookeeper.connect")); 52 | properties.setProperty("group.id", "test"); 53 | 54 | FlinkKafkaConsumer011 kafkaSource = new FlinkKafkaConsumer011("eventsource_yyb", new SimpleStringSchema(), properties); 55 | DataStream stream = env.addSource(kafkaSource); 56 | 57 | 58 | DataStream currentDS = stream.process(new ProcessFunction() { 59 | @Override 60 | public void processElement(String value, Context ctx, Collector out) throws Exception { 61 | Current1 current1 = JSON.parseObject(value, Current1.class); 62 | out.collect(current1); 63 | } 64 | }); 65 | 66 | currentDS.assignTimestampsAndWatermarks(new TimestampExtractor(Time.seconds(0))); 67 | 68 | currentDS.print().setParallelism(1); 69 | 70 | // sql rowtime 71 | //注意 第一个 rowtime 是自己的 rowtime,user_action_time.rowtime才是 真正的 eventTime 72 | Table t = blinkTableEnv.fromDataStream(currentDS, "rowtime,amount,currency,user_action_time.rowtime"); 73 | 74 | 
DataStream tRow = blinkTableEnv.toAppendStream(t, Row.class); 75 | tRow.print().setParallelism(1); 76 | env.execute("EventTimeDemo"); 77 | 78 | 79 | } 80 | 81 | static class TimestampExtractor extends BoundedOutOfOrdernessTimestampExtractor { 82 | 83 | public TimestampExtractor(Time maxOutOfOrderness){ 84 | super(maxOutOfOrderness); 85 | } 86 | @Override 87 | public long extractTimestamp(Current1 element) { 88 | return Long.parseLong(element.getRowtime()); 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/kafka/SendData2KafkaByKafkaConnector.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.kafka 2 | 3 | import java.io.InputStream 4 | import java.util.Properties 5 | 6 | import org.apache.flink.api.scala._ 7 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 8 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011 9 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema 10 | import org.apache.flink.table.api.scala.StreamTableEnvironment 11 | import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table, TableSchema} 12 | import org.apache.flink.table.descriptors.{Json, Kafka, Schema} 13 | 14 | 15 | /** 16 | * @Author yyb 17 | * @Description 18 | * @Date Create in 2020-07-28 19 | * @Time 16:12 20 | */ 21 | object SendData2KafkaByKafkaConnector { 22 | def main(args: Array[String]): Unit = { 23 | val env = StreamExecutionEnvironment.getExecutionEnvironment 24 | 25 | val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties") 26 | val prop: Properties = new Properties() 27 | prop.load(in_env) 28 | 29 | val data = Array(Current(1, "Euro")) 30 | 31 | val dataDS: DataStream[Current] = env.fromCollection(data) 32 | 33 | 34 | val kafkaSink = new FlinkKafkaProducer011[String]( 35 | prop.getProperty("bootstrap.servers"), // broker list 36 | "eventsource_yyb", // target topic 37 | new SimpleStringSchema()); // serialization schema 38 | kafkaSink.setWriteTimestampToKafka(true) 39 | 40 | dataDS.map(_.toString).addSink(kafkaSink) 41 | 42 | 43 | env.execute("SendData2KafkaByKafkaConnector") 44 | } 45 | 46 | case class Current(amount:Int, currency:String) 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/kafka/SendData2KafkaByKafkaConnectorBrowse.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.kafka 2 | 3 | import java.io.InputStream 4 | import java.util.Properties 5 | 6 | import com.alibaba.fastjson.JSON 7 | import com.yyb.flink10.commonEntity.UserBrowseLog 8 | import org.apache.flink.api.scala._ 9 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 10 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011 11 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema 12 | 13 | 14 | /** 15 | * @Author yyb 16 | * @Description 17 | * @Date Create in 2020-07-28 18 | * @Time 16:12 19 | */ 20 | object SendData2KafkaByKafkaConnectorBrowse { 21 | def main(args: Array[String]): Unit = { 22 | val env = StreamExecutionEnvironment.getExecutionEnvironment 23 | 24 | val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties") 25 | val prop: Properties = new Properties() 26 | 
prop.load(in_env) 27 | 28 | val data = Array( 29 | new UserBrowseLog("user_1", "2016-01-01 00:00:00", "browse", "product_5", 20, 0L), 30 | new UserBrowseLog("user_1", "2016-01-01 00:00:01", "browse", "product_5", 20, 0L), 31 | new UserBrowseLog("user_1", "2016-01-01 00:00:02", "browse", "product_5", 20, 0L), 32 | new UserBrowseLog("user_1", "2016-01-01 00:00:03", "browse", "product_5", 20, 0L), 33 | new UserBrowseLog("user_1", "2016-01-01 00:00:04", "browse", "product_5", 20, 0L), 34 | new UserBrowseLog("user_1", "2016-01-01 00:00:05", "browse", "product_5", 20, 0L), 35 | new UserBrowseLog("user_1", "2016-01-01 00:00:06", "browse", "product_5", 20, 0L), 36 | new UserBrowseLog("user_2", "2016-01-01 00:00:01", "browse", "product_3", 20, 0L), 37 | new UserBrowseLog("user_2", "2016-01-01 00:00:02", "browse", "product_3", 20, 0L), 38 | new UserBrowseLog("user_2", "2016-01-01 00:00:05", "browse", "product_3", 20, 0L), 39 | new UserBrowseLog("user_2", "2016-01-01 00:00:06", "browse", "product_3", 20, 0L) 40 | ) 41 | 42 | val dataDS: DataStream[UserBrowseLog] = env.fromCollection(data) 43 | 44 | 45 | val kafkaSink = new FlinkKafkaProducer011[String]( 46 | prop.getProperty("bootstrap.servers"), // broker list 47 | "eventsource_yyb_browse", // target topic 48 | new SimpleStringSchema()); // serialization schema 49 | kafkaSink.setWriteTimestampToKafka(true) 50 | 51 | dataDS.map(JSON.toJSON(_).toString).addSink(kafkaSink) 52 | 53 | 54 | env.execute("SendData2KafkaByKafkaConnectorBrowse") 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/kafka/SendData2KafkaByKafkaConnectorProduct.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.kafka 2 | 3 | import java.io.InputStream 4 | import java.util.Properties 5 | 6 | import com.alibaba.fastjson.JSON 7 | import com.yyb.flink10.commonEntity.{ProductInfo, UserBrowseLog} 8 | import org.apache.flink.api.scala._ 9 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 10 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011 11 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema 12 | 13 | 14 | /** 15 | * @Author yyb 16 | * @Description 17 | * @Date Create in 2020-07-28 18 | * @Time 16:12 19 | */ 20 | object SendData2KafkaByKafkaConnectorProduct { 21 | def main(args: Array[String]): Unit = { 22 | val env = StreamExecutionEnvironment.getExecutionEnvironment 23 | 24 | val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties") 25 | val prop: Properties = new Properties() 26 | prop.load(in_env) 27 | 28 | val data = Array( 29 | new ProductInfo("product_5", "name50", "category50", "2016-01-01 00:00:00", 0L), 30 | new ProductInfo("product_5", "name52", "category52", "2016-01-01 00:00:02", 0L), 31 | new ProductInfo("product_5", "name55", "category55", "2016-01-01 00:00:05", 0L), 32 | new ProductInfo("product_3", "name32", "category32", "2016-01-01 00:00:02", 0L), 33 | new ProductInfo("product_3", "name35", "category35", "2016-01-01 00:00:05", 0L) 34 | ) 35 | 36 | val dataDS: DataStream[ProductInfo] = env.fromCollection(data) 37 | 38 | 39 | val kafkaSink = new FlinkKafkaProducer011[String]( 40 | prop.getProperty("bootstrap.servers"), // broker list 41 | "eventsource_yyb_product", // target topic 42 | new SimpleStringSchema()); // serialization schema 43 | kafkaSink.setWriteTimestampToKafka(true) 44 | 45 | 
dataDS.map(JSON.toJSON(_).toString).addSink(kafkaSink) 46 | 47 | 48 | env.execute("SendData2KafkaByKafkaConnectorBrowse") 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/parquet/WriteParquetWordCount.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.parquet 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import com.yyb.flink10.DataStream.data.WordCountData 6 | import org.apache.flink.api.common.serialization.SimpleStringEncoder 7 | import org.apache.flink.api.java.utils.ParameterTool 8 | import org.apache.flink.core.fs.Path 9 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters 10 | import org.apache.flink.runtime.state.StateBackend 11 | import org.apache.flink.runtime.state.filesystem.FsStateBackend 12 | import org.apache.flink.streaming.api.CheckpointingMode 13 | import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup 14 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner 15 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy 16 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 17 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 18 | import org.apache.flink.streaming.api.scala._ 19 | 20 | /** 21 | * @Author yyb 22 | * @Description 23 | * @Date Create in 2020-04-16 24 | * @Time 09:53 25 | */ 26 | object WriteParquetWordCount { 27 | def main(args: Array[String]): Unit = { 28 | val env = StreamExecutionEnvironment.getExecutionEnvironment 29 | 30 | val params = ParameterTool.fromArgs(args) 31 | 32 | env.getConfig.setGlobalJobParameters(params) 33 | 34 | 35 | 36 | // env.enableCheckpointing(1000) 37 | // env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE) 38 | // env.getCheckpointConfig.enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION) 39 | // // checkpoint错误次数,是否任务会失败 40 | // env.getCheckpointConfig.setTolerableCheckpointFailureNumber(3) 41 | // env.setStateBackend(new FsStateBackend("/tmp/xxx").asInstanceOf[StateBackend]) 42 | 43 | 44 | 45 | 46 | val config = OutputFileConfig 47 | .builder() 48 | .withPartPrefix("wordcount") 49 | .withPartSuffix(".exe") 50 | .build() 51 | 52 | val text = 53 | if(params.has("--input")){ 54 | env.readTextFile(params.get("--input")) 55 | }else{ 56 | println("Executing WordCount example with default inputs data set.") 57 | println("Use --input to specify file input.") 58 | // get default test text data 59 | env.fromElements(WordCountData.WORDS: _*) 60 | } 61 | 62 | val counts: DataStream[WORDCOUNT] = text.flatMap(_.toLowerCase.split("\\W+")) 63 | .filter(_.nonEmpty) 64 | .map(WORDCOUNT(_, 1)) 65 | .keyBy(0) 66 | .sum(1) 67 | 68 | if(params.has("--output")){ 69 | counts.writeAsText(params.get("--output")) 70 | //注意这里的 范型 要和 counts 的范型 一致 71 | val filesink: StreamingFileSink[WORDCOUNT] = StreamingFileSink 72 | .forBulkFormat(new Path(params.get("--output")), ParquetAvroWriters.forReflectRecord(classOf[WORDCOUNT])) 73 | .withBucketAssigner(new DateTimeBucketAssigner()) 74 | // .withRollingPolicy(OnCheckpointRollingPolicy.build()) 75 | // .withOutputFileConfig(config) // 设置输出文件的 前后缀 76 | .build() 77 | 78 | val sink: StreamingFileSink[WORDCOUNT] = StreamingFileSink 79 | 80 | 
.forRowFormat(new Path(""), new SimpleStringEncoder[WORDCOUNT]("UTF-8")) 81 | // .withBucketAssigner(new DateTimeBucketAssigner()) 82 | // .withRollingPolicy(OnCheckpointRollingPolicy.build()) 83 | 84 | // .withOutputFileConfig(config) 85 | .build() 86 | 87 | 88 | 89 | counts.addSink(filesink) 90 | 91 | }else{ 92 | println("Printing result to stdout. Use --output to specify output path.") 93 | counts.print() 94 | } 95 | 96 | env.execute("StreamWordCount") 97 | } 98 | 99 | case class WORDCOUNT(word:String, count:Int) 100 | } 101 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/JDBC/ReadFromInputFormat.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.JDBC 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation 4 | import org.apache.flink.api.java.io.jdbc.JDBCInputFormat 5 | import org.apache.flink.api.java.typeutils.RowTypeInfo 6 | import org.apache.flink.api.scala.typeutils.Types 7 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 8 | import org.apache.flink.streaming.api.scala._ 9 | import org.apache.flink.types.Row 10 | 11 | /** 12 | * @Author yyb 13 | * @Description 14 | * @Date Create in 2020-04-29 15 | * @Time 16:25 16 | */ 17 | object ReadFromInputFormat { 18 | def main(args: Array[String]): Unit = { 19 | val env = StreamExecutionEnvironment.getExecutionEnvironment 20 | 21 | val types = Array[TypeInformation[_]](Types.INT, Types.STRING, Types.SQL_DATE) 22 | val fields = Array[String]("id", "name", "time") 23 | val typeInfo = new RowTypeInfo(types, fields) 24 | val jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat() 25 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 26 | .setDrivername("com.mysql.jdbc.Driver") 27 | .setUsername("root") 28 | .setPassword("111111") 29 | .setQuery("select * from t_order") 30 | .setRowTypeInfo(typeInfo) 31 | .finish() 32 | 33 | val t_order: DataStream[Row] = env.createInput(jdbcInputFormat) 34 | t_order.print() 35 | 36 | env.execute("ReadFromInputFormat") 37 | 38 | // t_order.addSink() //flink-jdbc 的 sinkFunction 都是 非 public的,不可用的,里面的 sinkFunction 是在 tableSource 中使用的 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/JDBC/WriteToMysqlByJDBCOutputformat.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.JDBC 2 | 3 | import com.yyb.flink10.DataStream.data.WordCountData 4 | import org.apache.flink.api.java.io.jdbc.{JDBCAppendTableSink, JDBCOutputFormat, JDBCSinkFunction} 5 | import org.apache.flink.api.scala.DataSet 6 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _} 7 | import org.apache.flink.types.Row 8 | 9 | /** 10 | * @Author yyb 11 | * @Description 注意在 DataStream 的模式下 会出现 多条聚合数据 12 | * @Date Create in 2020-04-26 13 | * @Time 13:50 14 | */ 15 | object WriteToMysqlByJDBCOutputformat { 16 | def main(args: Array[String]): Unit = { 17 | val env = StreamExecutionEnvironment.getExecutionEnvironment 18 | 19 | val text: DataStream[String] = env.fromElements(WordCountData.WORDS: _*) 20 | val counts: DataStream[(String, Int)] = text.flatMap(_.toLowerCase.split("\\W+")) 21 | .filter(_.nonEmpty) 22 | .map((_, 1)) 23 | .keyBy(0) 24 | .sum(1) 25 | 26 | val mysqlOutput: JDBCOutputFormat = 
JDBCOutputFormat.buildJDBCOutputFormat() 27 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 28 | .setDrivername("com.mysql.jdbc.Driver") 29 | .setUsername("root") 30 | .setPassword("111111") 31 | .setQuery("insert into wordcount (word, count) values(?, ?)") //注意这里是 mysql 的插入语句 32 | .setSqlTypes(Array(java.sql.Types.VARCHAR, java.sql.Types.INTEGER)) //这里是每行数据的 类型 33 | .finish() 34 | 35 | // val jdbcSink = new JDBCSinkFunction(mysqlOutput) //注意这个 类不能这样实用化,因为它不是 public class 36 | 37 | 38 | val countRecord: DataStream[Row] = counts.map(x => Row.of(x._1, x._2.asInstanceOf[Integer])) 39 | 40 | 41 | countRecord.writeUsingOutputFormat(mysqlOutput) 42 | 43 | 44 | env.execute("WriteToMysqlByJDBCOutputformat") 45 | 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/BulkEncodedSink/WordCountFileSourceStreamFileSinkOfParquet.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.BulkEncodedSink 2 | 3 | import com.yyb.flink10.sink.ParquetWriterSink 4 | import org.apache.avro.reflect.ReflectData 5 | import org.apache.flink.api.common.serialization.SimpleStringEncoder 6 | import org.apache.flink.api.java.utils.ParameterTool 7 | import org.apache.flink.core.fs.Path 8 | import org.apache.flink.formats.parquet.ParquetWriterFactory 9 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters 10 | import org.apache.flink.streaming.api.CheckpointingMode 11 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 12 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 13 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner 14 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy 15 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 16 | import org.apache.flink.streaming.api.scala._ 17 | import org.apache.parquet.hadoop.metadata.CompressionCodecName 18 | 19 | /** 20 | * @Author yyb 21 | * @Description 22 | * @Date Create in 2020-04-18 23 | * @Time 17:40 24 | */ 25 | object WordCountFileSourceStreamFileSinkOfParquet { 26 | def main(args: Array[String]): Unit = { 27 | System.setProperty("HADOOP_USER_NAME", "yyb") 28 | val env = StreamExecutionEnvironment.getExecutionEnvironment 29 | val params = ParameterTool.fromArgs(args) 30 | 31 | env.getConfig.setGlobalJobParameters(params) 32 | 33 | env.enableCheckpointing(20) 34 | env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE) 35 | 36 | val fileOutputCofig = OutputFileConfig 37 | .builder() 38 | .withPartSuffix(".parquet") 39 | .build() 40 | 41 | val fileSourcePath = "./data/data.txt" 42 | val fileSinkPath = "./xxx.text/rs2" 43 | val fileSinkPath1 = "./xxx.text/rs3" 44 | val fileSinkPath2 = "file:///Users/yyb/ScalaSource/flink10_learn/xxx.text/rs4" 45 | 46 | val wc = env.readTextFile(fileSourcePath) 47 | .flatMap(_.split("\\W+")) 48 | .filter(_.nonEmpty) 49 | .map(WC(_, 1)) 50 | .keyBy(0) 51 | .sum(1) 52 | 53 | val simleSink = StreamingFileSink.forRowFormat(new Path(fileSinkPath), new SimpleStringEncoder[WC]()) 54 | .build() 55 | 56 | // 注意在使用这种方式的 parquet sink 与 一次运行的 dataStream 配合的时候,一般会出现 parquet文件没有写完整的问题 57 | val parquetSink: StreamingFileSink[WC] = StreamingFileSink.forBulkFormat(new Path(fileSinkPath), 
58 | ParquetAvroWriters.forReflectRecord(classOf[WC])) 59 | // .withNewBucketAssigner(new BasePathBucketAssigner()) 60 | // .withOutputFileConfig(fileOutputCofig) 61 | // .withBucketCheckInterval(10) 62 | .withRollingPolicy(OnCheckpointRollingPolicy.build()) 63 | .build() 64 | 65 | wc.print() 66 | 67 | // wc.addSink(parquetSink).setParallelism(1) 68 | 69 | val parquetSinkmy = new ParquetWriterSink[WC](fileSinkPath2, 70 | ReflectData.get.getSchema(classOf[WC]).toString, 71 | CompressionCodecName.UNCOMPRESSED) 72 | 73 | wc.addSink(parquetSinkmy) 74 | 75 | // wc.addSink(txtSink).setParallelism(1) 76 | 77 | 78 | env.execute("WordCountFileSourceStreamFileSinkOfParquet") 79 | } 80 | 81 | case class WC(word:String, ct:Int) 82 | } 83 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/BulkEncodedSink/WordCountFileSourceStreamFileSinkOfParquetToHDFS.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.BulkEncodedSink 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringEncoder 4 | import org.apache.flink.api.java.utils.ParameterTool 5 | import org.apache.flink.core.fs.Path 6 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters 7 | import org.apache.flink.streaming.api.CheckpointingMode 8 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy 9 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 10 | import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _} 11 | 12 | /** 13 | * @Author yyb 14 | * @Description 15 | * @Date Create in 2020-04-18 16 | * @Time 17:40 17 | */ 18 | object WordCountFileSourceStreamFileSinkOfParquetToHDFS { 19 | def main(args: Array[String]): Unit = { 20 | System.setProperty("HADOOP_USER_NAME", "root") 21 | val env = StreamExecutionEnvironment.getExecutionEnvironment 22 | val params = ParameterTool.fromArgs(args) 23 | 24 | env.getConfig.setGlobalJobParameters(params) 25 | 26 | env.enableCheckpointing(20) 27 | env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE) 28 | 29 | val fileOutputCofig = OutputFileConfig 30 | .builder() 31 | .withPartSuffix(".parquet") 32 | .build() 33 | 34 | val fileSourcePath = "/Users/yyb/Downloads/1.txt" 35 | val fileSinkPath = "hdfs://ns1/user/yyb/parquet" 36 | 37 | val wc = env.readTextFile(fileSourcePath) 38 | .flatMap(_.split("\\W+")) 39 | .filter(_.nonEmpty) 40 | .map(WC(_, 1)) 41 | .keyBy(0) 42 | .sum(1) 43 | 44 | val simleSink = StreamingFileSink.forRowFormat(new Path(fileSinkPath), new SimpleStringEncoder[WC]()) 45 | .build() 46 | 47 | val parquetSink = StreamingFileSink.forBulkFormat(new Path(fileSinkPath), 48 | ParquetAvroWriters.forReflectRecord(classOf[WC])) 49 | // .withNewBucketAssigner(new BasePathBucketAssigner()) 50 | // .withOutputFileConfig(fileOutputCofig) 51 | // .withBucketCheckInterval(10) 52 | .withRollingPolicy(OnCheckpointRollingPolicy.build()) 53 | .build() 54 | 55 | // wc.print() 56 | 57 | wc.addSink(parquetSink).setParallelism(1) 58 | 59 | 60 | env.execute("WordCountFileSourceStreamFileSinkOfParquet") 61 | } 62 | 63 | case class WC(word:String, ct:Int) 64 | 65 | } 66 | -------------------------------------------------------------------------------- 
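Editor's note: the two bulk-encoded parquet jobs above rely on the same rule: a bulk format only finalizes part files when a checkpoint completes, which is why they enable checkpointing and use OnCheckpointRollingPolicy, and why running such a sink without checkpoints tends to leave incomplete in-progress parquet files (the caveat noted in WordCountFileSourceStreamFileSinkOfParquet). Below is a minimal sketch of that wiring, not part of the original repo; the object name, input elements and output path are made up, and the same Flink 1.10-era dependencies are assumed. With such a tiny bounded input the job can still finish before the first checkpoint fires, so a real use would pair this with an unbounded or long-running source.

package com.yyb.flink10.DataStream.sink.StreamingFileSink.BulkEncodedSink

import org.apache.flink.core.fs.Path
import org.apache.flink.formats.parquet.avro.ParquetAvroWriters
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy
import org.apache.flink.streaming.api.scala._

object ParquetBulkSinkMinimalSketch {
  case class WC(word: String, ct: Int)

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Bulk formats are rolled on checkpoint only, so checkpointing must be enabled.
    env.enableCheckpointing(1000)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    val wc: DataStream[WC] = env
      .fromElements("to be or not to be")
      .flatMap(_.toLowerCase.split("\\W+"))
      .filter(_.nonEmpty)
      .map(WC(_, 1))
      .keyBy(_.word)
      .sum("ct")

    // Part files under this (made-up) path are finalized whenever a checkpoint completes.
    val parquetSink: StreamingFileSink[WC] = StreamingFileSink
      .forBulkFormat(new Path("./xxx.text/rs_demo"), ParquetAvroWriters.forReflectRecord(classOf[WC]))
      .withRollingPolicy(OnCheckpointRollingPolicy.build())
      .build()

    wc.addSink(parquetSink).setParallelism(1)
    env.execute("ParquetBulkSinkMinimalSketch")
  }
}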
/src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/BulkEncodedSink/WordCountFileSourceStreamFileSinkOfSequence.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.BulkEncodedSink 2 | 3 | import com.yyb.flink10.DataStream.sink.StreamingFileSink.BulkEncodedSink.WordCountFileSourceStreamFileSinkOfParquet.WC 4 | import org.apache.flink.api.java.tuple 5 | import org.apache.hadoop.conf.Configuration 6 | import org.apache.flink.api.java.utils.ParameterTool 7 | import org.apache.flink.configuration.GlobalConfiguration 8 | import org.apache.flink.core.fs.Path 9 | import org.apache.flink.formats.sequencefile.SequenceFileWriterFactory 10 | import org.apache.flink.runtime.util.HadoopUtils 11 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 12 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 13 | import org.apache.flink.streaming.api.scala._ 14 | import org.apache.hadoop.io.{LongWritable, Text} 15 | 16 | /** 17 | * @Author yyb 18 | * @Description 19 | * @Date Create in 2020-04-18 20 | * @Time 18:25 21 | */ 22 | object WordCountFileSourceStreamFileSinkOfSequence { 23 | def main(args: Array[String]): Unit = { 24 | val env = StreamExecutionEnvironment.getExecutionEnvironment 25 | 26 | env.generateSequence(1, 100) 27 | 28 | val params = ParameterTool.fromArgs(args) 29 | 30 | env.getConfig.setGlobalJobParameters(params) 31 | 32 | val fileOutputCofig = OutputFileConfig 33 | .builder() 34 | .withPartSuffix(".sequence") 35 | .build() 36 | 37 | val fileSinkPath = "./xxx.text/rs3" 38 | 39 | val wc: DataStream[tuple.Tuple2[LongWritable, Text]] = env.generateSequence(1, 100).map(x => new tuple.Tuple2(new LongWritable(x), new Text(x.toString))) 40 | 41 | 42 | 43 | 44 | /** 45 | * 这里的 LongWritable 和 Text 都是 org.apache.hadoop.io 下的包, 46 | * 是在 hadoop-common 依赖中的,你可以直接 依赖这个包, 47 | * 也可以 依赖 hadoop-client 这个包, hadoop-client 这个包里面 有 hadoop-common 这个依赖。 48 | */ 49 | val hadoopConf: Configuration = HadoopUtils.getHadoopConfiguration(GlobalConfiguration.loadConfiguration()) 50 | val parquetSink: StreamingFileSink[tuple.Tuple2[LongWritable, Text]] = StreamingFileSink.forBulkFormat(new Path(fileSinkPath), 51 | new SequenceFileWriterFactory(hadoopConf, classOf[LongWritable], classOf[Text])) 52 | // .withNewBucketAssigner(new BasePathBucketAssigner()) 53 | .withOutputFileConfig(fileOutputCofig) 54 | .build() 55 | 56 | wc.addSink(parquetSink).setParallelism(1) 57 | 58 | env.execute("WordCountFileSourceStreamFileSinkOfParquet") 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/RowEncodedSink/WordCountElementsSourceStreamFileSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.RowEncodedSink 2 | 3 | import com.yyb.flink10.DataStream.data.WordCountData 4 | import org.apache.flink.api.common.serialization.SimpleStringEncoder 5 | import org.apache.flink.api.java.utils.ParameterTool 6 | import org.apache.flink.core.fs.Path 7 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner 8 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 9 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 10 | import 
org.apache.flink.streaming.api.scala._ 11 | 12 | /** 13 | * @Author yyb 14 | * @Description 15 | * @Date Create in 2020-04-15 16 | * @Time 21:45 17 | */ 18 | object WordCountElementsSourceStreamFileSink { 19 | def main(args: Array[String]): Unit = { 20 | val env = StreamExecutionEnvironment.getExecutionEnvironment 21 | val params = ParameterTool.fromArgs(args) 22 | 23 | env.getConfig.setGlobalJobParameters(params) 24 | 25 | val config = OutputFileConfig 26 | .builder() 27 | .withPartPrefix("wordcount") 28 | .withPartSuffix("exe") 29 | .build() 30 | 31 | val text: DataStream[String] = 32 | if(params.has("input")){ // ParameterTool.fromArgs stores "--input"/"--output" without the leading dashes 33 | env.readTextFile(params.get("input")) 34 | }else{ 35 | println("Executing WordCount example with default inputs data set.") 36 | println("Use --input to specify file input.") 37 | // get default test text data 38 | env.fromElements(WordCountData.WORDS: _*) 39 | } 40 | 41 | val counts: DataStream[(String, Int)] = text.flatMap(_.toLowerCase.split("\\W+")) 42 | .filter(_.nonEmpty) 43 | .map((_, 1)) 44 | .keyBy(0) 45 | .sum(1) 46 | 47 | if(params.has("output")){ 48 | counts.writeAsText(params.get("output")) // writeAsText is deprecated; prefer the StreamingFileSink below 49 | // Note: the type parameter here must match the element type of counts 50 | /** 51 | * Note: chaining several .withXxx(...) calls before build() hits a bug in this Scala API version; either write it in Java or move to a newer Flink release 52 | */ 53 | val filesink: StreamingFileSink[(String, Int)] = StreamingFileSink 54 | .forRowFormat(new Path(params.get("output")), new SimpleStringEncoder[(String, Int)]("UTF-8")) 55 | // .withRollingPolicy( 56 | // DefaultRollingPolicy.builder() 57 | // .withRolloverInterval(TimeUnit.MINUTES.toMillis(15)) 58 | // .withInactivityInterval(TimeUnit.MINUTES.toMillis(5)) 59 | // .withMaxPartSize(1024 * 1024 * 1024) 60 | // .build()) 61 | // .withBucketAssigner(new DateTimeBucketAssigner()) // this assigner buckets by time as yyyy-MM-dd--HH; the format and time zone can be customized 62 | 63 | .withBucketAssigner(new BasePathBucketAssigner[(String, Int)]) // this assigner creates no bucket sub-directories; part files go directly under the base path 64 | // .withOutputFileConfig(config) // set the prefix/suffix of the output part files 65 | .build() 66 | 67 | counts.addSink(filesink) 68 | 69 | }else{ 70 | println("Printing result to stdout. 
Use --output to specify output path.") 71 | counts.print() 72 | } 73 | 74 | env.execute("StreamWordCount") 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/RowEncodedSink/WordCountElementsSourceStreamFileSinkJava.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.RowEncodedSink; 2 | 3 | import com.yyb.flink10.DataStream.data.WordCountData; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.serialization.SimpleStringEncoder; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.core.fs.Path; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig; 13 | import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink; 14 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner; 15 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy; 16 | import org.apache.flink.util.Collector; 17 | 18 | import java.util.concurrent.TimeUnit; 19 | 20 | /** 21 | * @Author yyb 22 | * @Description 23 | * @Date Create in 2020-04-17 24 | * @Time 17:20 25 | */ 26 | public class WordCountElementsSourceStreamFileSinkJava { 27 | public static void main(String[] args) throws Exception { 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | ParameterTool params = ParameterTool.fromArgs(args); 30 | env.getConfig().setGlobalJobParameters(params); 31 | 32 | OutputFileConfig config = OutputFileConfig 33 | .builder() 34 | .withPartPrefix("wordcount") 35 | .withPartSuffix("exe") 36 | .build(); 37 | 38 | DataStream text = null; 39 | if(params.has("--input")){ 40 | text = env.readTextFile(params.get("--input")); 41 | }else{ 42 | System.out.println("Executing WordCount example with default inputs data set."); 43 | System.out.println("Use --input to specify file input."); 44 | // get default test text data 45 | text = env.fromElements(WordCountData.WORDS); 46 | } 47 | 48 | DataStream> counts = text.flatMap(new myFlatMap()); 49 | SingleOutputStreamOperator> rs = counts.keyBy(0).sum(1); 50 | 51 | StreamingFileSink streamingFileSink = StreamingFileSink.forRowFormat(new Path("./xxx.text/rs4"), 52 | new SimpleStringEncoder>("utf-8")) 53 | .withRollingPolicy(DefaultRollingPolicy.builder() 54 | .withRolloverInterval(TimeUnit.MINUTES.toMillis(15)) 55 | .withInactivityInterval(TimeUnit.MINUTES.toMillis(5)) 56 | .withMaxPartSize(1024 * 1024 * 1024) 57 | .build()) 58 | .withBucketAssigner(new BasePathBucketAssigner>() ) 59 | .withOutputFileConfig(config) 60 | .build(); 61 | 62 | rs.addSink(streamingFileSink); 63 | 64 | rs.setParallelism(1).print(); 65 | 66 | env.execute("xxx"); 67 | 68 | } 69 | public static final class myFlatMap implements FlatMapFunction>{ 70 | 71 | @Override 72 | public void flatMap(String s, Collector> collector) throws Exception { 73 | String[] tokens = s.toLowerCase().split("\\W+"); 74 | for(String token : tokens){ 75 | if(token.length() >0 ){ 76 | 
collector.collect(new Tuple2<>(token, 1)); 77 | } 78 | } 79 | } 80 | } 81 | } 82 | 83 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/RowEncodedSink/WordCountFileSourceStreamFileSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.RowEncodedSink 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringEncoder 4 | import org.apache.flink.api.java.utils.ParameterTool 5 | import org.apache.flink.core.fs.Path 6 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 7 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 8 | import org.apache.flink.streaming.api.scala._ 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-04-18 14 | * @Time 17:17 15 | */ 16 | object WordCountFileSourceStreamFileSink { 17 | def main(args: Array[String]): Unit = { 18 | val env = StreamExecutionEnvironment.getExecutionEnvironment 19 | val params = ParameterTool.fromArgs(args) 20 | 21 | env.getConfig.setGlobalJobParameters(params) 22 | 23 | 24 | val fileSourcePath = "/Users/yyb/Downloads/1.txt" 25 | val fileSinkPath = "./xxx.text/rs1" 26 | 27 | val wc: DataStream[(String, Int)] = env.readTextFile(fileSourcePath) 28 | .flatMap(_.toLowerCase.split("\\W+")) 29 | .filter(_.nonEmpty) 30 | .map((_, 1)) 31 | .keyBy(0) 32 | .sum(1) 33 | 34 | val outputFileConfig = OutputFileConfig 35 | .builder() 36 | .withPartPrefix("filesource") 37 | .withPartSuffix(".finksink") 38 | .build() 39 | 40 | val fileSink: StreamingFileSink[(String, Int)] = StreamingFileSink.forRowFormat(new Path(fileSinkPath), 41 | new SimpleStringEncoder[(String, Int)]("UTF-8")) 42 | .withOutputFileConfig(outputFileConfig) 43 | .build() 44 | 45 | // wc.addSink(fileSink) 46 | wc.addSink(fileSink).setParallelism(1) //这样减少输出文件的个数,但是生产环境不建议使用,会影响性能 47 | 48 | env.execute("WordCountFileSourceStreamFileSink") 49 | 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/OutputFormat/KafkaOutputFormat.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.OutputFormat; 2 | 3 | import org.apache.flink.api.common.io.RichOutputFormat; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.kafka011.shaded.org.apache.kafka.clients.producer.ProducerRecord; 6 | import org.apache.flink.streaming.connectors.kafka.internal.FlinkKafkaProducer; 7 | 8 | import java.io.IOException; 9 | import java.util.Properties; 10 | 11 | /** 12 | * @Author yyb 13 | * @Description 14 | * @Date Create in 2020-07-29 15 | * @Time 17:45 16 | */ 17 | public class KafkaOutputFormat extends RichOutputFormat { 18 | private Properties properties; 19 | 20 | private FlinkKafkaProducer flinkKafkaProducer; 21 | public KafkaOutputFormat(Properties properties){ 22 | this.properties = properties; 23 | } 24 | 25 | 26 | @Override 27 | public void configure(Configuration parameters) { 28 | 29 | } 30 | 31 | @Override 32 | public void open(int taskNumber, int numTasks) throws IOException { 33 | flinkKafkaProducer = new FlinkKafkaProducer(properties); 34 | } 35 | 36 | @Override 37 | public void writeRecord(String record) throws IOException { 38 | ProducerRecord recordP = new ProducerRecord(this.properties.getProperty("topic"), 
record); 39 | flinkKafkaProducer.send(recordP); 40 | } 41 | 42 | @Override 43 | public void close() throws IOException { 44 | flinkKafkaProducer.flush(); 45 | flinkKafkaProducer.close(); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/Current1.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-03 7 | * @Time 09:25 8 | */ 9 | public class Current1 { 10 | private String rowtime; 11 | private int amount; 12 | private String currency; 13 | public Current1(){ 14 | 15 | } 16 | 17 | public Current1(String rowtime, int amount, String currency) { 18 | this.rowtime = rowtime; 19 | this.amount = amount; 20 | this.currency = currency; 21 | } 22 | 23 | public String getRowtime() { 24 | return rowtime; 25 | } 26 | 27 | public void setRowtime(String rowtime) { 28 | this.rowtime = rowtime; 29 | } 30 | 31 | public int getAmount() { 32 | return amount; 33 | } 34 | 35 | public void setAmount(int amount) { 36 | this.amount = amount; 37 | } 38 | 39 | public String getCurrency() { 40 | return currency; 41 | } 42 | 43 | public void setCurrency(String currency) { 44 | this.currency = currency; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return "Current1{" + 50 | "rowtime='" + rowtime + '\'' + 51 | ", amount=" + amount + 52 | ", currency='" + currency + '\'' + 53 | '}'; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/Current2.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-03 7 | * @Time 09:25 8 | */ 9 | public class Current2 { 10 | private String rowtime; 11 | private int amount; 12 | private String currency; 13 | private Long eventTime; 14 | public Current2(){ 15 | 16 | } 17 | 18 | public Current2(String rowtime, int amount, String currency, Long eventTime) { 19 | this.rowtime = rowtime; 20 | this.amount = amount; 21 | this.currency = currency; 22 | this.eventTime = eventTime; 23 | } 24 | 25 | public String getRowtime() { 26 | return rowtime; 27 | } 28 | 29 | public void setRowtime(String rowtime) { 30 | this.rowtime = rowtime; 31 | } 32 | 33 | public int getAmount() { 34 | return amount; 35 | } 36 | 37 | public void setAmount(int amount) { 38 | this.amount = amount; 39 | } 40 | 41 | public String getCurrency() { 42 | return currency; 43 | } 44 | 45 | public void setCurrency(String currency) { 46 | this.currency = currency; 47 | } 48 | 49 | public Long getEventTime() { 50 | return eventTime; 51 | } 52 | 53 | public void setEventTime(Long eventTime) { 54 | this.eventTime = eventTime; 55 | } 56 | 57 | @Override 58 | public String toString() { 59 | return "Current2{" + 60 | "rowtime='" + rowtime + '\'' + 61 | ", amount=" + amount + 62 | ", currency='" + currency + '\'' + 63 | ", eventTime=" + eventTime + 64 | '}'; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/Pi.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-06-10 7 | * @Time 11:38 8 | */ 9 | 
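// Editor's note: Pi is the plain POJO bound to the kafka JSON source in
// ReadFromKafkaConnectorWriteToLocalParquetFilePiJava (via toAppendStream(test, Pi.class))
// and written with ParquetAvroWriters.forReflectRecord(Pi.class); it therefore needs to stay
// a simple bean with a public no-arg constructor and getters/setters matching the "id"/"time" fields.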
public class Pi{ 10 | private String id; 11 | private String time; 12 | 13 | public String getId() { 14 | return id; 15 | } 16 | 17 | public void setId(String id) { 18 | this.id = id; 19 | } 20 | 21 | public String getTime() { 22 | return time; 23 | } 24 | 25 | public void setTime(String time) { 26 | this.time = time; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/ProductInfo.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-19 7 | * @Time 09:17 8 | */ 9 | public class ProductInfo { 10 | private String productID; 11 | private String productName; 12 | private String productCategory; 13 | private String updatedAt; 14 | private Long updatedAtTimestamp; 15 | 16 | public ProductInfo() { 17 | } 18 | 19 | public ProductInfo(String productID, String productName, String productCategory, String updatedAt, Long updatedAtTimestamp) { 20 | this.productID = productID; 21 | this.productName = productName; 22 | this.productCategory = productCategory; 23 | this.updatedAt = updatedAt; 24 | this.updatedAtTimestamp = updatedAtTimestamp; 25 | } 26 | 27 | public String getProductID() { 28 | return productID; 29 | } 30 | 31 | public void setProductID(String productID) { 32 | this.productID = productID; 33 | } 34 | 35 | public String getProductName() { 36 | return productName; 37 | } 38 | 39 | public void setProductName(String productName) { 40 | this.productName = productName; 41 | } 42 | 43 | public String getProductCategory() { 44 | return productCategory; 45 | } 46 | 47 | public void setProductCategory(String productCategory) { 48 | this.productCategory = productCategory; 49 | } 50 | 51 | public String getUpdatedAt() { 52 | return updatedAt; 53 | } 54 | 55 | public void setUpdatedAt(String updatedAt) { 56 | this.updatedAt = updatedAt; 57 | } 58 | 59 | public Long getUpdatedAtTimestamp() { 60 | return updatedAtTimestamp; 61 | } 62 | 63 | public void setUpdatedAtTimestamp(Long updatedAtTimestamp) { 64 | this.updatedAtTimestamp = updatedAtTimestamp; 65 | } 66 | 67 | @Override 68 | public String toString() { 69 | return "ProductInfo{" + 70 | "productID='" + productID + '\'' + 71 | ", productName='" + productName + '\'' + 72 | ", productCategory='" + productCategory + '\'' + 73 | ", updatedAt='" + updatedAt + '\'' + 74 | ", updatedAtTimestamp=" + updatedAtTimestamp + 75 | '}'; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/Rate.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-06 7 | * @Time 17:56 8 | */ 9 | public class Rate { 10 | private String rowtime; 11 | private String currency; 12 | private int rate; 13 | 14 | public Rate() { 15 | } 16 | 17 | public Rate(String rowtime, String currency, Integer rate) { 18 | this.rowtime = rowtime; 19 | this.currency = currency; 20 | this.rate = rate; 21 | } 22 | 23 | public String getRowtime() { 24 | return rowtime; 25 | } 26 | 27 | public void setRowtime(String rowtime) { 28 | this.rowtime = rowtime; 29 | } 30 | 31 | public String getCurrency() { 32 | return currency; 33 | } 34 | 35 | public void setCurrency(String currency) { 36 | this.currency = currency; 37 | } 38 | 
39 | public int getRate() { 40 | return rate; 41 | } 42 | 43 | public void setRate(int rate) { 44 | this.rate = rate; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return "Rate{" + 50 | "rowtime='" + rowtime + '\'' + 51 | ", currency='" + currency + '\'' + 52 | ", rate=" + rate + 53 | '}'; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/Rate2.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-06 7 | * @Time 17:56 8 | */ 9 | public class Rate2 { 10 | private String rowtime; 11 | private String currency; 12 | private int rate; 13 | private Long eventTime; 14 | 15 | public Rate2() { 16 | } 17 | 18 | public Rate2(String rowtime, String currency, int rate, Long eventTime) { 19 | this.rowtime = rowtime; 20 | this.currency = currency; 21 | this.rate = rate; 22 | this.eventTime = eventTime; 23 | } 24 | 25 | public String getRowtime() { 26 | return rowtime; 27 | } 28 | 29 | public void setRowtime(String rowtime) { 30 | this.rowtime = rowtime; 31 | } 32 | 33 | public String getCurrency() { 34 | return currency; 35 | } 36 | 37 | public void setCurrency(String currency) { 38 | this.currency = currency; 39 | } 40 | 41 | public int getRate() { 42 | return rate; 43 | } 44 | 45 | public void setRate(int rate) { 46 | this.rate = rate; 47 | } 48 | 49 | public Long getEventTime() { 50 | return eventTime; 51 | } 52 | 53 | public void setEventTime(Long eventTime) { 54 | this.eventTime = eventTime; 55 | } 56 | 57 | @Override 58 | public String toString() { 59 | return "Rate2{" + 60 | "rowtime='" + rowtime + '\'' + 61 | ", currency='" + currency + '\'' + 62 | ", rate=" + rate + 63 | ", eventTime=" + eventTime + 64 | '}'; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/UserBrowseLog.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-19 7 | * @Time 09:05 8 | */ 9 | public class UserBrowseLog { 10 | private String userID; 11 | private String eventTime; 12 | private String eventType; 13 | private String productID; 14 | private Integer productPrice; 15 | private Long eventTimeTimestamp; 16 | 17 | public UserBrowseLog() { 18 | } 19 | 20 | public UserBrowseLog(String userID, String eventTime, String eventType, String productID, Integer productPrice, Long eventTimeTimestamp) { 21 | this.userID = userID; 22 | this.eventTime = eventTime; 23 | this.eventType = eventType; 24 | this.productID = productID; 25 | this.productPrice = productPrice; 26 | this.eventTimeTimestamp = eventTimeTimestamp; 27 | } 28 | 29 | public String getUserID() { 30 | return userID; 31 | } 32 | 33 | public void setUserID(String userID) { 34 | this.userID = userID; 35 | } 36 | 37 | public String getEventTime() { 38 | return eventTime; 39 | } 40 | 41 | public void setEventTime(String eventTime) { 42 | this.eventTime = eventTime; 43 | } 44 | 45 | public String getEventType() { 46 | return eventType; 47 | } 48 | 49 | public void setEventType(String eventType) { 50 | this.eventType = eventType; 51 | } 52 | 53 | public String getProductID() { 54 | return productID; 55 | } 56 | 57 | public void setProductID(String productID) { 58 | 
this.productID = productID; 59 | } 60 | 61 | public Integer getProductPrice() { 62 | return productPrice; 63 | } 64 | 65 | public void setProductPrice(Integer productPrice) { 66 | this.productPrice = productPrice; 67 | } 68 | 69 | public Long getEventTimeTimestamp() { 70 | return eventTimeTimestamp; 71 | } 72 | 73 | public void setEventTimeTimestamp(Long eventTimeTimestamp) { 74 | this.eventTimeTimestamp = eventTimeTimestamp; 75 | } 76 | 77 | @Override 78 | public String toString() { 79 | return "UserBrowseLog{" + 80 | "userID='" + userID + '\'' + 81 | ", eventTime='" + eventTime + '\'' + 82 | ", eventType='" + eventType + '\'' + 83 | ", productID='" + productID + '\'' + 84 | ", productPrice=" + productPrice + 85 | ", eventTimeTimestamp=" + eventTimeTimestamp + 86 | '}'; 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/sink/KafkaBatchTableSink.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.sink; 2 | 3 | import com.yyb.flink10.OutputFormat.KafkaOutputFormat; 4 | import org.apache.flink.api.common.typeinfo.TypeInformation; 5 | import org.apache.flink.api.java.DataSet; 6 | import org.apache.flink.api.java.typeutils.RowTypeInfo; 7 | import org.apache.flink.table.sinks.BatchTableSink; 8 | import org.apache.flink.table.sinks.TableSink; 9 | import org.apache.flink.types.Row; 10 | 11 | /** 12 | * @Author yyb 13 | * @Description 14 | * @Date Create in 2020-07-30 15 | * @Time 13:36 16 | */ 17 | public class KafkaBatchTableSink implements BatchTableSink { 18 | 19 | private final KafkaOutputFormat kafkaOutputFormat; 20 | private String[] fieldNames = new String[]{"value"}; 21 | private TypeInformation[] fieldTypes = new TypeInformation[]{TypeInformation.of(String.class)}; 22 | 23 | public KafkaBatchTableSink(KafkaOutputFormat kafkaOutputFormat){ 24 | this.kafkaOutputFormat = kafkaOutputFormat; 25 | } 26 | 27 | @Override 28 | public void emitDataSet(DataSet dataSet) { 29 | dataSet.output(kafkaOutputFormat); 30 | } 31 | 32 | @Override 33 | public TableSink configure(String[] fieldNames, TypeInformation[] fieldTypes) { 34 | return null; 35 | } 36 | 37 | @Override 38 | public String[] getFieldNames() { 39 | return fieldNames; 40 | } 41 | 42 | @Override 43 | public TypeInformation[] getFieldTypes() { 44 | return fieldTypes; 45 | } 46 | 47 | @Override 48 | public TypeInformation getOutputType() { 49 | return TypeInformation.of(String.class); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/sink/ParquetSinkFunction.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.sink 2 | 3 | import org.apache.flink.api.common.functions.RuntimeContext 4 | import org.apache.flink.configuration.Configuration 5 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 6 | import org.apache.parquet.hadoop.ParquetWriter 7 | import org.apache.parquet.hadoop.metadata.CompressionCodecName 8 | 9 | /** 10 | * @Author yyb 11 | * @Description 12 | * @Date Create in 2020-04-25 13 | * @Time 21:47 14 | */ 15 | class ParquetSinkFunction[IN](val path: String, val schema: String, val compressionCodecName: CompressionCodecName) extends RichSinkFunction[IN]{ 16 | var parquetWriter: ParquetWriter[IN] = null 17 | 18 | override def close(): Unit = { 19 | parquetWriter.close() 20 | } 21 | 22 | override def invoke(value: IN, 
context: SinkFunction.Context[_]): Unit = { 23 | parquetWriter.write(value) 24 | } 25 | 26 | override def open(parameters: Configuration): Unit = { 27 | super.open(parameters) 28 | val ctx = getRuntimeContext 29 | parquetWriter 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/sink/ParquetWriterSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.sink 2 | 3 | import org.apache.avro.Schema 4 | import org.apache.avro.reflect.ReflectData 5 | import org.apache.flink.configuration.Configuration 6 | import org.apache.hadoop.fs.Path 7 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 8 | import org.apache.parquet.avro.AvroParquetWriter 9 | import org.apache.parquet.hadoop.{ParquetFileWriter, ParquetWriter} 10 | import org.apache.parquet.hadoop.metadata.CompressionCodecName 11 | 12 | class ParquetWriterSink[IN](val path: String, val schema: String, val compressionCodecName: CompressionCodecName) extends RichSinkFunction[IN] { 13 | var parquetWriter: ParquetWriter[IN] = null 14 | 15 | override def open(parameters: Configuration): Unit = { 16 | parquetWriter = AvroParquetWriter.builder[IN](new Path(path)) 17 | .withSchema(new Schema.Parser().parse(schema)) 18 | .withCompressionCodec(compressionCodecName) 19 | // .withPageSize(config.pageSize) 20 | // .withRowGroupSize(config.blockSize) 21 | // .withDictionaryEncoding(config.enableDictionary) 22 | .withWriteMode(ParquetFileWriter.Mode.OVERWRITE) 23 | // .withValidation(config.validating) 24 | .withDataModel(ReflectData.get) 25 | .build() 26 | } 27 | 28 | override def close(): Unit = { 29 | parquetWriter.close() 30 | } 31 | 32 | override def invoke(value: IN, context: SinkFunction.Context[_]): Unit = { 33 | parquetWriter.write(value) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/BatchQuery.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch 2 | 3 | import org.apache.flink.table.api.{EnvironmentSettings, Table, TableEnvironment} 4 | import org.apache.flink.table.sources.CsvTableSource 5 | import org.apache.flink.table.types.AtomicDataType 6 | import org.apache.flink.table.types.logical.{IntType, VarCharType} 7 | 8 | /** 9 | * @Author yyb 10 | * @Description 注意 Blink 11 | * @Date Create in 2020-04-18 12 | * @Time 21:05 13 | */ 14 | object BatchQuery { 15 | def main(args: Array[String]): Unit = { 16 | val bbSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build() 17 | val bbTableEnv: TableEnvironment = TableEnvironment.create(bbSettings) 18 | 19 | 20 | 21 | val sourceFIlePath = "/Users/yyb/Dwonloads/1_rs.csv" 22 | 23 | val stringField = new AtomicDataType(new VarCharType(50)) 24 | val intField = new AtomicDataType(new IntType) 25 | val csvTableSource: CsvTableSource = CsvTableSource.builder() 26 | .path(sourceFIlePath) 27 | .field("word", stringField) 28 | // .field("word", Types.STRING) //方法已压制 29 | .field("count", intField) 30 | // .field("count", Types.INT) //方法已压制 31 | .build() 32 | 33 | val input: Table = bbTableEnv.fromTableSource(csvTableSource) 34 | 35 | 36 | 37 | 38 | bbTableEnv.createTemporaryView("wordcount", input) 39 | 40 | bbTableEnv.sqlQuery("select * from wordcount").printSchema() 41 | 42 | 43 | bbTableEnv.execute("BatchQuery") 44 | 45 | 46 | } 
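  // Editor's note (sketch, not in the original file): main above only prints the schema of the
  // query result. To actually materialize it, a sink has to be registered and filled before the
  // bbTableEnv.execute("BatchQuery") call, e.g. with a CsvTableSink (hypothetical output path),
  // mirroring what BlinkBatchWriteToJDBCTableSink does:
  //   val csvSink = new CsvTableSink("/tmp/wordcount_out", ",")
  //   bbTableEnv.registerTableSink("wordcount_out",
  //     Array("word", "count"),
  //     Array[TypeInformation[_]](BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
  //     csvSink)
  //   bbTableEnv.sqlQuery("select * from wordcount").insertInto("wordcount_out")
  // (imports assumed: org.apache.flink.table.sinks.CsvTableSink,
  //  org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeInformation})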
47 | 48 | case class WORDCOUNT(word:String, count:Int) 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/BlinkHiveBatchDemo.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch 2 | 3 | import org.apache.flink.table.api.{EnvironmentSettings, TableEnvironment} 4 | import org.apache.flink.table.catalog.hive.HiveCatalog 5 | 6 | /** 7 | * @Author yyb 8 | * @Description 9 | * @Date Create in 2020-04-20 10 | * @Time 13:33 11 | */ 12 | object BlinkHiveBatchDemo { 13 | def main(args: Array[String]): Unit = { 14 | val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build() 15 | val bbTableEnv = TableEnvironment.create(settings) 16 | 17 | val name = "myhive" 18 | val defaultDatabase = "flink" 19 | // val hiveConfDir = "src/main/resources/" //hive-site.xml的本地目录 ,注意 当有 hive-site.xml 在 resources 下的时候 ,hiveConfDir 也需要设置,否则会提示 20 | //Required table missing : "DBS" in Catalog "" Schema "". DataNucleus requires this table to perform its persistence operations. Either your MetaData is incorrect, or you need to enable "datanucleus.schema.autoCreateTables" 21 | val hiveConfDir = this.getClass.getResource("/").getFile //可以通过这一种方式设置 hiveConfDir,这样的话,开发与测试和生产环境可以保持一致 22 | 23 | val version = "2.3.6" 24 | val hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, version) 25 | 26 | bbTableEnv.registerCatalog("myhive", hive) 27 | bbTableEnv.useCatalog("myhive") 28 | 29 | //注意 查询语句 myhive.flink.a myhive是你的Hcatalog的别称,flink是库名称,a是别名称 30 | bbTableEnv.sqlQuery("select * from myhive.flink.a").printSchema() 31 | 32 | 33 | // bbTableEnv.execute("BlinkHiveBatchDemo") 34 | 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/JDBC/BlinkBatchReadFromJDBCTableSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch.JDBC 2 | 3 | import org.apache.flink.api.java.io.jdbc.{JDBCLookupOptions, JDBCOptions, JDBCReadOptions, JDBCTableSource} 4 | import org.apache.flink.table.api.{EnvironmentSettings, Table, TableEnvironment, TableSchema} 5 | import org.apache.flink.table.types.AtomicDataType 6 | import org.apache.flink.table.types.logical.{DateType, IntType, VarCharType} 7 | 8 | /** 9 | * @Author yyb 10 | * @Description 11 | * @Date Create in 2020-04-27 12 | * @Time 11:09 13 | */ 14 | object BlinkBatchReadFromJDBCTableSource { 15 | def main(args: Array[String]): Unit = { 16 | val settings: EnvironmentSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build() 17 | val blinkBatchTableEnv = TableEnvironment.create(settings) 18 | 19 | val lookOption = JDBCLookupOptions.builder() 20 | .setCacheExpireMs(60*1000) 21 | .setCacheMaxSize(1024*1024) 22 | .setMaxRetryTimes(10) 23 | .build() 24 | 25 | val jdbcOpition = JDBCOptions.builder() 26 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 27 | .setDriverName("com.mysql.jdbc.Driver") 28 | .setUsername("root") 29 | .setPassword("111111") 30 | .setTableName("t_order") 31 | .build() 32 | 33 | val jdbcReadOption = JDBCReadOptions.builder() 34 | .setFetchSize(5000) 35 | .build() 36 | 37 | val tableSchema = TableSchema.builder() 38 | .field("id", new AtomicDataType(new IntType)) 39 | .field("name", new AtomicDataType(new VarCharType(2147483647))) //注意 String 就是 
2147483647 40 | .field("time", new AtomicDataType(new DateType)) 41 | .build() 42 | 43 | val jdbcTableSource: JDBCTableSource = JDBCTableSource.builder() 44 | .setLookupOptions(lookOption) 45 | .setOptions(jdbcOpition) 46 | .setReadOptions(jdbcReadOption) 47 | .setSchema(tableSchema) 48 | .build() 49 | 50 | 51 | val t_order: Table = blinkBatchTableEnv.fromTableSource(jdbcTableSource) 52 | 53 | blinkBatchTableEnv.createTemporaryView("t_order", t_order) 54 | 55 | blinkBatchTableEnv.sqlQuery("select * from t_order").printSchema() 56 | 57 | 58 | 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/JDBC/BlinkBatchWriteToJDBCTableSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch.JDBC 2 | 3 | 4 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo 5 | import org.apache.flink.api.java.io.jdbc.JDBCAppendTableSink 6 | import org.apache.flink.table.api.{EnvironmentSettings, Table, TableEnvironment} 7 | import org.apache.flink.table.sources.CsvTableSource 8 | import org.apache.flink.table.types.AtomicDataType 9 | import org.apache.flink.table.types.logical.{IntType, VarCharType} 10 | 11 | /** 12 | * @Author yyb 13 | * @Description 14 | * @Date Create in 2020-04-27 15 | * @Time 13:19 16 | */ 17 | object BlinkBatchWriteToJDBCTableSink { 18 | def main(args: Array[String]): Unit = { 19 | //blink env 20 | val settings = EnvironmentSettings.newInstance().inBatchMode().useBlinkPlanner().build() 21 | val blinkBatchTableEnv = TableEnvironment.create(settings) 22 | 23 | val sourceFIlePath = "/Users/yyb/Downloads/1_rs.csv" 24 | 25 | val stringField = new AtomicDataType(new VarCharType(50)) 26 | val intField = new AtomicDataType(new IntType) 27 | val csvTableSource: CsvTableSource = CsvTableSource.builder() 28 | .path(sourceFIlePath) 29 | .field("word", stringField) 30 | // .field("word", Types.STRING) //方法已压制 31 | .field("count", intField) 32 | // .field("count", Types.INT) //方法已压制 33 | .build() 34 | 35 | val word: Table = blinkBatchTableEnv.fromTableSource(csvTableSource) 36 | 37 | 38 | val jdbcAppendTableSink = JDBCAppendTableSink.builder() 39 | .setBatchSize(5000) 40 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 41 | .setDrivername("com.mysql.jdbc.Driver") 42 | .setUsername("root") 43 | .setPassword("111111") 44 | .setQuery("insert into wordcount (word, count) values(?, ?)") 45 | .setParameterTypes(java.sql.Types.VARCHAR, java.sql.Types.INTEGER) 46 | .build() 47 | 48 | 49 | blinkBatchTableEnv.createTemporaryView("word", word) 50 | 51 | val sql = 52 | s""" 53 | |select * from word 54 | """.stripMargin 55 | blinkBatchTableEnv.sqlQuery(sql).printSchema() 56 | 57 | blinkBatchTableEnv.registerTableSink("word1", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcAppendTableSink) 58 | 59 | blinkBatchTableEnv.sqlQuery(sql).insertInto("word1") //注意 这样 把数据 倒入到 sink 中去 60 | 61 | 62 | blinkBatchTableEnv.execute("BlinkBatchWriteToJDBCTableSink") 63 | 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/hive/Fromkafka2HiveUseCatalog.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch.hive; 2 | 3 | import com.yyb.flink10.commonEntity.Pi; 4 | import 
com.yyb.flink10.table.blink.stream.hive.WriteData2HiveJavaReadFromkafkaTableSource; 5 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.api.java.typeutils.RowTypeInfo; 8 | import org.apache.flink.formats.json.JsonRowDeserializationSchema; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.connectors.kafka.Kafka010TableSource; 12 | import org.apache.flink.streaming.connectors.kafka.config.StartupMode; 13 | import org.apache.flink.table.api.*; 14 | import org.apache.flink.table.api.java.StreamTableEnvironment; 15 | import org.apache.flink.table.catalog.ObjectPath; 16 | import org.apache.flink.table.catalog.hive.HiveCatalog; 17 | import org.apache.flink.table.descriptors.Schema; 18 | 19 | import java.util.Collections; 20 | import java.util.List; 21 | import java.util.Optional; 22 | import java.util.Properties; 23 | 24 | /** 25 | * @Author yyb 26 | * @Description 经过多次尝试,目前 flink 不支持 table insert hive table 27 | * @Date Create in 2020-07-07 28 | * @Time 16:22 29 | */ 30 | public class Fromkafka2HiveUseCatalog { 31 | public static void main(String[] args) throws Exception { 32 | // System.setProperty("HADOOP_USER_NAME", "center"); 33 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build(); 34 | TableEnvironment tableEnv = TableEnvironment.create(settings); 35 | 36 | String name = "myhive"; 37 | String defaultDatabase = "test"; 38 | String hiveConfDir = WriteData2HiveJavaReadFromkafkaTableSource.class.getResource("/").getFile(); //可以通过这一种方式设置 hiveConfDir,这样的话,开发与测试和生产环境可以保持一致 39 | String version = "2.1.1"; 40 | HiveCatalog hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, version); 41 | 42 | tableEnv.registerCatalog("myhive", hive); 43 | tableEnv.useCatalog("myhive"); 44 | 45 | /** 46 | * kafka start 47 | */ 48 | Schema schema = new Schema(); 49 | TableSchema tableSchema = TableSchema.builder() 50 | .field("id", DataTypes.STRING()) 51 | .field("time", DataTypes.STRING()) 52 | .build(); 53 | schema.schema(tableSchema); 54 | Properties prop = new Properties(); 55 | prop.put("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181"); 56 | prop.put("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092"); 57 | prop.put("group.id", "yyb_dev"); 58 | 59 | TypeInformation[] types = new TypeInformation[]{BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO}; 60 | String[] fields = new String[]{"id", "time"}; 61 | RowTypeInfo rowTypeINfo = new RowTypeInfo(types, fields); 62 | JsonRowDeserializationSchema jsonRowDeserializationSchema = new JsonRowDeserializationSchema.Builder(rowTypeINfo).build(); 63 | // Kafka010TableSource kafka = new Kafka010TableSource(tableSchema, "eventsource_yhj", prop, jsonRowDeserializationSchema); 64 | //指定 从 kafka 的 earliest 开始消费 65 | Kafka010TableSource kafka = new Kafka010TableSource(tableSchema, Optional.empty(), Collections.emptyList(), Optional.empty(),"eventsource_yhj", prop, jsonRowDeserializationSchema 66 | , StartupMode.EARLIEST, Collections.emptyMap()); 67 | 68 | Table kafkaTable = tableEnv.fromTableSource(kafka); 69 | 70 | tableEnv.createTemporaryView("kafkaTable", kafkaTable); 71 | 72 | /** 73 | * kafka end 74 | */ 75 | 76 | 77 | List dbs = hive.listDatabases(); 78 | for(String db : dbs){ 79 | System.out.println(db); 80 | } 
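        // Editor's note: the statements below are sanity checks against the registered HiveCatalog
        // (listing the tables in "test" and verifying that test.a exists) before the partitioned
        // "insert into test.a partition(dt=20200305) ..." statement further down is attempted.
        // As the class-level comment records, this kafka-to-Hive insert did not work with this
        // Flink/Hive combination in the author's tests.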
81 | 82 | System.out.println("------------------"); 83 | 84 | List tbs = hive.listTables("test"); 85 | for(String tb : tbs){ 86 | System.out.println(tb); 87 | } 88 | 89 | boolean xx = hive.tableExists(new ObjectPath("test", "a")); 90 | System.out.println(xx + " cvb--------------"); 91 | Table sink = tableEnv.from("test.a"); 92 | sink.printSchema(); 93 | 94 | 95 | // tableEnv.insertInto("test.a", kafkaTable); 96 | // kafkaTable.insertInto("test.a"); 97 | 98 | String sql = "insert into test.a partition(dt=20200305) select * from kafkaTable"; 99 | tableEnv.sqlUpdate(sql); 100 | 101 | tableEnv.execute("Fromkafka2HiveUseCatalog"); 102 | 103 | 104 | 105 | 106 | 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/kafka/WriteJsonDataByKafkaConnector.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch.kafka; 2 | 3 | import org.apache.flink.table.api.DataTypes; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.TableEnvironment; 6 | import org.apache.flink.table.api.TableSchema; 7 | import org.apache.flink.table.descriptors.ConnectTableDescriptor; 8 | import org.apache.flink.table.descriptors.Json; 9 | import org.apache.flink.table.descriptors.Kafka; 10 | import org.apache.flink.table.descriptors.Schema; 11 | 12 | import java.util.ArrayList; 13 | import java.util.List; 14 | 15 | /** 16 | * @Author yyb 17 | * @Description 18 | * @Date Create in 2020-07-27 19 | * @Time 19:22 20 | */ 21 | public class WriteJsonDataByKafkaConnector { 22 | public static void main(String[] args){ 23 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build(); 24 | TableEnvironment blinkBatchTableEnv = TableEnvironment.create(settings); 25 | 26 | Kafka kafka = new Kafka(); 27 | kafka.version("0.11") 28 | .topic("eventsource_yhj") 29 | .property("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 30 | .property("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 31 | .property("group.id", "yyb_dev") 32 | .startFromEarliest(); 33 | 34 | Schema schema = new Schema(); 35 | TableSchema tableSchema = TableSchema.builder() 36 | .field("id", DataTypes.STRING()) 37 | .field("time", DataTypes.STRING()) 38 | .build(); 39 | schema.schema(tableSchema); 40 | ConnectTableDescriptor tableSink = blinkBatchTableEnv.connect(kafka) 41 | .withFormat(new Json().failOnMissingField(true)) 42 | .withSchema(schema); 43 | 44 | 45 | tableSink.createTemporaryTable("kafka_sink"); 46 | 47 | 48 | 49 | 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/FileSystem/ReadFromKafkaConnectorWriteToLocalParquetFilePiJava.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.FileSystem; 2 | 3 | import com.yyb.flink10.commonEntity.Pi; 4 | import org.apache.avro.JsonProperties; 5 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 6 | import org.apache.flink.api.java.typeutils.TupleTypeInfo; 7 | import org.apache.flink.core.fs.Path; 8 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters; 9 | import org.apache.flink.streaming.api.CheckpointingMode; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import 
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink; 13 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy; 14 | import org.apache.flink.table.api.DataTypes; 15 | import org.apache.flink.table.api.EnvironmentSettings; 16 | import org.apache.flink.table.api.Table; 17 | import org.apache.flink.table.api.TableSchema; 18 | import org.apache.flink.table.api.java.StreamTableEnvironment; 19 | import org.apache.flink.table.descriptors.*; 20 | 21 | import java.util.ArrayList; 22 | 23 | /** 24 | * @Author yyb 25 | * @Description 26 | * @Date Create in 2020-06-16 27 | * @Time 10:24 28 | */ 29 | public class ReadFromKafkaConnectorWriteToLocalParquetFilePiJava { 30 | public static void main(String[] args) throws Exception { 31 | EnvironmentSettings setttings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build(); 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env, setttings); 34 | 35 | env.enableCheckpointing(20); 36 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 37 | 38 | Kafka kafka = new Kafka(); 39 | kafka.version("0.11") 40 | .topic("eventsource_yhj") 41 | .property("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 42 | .property("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 43 | .property("group.id", "yyb_dev") 44 | .startFromEarliest(); 45 | 46 | Schema schema = new Schema(); 47 | TableSchema tableSchema = TableSchema.builder() 48 | .field("id", DataTypes.STRING()) 49 | .field("time", DataTypes.STRING()) 50 | .build(); 51 | schema.schema(tableSchema); 52 | ConnectTableDescriptor tableSource = flinkTableEnv.connect(kafka) 53 | .withFormat(new Json().failOnMissingField(true)) 54 | .withSchema(schema); 55 | tableSource.createTemporaryTable("test"); 56 | String sql = "select * from test"; 57 | 58 | Table test = flinkTableEnv.from("test"); 59 | test.printSchema(); 60 | 61 | 62 | //transfor 2 dataStream 63 | DataStream testDataStream = flinkTableEnv.toAppendStream(test, Pi.class); //使用 Class 的方式 64 | 65 | String fileSinkPath = "./xxx.text/rs6/"; 66 | 67 | StreamingFileSink parquetSink = StreamingFileSink. 
68 | forBulkFormat(new Path(fileSinkPath), 69 | ParquetAvroWriters.forReflectRecord(Pi.class)) 70 | .withRollingPolicy(OnCheckpointRollingPolicy.build()) 71 | .build(); 72 | 73 | testDataStream.addSink(parquetSink).setParallelism(1); 74 | 75 | flinkTableEnv.execute("ReadFromKafkaConnectorWriteToLocalFileJava"); 76 | 77 | 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/FileSystem/ReadFromKafkaConnectorWriteToLocalTextFileJava.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.FileSystem; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.DataTypes; 5 | import org.apache.flink.table.api.EnvironmentSettings; 6 | import org.apache.flink.table.api.Table; 7 | import org.apache.flink.table.api.TableSchema; 8 | import org.apache.flink.table.api.java.StreamTableEnvironment; 9 | import org.apache.flink.table.descriptors.*; 10 | 11 | /** 12 | * @Author yyb 13 | * @Description 14 | * @Date Create in 2020-06-16 15 | * @Time 10:24 16 | */ 17 | public class ReadFromKafkaConnectorWriteToLocalTextFileJava { 18 | public static void main(String[] args) throws Exception { 19 | EnvironmentSettings setttings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build(); 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env, setttings); 22 | 23 | Kafka kafka = new Kafka(); 24 | kafka.version("0.11") 25 | .topic("eventsource_yhj") 26 | .property("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 27 | .property("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 28 | .property("group.id", "yyb_dev") 29 | .startFromEarliest(); 30 | 31 | Schema schema = new Schema(); 32 | TableSchema tableSchema = TableSchema.builder() 33 | .field("id", DataTypes.STRING()) 34 | .field("time", DataTypes.STRING()) 35 | .build(); 36 | schema.schema(tableSchema); 37 | ConnectTableDescriptor tableSource = flinkTableEnv.connect(kafka) 38 | .withFormat(new Json().failOnMissingField(true)) 39 | .withSchema(schema); 40 | tableSource.createTemporaryTable("test"); 41 | String sql = "select * from test"; 42 | 43 | Table test = flinkTableEnv.from("test"); 44 | test.printSchema(); 45 | 46 | 47 | String path = "./xxx.text/rs5/"; 48 | FileSystem localFIle = new FileSystem(); 49 | localFIle.path(path); 50 | 51 | ConnectTableDescriptor tableSink = flinkTableEnv.connect(localFIle) 52 | .withFormat(new OldCsv()) 53 | .withSchema(schema); 54 | 55 | tableSink.createTemporaryTable("test_sink"); 56 | 57 | flinkTableEnv.insertInto(test, "test_sink"); 58 | 59 | flinkTableEnv.execute("ReadFromKafkaConnectorWriteToLocalFileJava"); 60 | 61 | 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/JDBC/ReadDataFromJDBCTableSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.JDBC 2 | 3 | import org.apache.flink.api.java.io.jdbc.{JDBCLookupOptions, JDBCOptions, JDBCReadOptions, JDBCTableSource} 4 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _} 5 | import org.apache.flink.table.api.{EnvironmentSettings, 
Table, TableSchema} 6 | import org.apache.flink.table.api.scala.StreamTableEnvironment 7 | import org.apache.flink.table.types.AtomicDataType 8 | import org.apache.flink.table.types.logical.{DateType, IntType, VarCharType} 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-04-30 14 | * @Time 10:01 15 | */ 16 | object ReadDataFromJDBCTableSource { 17 | def main(args: Array[String]): Unit = { 18 | val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build() 19 | val env = StreamExecutionEnvironment.getExecutionEnvironment 20 | val blinkStreamTable = StreamTableEnvironment.create(env, settings) 21 | 22 | val fileSourcePath = "/Users/yyb/Downloads/1.txt" 23 | 24 | val wcStream: DataStream[(String, Int)] = env.readTextFile(fileSourcePath) 25 | .flatMap(_.split("\\W+")) 26 | .filter(_.nonEmpty) 27 | .map((_, 1)) 28 | .keyBy(0) 29 | .sum(1) 30 | 31 | 32 | val table: Table = blinkStreamTable.fromDataStream(wcStream) 33 | 34 | blinkStreamTable.createTemporaryView("wd", table) 35 | 36 | var sql = 37 | """ 38 | |select * from wd 39 | """.stripMargin 40 | 41 | blinkStreamTable.sqlQuery(sql).printSchema() 42 | 43 | val lookOption = JDBCLookupOptions.builder() 44 | .setCacheExpireMs(60*1000) 45 | .setCacheMaxSize(1024*1024) 46 | .setMaxRetryTimes(10) 47 | .build() 48 | 49 | val jdbcOpition = JDBCOptions.builder() 50 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 51 | .setDriverName("com.mysql.jdbc.Driver") 52 | .setUsername("root") 53 | .setPassword("111111") 54 | .setTableName("t_order") 55 | .build() 56 | 57 | val jdbcReadOption = JDBCReadOptions.builder() 58 | .setFetchSize(5000) 59 | .build() 60 | 61 | val tableSchema = TableSchema.builder() 62 | .field("id", new AtomicDataType(new IntType)) 63 | .field("name", new AtomicDataType(new VarCharType(2147483647))) //注意 String 就是 2147483647 64 | .field("time", new AtomicDataType(new DateType)) 65 | .build() 66 | 67 | val jdbcTableSource: JDBCTableSource = JDBCTableSource.builder() 68 | .setLookupOptions(lookOption) 69 | .setOptions(jdbcOpition) 70 | .setReadOptions(jdbcReadOption) 71 | .setSchema(tableSchema) 72 | .build() 73 | 74 | blinkStreamTable.registerTableSource("mysql_t_order", jdbcTableSource) 75 | 76 | blinkStreamTable.sqlQuery("select * from mysql_t_order") 77 | 78 | 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/JDBC/WriteDataByJDBCTableSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.JDBC 2 | 3 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo 4 | import org.apache.flink.api.java.io.jdbc.JDBCAppendTableSink 5 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _} 6 | import org.apache.flink.table.api.{EnvironmentSettings, Table} 7 | import org.apache.flink.table.api.scala.StreamTableEnvironment 8 | 9 | /** 10 | * @Author yyb 11 | * @Description 12 | * @Date Create in 2020-04-30 13 | * @Time 10:25 14 | */ 15 | object WriteDataByJDBCTableSink { 16 | def main(args: Array[String]): Unit = { 17 | val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build() 18 | val env = StreamExecutionEnvironment.getExecutionEnvironment 19 | val blinkStreamTable = StreamTableEnvironment.create(env, settings) 20 | 21 | val fileSourcePath = "./data/data.txt" 22 | 23 | val wcStream: DataStream[(String, 
Int)] = env.readTextFile(fileSourcePath) 24 | .flatMap(_.split("\\W+")) 25 | .filter(_.nonEmpty) 26 | .map((_, 1)) 27 | .keyBy(0) 28 | .sum(1) 29 | 30 | 31 | val table: Table = blinkStreamTable.fromDataStream(wcStream) 32 | 33 | blinkStreamTable.createTemporaryView("wd", table) 34 | 35 | var sql = 36 | """ 37 | |select * from wd 38 | """.stripMargin 39 | 40 | blinkStreamTable.sqlQuery(sql).printSchema() 41 | 42 | val jdbcAppendTableSink = JDBCAppendTableSink.builder() 43 | .setBatchSize(5000) 44 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 45 | .setDrivername("com.mysql.jdbc.Driver") 46 | .setUsername("root") 47 | .setPassword("111111") 48 | .setQuery("insert into wordcount (word, count) values(?, ?)") 49 | .setParameterTypes(java.sql.Types.VARCHAR, java.sql.Types.INTEGER) 50 | .build() 51 | 52 | blinkStreamTable.registerTableSink("word1", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcAppendTableSink) 53 | 54 | table.insertInto("word1") 55 | 56 | blinkStreamTable.execute("WriteDataByJDBCTableSink") 57 | 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/JDBC/WriteDataByJDBCTableUpsertSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.JDBC 2 | 3 | import org.apache.flink.api.java.io.jdbc.{JDBCAppendTableSink, JDBCOptions, JDBCUpsertTableSink} 4 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _} 5 | import org.apache.flink.table.api.scala.StreamTableEnvironment 6 | import org.apache.flink.table.api.{EnvironmentSettings, Table, TableSchema} 7 | import org.apache.flink.table.types.AtomicDataType 8 | import org.apache.flink.table.types.logical.{BigIntType, DateType, IntType, VarCharType} 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-04-30 14 | * @Time 10:25 15 | */ 16 | object WriteDataByJDBCTableUpsertSink { 17 | def main(args: Array[String]): Unit = { 18 | val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build() 19 | val env = StreamExecutionEnvironment.getExecutionEnvironment 20 | val blinkStreamTable = StreamTableEnvironment.create(env, settings) 21 | 22 | val fileSourcePath = "./data/data.txt" 23 | 24 | val wcStream: DataStream[WordCount] = env.readTextFile(fileSourcePath) 25 | .flatMap(_.split("\\W+")) 26 | .filter(_.nonEmpty) 27 | .map((_, 1)) 28 | .keyBy(0) 29 | .sum(1) 30 | .map(x => WordCount(x._1, x._2)) 31 | 32 | 33 | val table: Table = blinkStreamTable.fromDataStream(wcStream) 34 | 35 | 36 | blinkStreamTable.createTemporaryView("wd", table) 37 | 38 | var sql = 39 | """ 40 | |select * from wd 41 | """.stripMargin 42 | blinkStreamTable.sqlQuery(sql).printSchema() 43 | sql = 44 | s""" 45 | |select word , count(`count`) from wd group by word 46 | |""".stripMargin 47 | 48 | 49 | 50 | 51 | 52 | val jdbcOpition = JDBCOptions.builder() 53 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 54 | .setDriverName("com.mysql.jdbc.Driver") 55 | .setUsername("root") 56 | .setPassword("111111") 57 | .setTableName("wordcount") 58 | .build() 59 | 60 | val tableSchema = TableSchema.builder() 61 | .field("word", new AtomicDataType(new VarCharType(2147483647))) //注意 String 就是 2147483647 62 | .field("count", new AtomicDataType(new BigIntType())) 63 | .build() 64 | 65 | val jdbcUpsertTableSink = 
JDBCUpsertTableSink.builder() 66 | .setOptions(jdbcOpition) 67 | .setFlushIntervalMills(1000) 68 | .setFlushMaxSize(1024*1024*12) 69 | .setTableSchema(tableSchema) 70 | .build() 71 | jdbcUpsertTableSink.setKeyFields(Array("word")) 72 | jdbcUpsertTableSink.setIsAppendOnly(false) 73 | // blinkStreamTable.registerTableSink("word1", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcUpsertTableSink) 74 | blinkStreamTable.registerTableSink("word1", jdbcUpsertTableSink) 75 | 76 | 77 | // Note: the sql used here must contain an aggregation; without one the result table ends up with duplicate rows, because the isAppendOnly and keyFields of jdbcUpsertTableSink are inferred from the Flink execution plan 78 | blinkStreamTable.sqlQuery(sql).insertInto("word1") 79 | 80 | 81 | blinkStreamTable.execute("WriteDataByJDBCTableUpsertSink") 82 | 83 | } 84 | 85 | case class WordCount(word:String, count:Int) 86 | } 87 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/StreamQuery.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream 2 | 3 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 4 | import org.apache.flink.streaming.api.scala._ 5 | import org.apache.flink.table.api.{EnvironmentSettings, Table} 6 | import org.apache.flink.table.api.scala.StreamTableEnvironment 7 | 8 | /** 9 | * @Author yyb 10 | * @Description 11 | * @Date Create in 2020-04-18 12 | * @Time 21:05 13 | */ 14 | object StreamQuery { 15 | def main(args: Array[String]): Unit = { 16 | val blinkStreamSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build() 17 | val streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 18 | 19 | val streamTableEnv: StreamTableEnvironment = StreamTableEnvironment.create(streamExecutionEnvironment, blinkStreamSettings) 20 | 21 | val fileSourcePath = "/Users/yyb/Downloads/1.txt" 22 | 23 | val wcStream: DataStream[(String, Int)] = streamExecutionEnvironment.readTextFile(fileSourcePath) 24 | .flatMap(_.split("\\W+")) 25 | .filter(_.nonEmpty) 26 | .map((_, 1)) 27 | .keyBy(0) 28 | .sum(1) 29 | 30 | 31 | val table: Table = streamTableEnv.fromDataStream(wcStream) 32 | 33 | streamTableEnv.createTemporaryView("wd", table) 34 | 35 | var sql = 36 | """ 37 | |select * from wd 38 | """.stripMargin 39 | 40 | streamTableEnv.sqlQuery(sql).printSchema() 41 | 42 | val dataStream: DataStream[WD] = streamTableEnv.toAppendStream[WD](table) 43 | 44 | dataStream.print() 45 | 46 | streamTableEnv.execute("StreamQuery") 47 | } 48 | 49 | case class WD(word:String, count:Int) 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/hive/WriteData2HiveJavaReadFromkafkaTableSource.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.hive; 2 | 3 | import com.yyb.flink10.commonEntity.Pi; 4 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 5 | import org.apache.flink.api.common.typeinfo.TypeInformation; 6 | import org.apache.flink.api.java.typeutils.RowTypeInfo; 7 | import org.apache.flink.formats.json.JsonRowDeserializationSchema; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.streaming.connectors.kafka.Kafka010TableSource; 11 | import 
org.apache.flink.streaming.connectors.kafka.config.StartupMode; 12 | import org.apache.flink.table.api.DataTypes; 13 | import org.apache.flink.table.api.EnvironmentSettings; 14 | import org.apache.flink.table.api.Table; 15 | import org.apache.flink.table.api.TableSchema; 16 | import org.apache.flink.table.api.java.StreamTableEnvironment; 17 | import org.apache.flink.table.catalog.hive.HiveCatalog; 18 | import org.apache.flink.table.descriptors.Schema; 19 | 20 | import java.util.Collections; 21 | import java.util.Optional; 22 | import java.util.Properties; 23 | 24 | 25 | /** 26 | * @Author yyb 27 | * @Description 28 | * @Date Create in 2020-07-07 29 | * @Time 14:28 30 | */ 31 | public class WriteData2HiveJavaReadFromkafkaTableSource { 32 | public static void main(String[] args) throws Exception { 33 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 35 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings); 36 | Schema schema = new Schema(); 37 | TableSchema tableSchema = TableSchema.builder() 38 | .field("id", DataTypes.STRING()) 39 | .field("time", DataTypes.STRING()) 40 | .build(); 41 | schema.schema(tableSchema); 42 | Properties prop = new Properties(); 43 | prop.put("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181"); 44 | prop.put("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092"); 45 | prop.put("group.id", "yyb_dev1"); 46 | 47 | TypeInformation[] types = new TypeInformation[]{BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO}; 48 | String[] fields = new String[]{"id", "time"}; 49 | RowTypeInfo rowTypeINfo = new RowTypeInfo(types, fields); 50 | JsonRowDeserializationSchema jsonRowDeserializationSchema = new JsonRowDeserializationSchema.Builder(rowTypeINfo).build(); 51 | // Kafka010TableSource kafka = new Kafka010TableSource(tableSchema, "eventsource_yhj", prop, jsonRowDeserializationSchema); 52 | // Consume the Kafka topic from the earliest offset 53 | Kafka010TableSource kafka = new Kafka010TableSource(tableSchema, Optional.empty(), Collections.emptyList(), Optional.empty(),"eventsource_yhj", prop, jsonRowDeserializationSchema 54 | , StartupMode.EARLIEST, Collections.emptyMap()); 55 | 56 | Table kafkaSource = tableEnv.fromTableSource(kafka); 57 | 58 | tableEnv.createTemporaryView("default_catalog.kafkaSource", kafkaSource); 59 | 60 | String sql ="select * from default_catalog.kafkaSource"; 61 | tableEnv.sqlQuery(sql).printSchema(); 62 | 63 | String name = "myhive"; 64 | String defaultDatabase = "test"; 65 | String hiveConfDir = WriteData2HiveJavaReadFromkafkaTableSource.class.getResource("/").getFile(); // Setting hiveConfDir from the classpath like this keeps development, test and production environments consistent 66 | 67 | // String version = "2.3.6"; 68 | String version = "1.1.0"; 69 | HiveCatalog hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, version); 70 | 71 | tableEnv.registerCatalog("myhive", hive); 72 | tableEnv.useCatalog("myhive"); 73 | 74 | sql = "insert into myhive.test.a select * from default_catalog.kafkaSource"; 75 | tableEnv.sqlUpdate(sql); 76 | 77 | DataStream<Pi> kafkaSourceDataStream = tableEnv.toAppendStream(kafkaSource, Pi.class); 78 | kafkaSourceDataStream.print().setParallelism(1); 79 | tableEnv.execute("WriteData2Hive"); 80 | } 81 | } 82 | --------------------------------------------------------------------------------
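Note: several examples in this section convert the Kafka-backed table into a DataStream of com.yyb.flink10.commonEntity.Pi (via toAppendStream(..., Pi.class)) and write it with ParquetAvroWriters.forReflectRecord(Pi.class). Pi.java itself is not reproduced in this section, so the sketch below only illustrates the kind of POJO those calls assume, inferred from the (id STRING, time STRING) schema; the actual class in commonEntity may differ. Flink treats a class as a POJO only if it is public, has a public no-argument constructor, and exposes its fields publicly or through getters and setters.

package com.yyb.flink10.commonEntity;

// Minimal sketch of the assumed Pi POJO (id STRING, time STRING); not the repository's actual file.
public class Pi {
    private String id;
    private String time;

    // public no-arg constructor, required for Flink POJO handling and for reflection-based Avro writers
    public Pi() {
    }

    public Pi(String id, String time) {
        this.id = id;
        this.time = time;
    }

    public String getId() { return id; }
    public void setId(String id) { this.id = id; }

    public String getTime() { return time; }
    public void setTime(String time) { this.time = time; }

    @Override
    public String toString() {
        return "Pi{id='" + id + "', time='" + time + "'}";
    }
}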
/src/main/scala/com/yyb/flink10/table/blink/stream/hive/WriteData2HiveReadFromkafkaTableSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.hive 2 | 3 | import java.util.Properties 4 | 5 | import com.yyb.flink10.commonEntity.Pi 6 | import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeInformation} 7 | import org.apache.flink.api.java.typeutils.RowTypeInfo 8 | import org.apache.flink.formats.json.JsonRowDeserializationSchema 9 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 10 | import org.apache.flink.streaming.api.scala._ 11 | import org.apache.flink.streaming.connectors.kafka.Kafka010TableSource 12 | import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table, TableSchema} 13 | import org.apache.flink.table.api.scala.StreamTableEnvironment 14 | import org.apache.flink.table.catalog.hive.HiveCatalog 15 | import org.apache.flink.table.descriptors.Schema 16 | 17 | /** 18 | * @Author yyb 19 | * @Description Note: use the Java version of this example instead; 20 | * this Scala code fails with an error at new RowTypeInfo 21 | * @Date Create in 2020-07-07 22 | * @Time 10:26 23 | */ 24 | object WriteData2HiveReadFromkafkaTableSource { 25 | def main(args: Array[String]): Unit = { 26 | val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build() 27 | val env = StreamExecutionEnvironment.getExecutionEnvironment 28 | val tableEnv = StreamTableEnvironment.create(env, settings) 29 | 30 | val schema = new Schema() 31 | val tableSchema: TableSchema = TableSchema.builder() 32 | .field("id", DataTypes.STRING()) 33 | .field("time", DataTypes.STRING()) 34 | .build() 35 | schema.schema(tableSchema) 36 | val prop = new Properties() 37 | prop.put("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 38 | prop.put("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 39 | prop.put("group.id", "yyb_dev") 40 | 41 | val types: Array[BasicTypeInfo[String]] = Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO) 42 | val fields: Array[String] = Array("id", "time") 43 | // val rowTypeINfo = new RowTypeInfo(types, fields) 44 | val jsonRowDeserializationSchema = new JsonRowDeserializationSchema.Builder(schema.toString).build() 45 | 46 | val kafka = new Kafka010TableSource(tableSchema, "eventsource_yhj", prop, jsonRowDeserializationSchema) 47 | 48 | val kafkaSource: Table = tableEnv.fromTableSource(kafka) 49 | tableEnv.createTemporaryView("kafkaSource", kafkaSource) 50 | 51 | 52 | val name = "myhive" 53 | val defaultDatabase = "flink" 54 | val hiveConfDir = this.getClass.getResource("/").getFile // Setting hiveConfDir from the classpath like this keeps development, test and production environments consistent 55 | 56 | // val version = "2.3.6" 57 | val version = "1.1.0" 58 | val hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, version) 59 | 60 | tableEnv.registerCatalog("myhive", hive) 61 | tableEnv.useCatalog("myhive") 62 | 63 | var sql = 64 | s""" 65 | |insert into table myhive.${defaultDatabase}.a select * from kafkaSource 66 | |""".stripMargin 67 | tableEnv.sqlUpdate(sql) 68 | 69 | val kafkaSourceDataStream: DataStream[Pi] = tableEnv.toAppendStream[Pi](kafkaSource) 70 | kafkaSourceDataStream.print().setParallelism(1) 71 | 72 | tableEnv.execute("WriteData2Hive") 73 | 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/join/temporaltable/TemporalTableDemo.java: 
-------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.join.temporaltable; 2 | 3 | import org.apache.flink.api.java.io.jdbc.JDBCLookupOptions; 4 | import org.apache.flink.api.java.io.jdbc.JDBCOptions; 5 | import org.apache.flink.api.java.io.jdbc.JDBCReadOptions; 6 | import org.apache.flink.api.java.io.jdbc.JDBCTableSource; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.table.api.EnvironmentSettings; 10 | import org.apache.flink.table.api.Table; 11 | import org.apache.flink.table.api.TableSchema; 12 | import org.apache.flink.table.api.java.StreamTableEnvironment; 13 | import org.apache.flink.table.types.AtomicDataType; 14 | import org.apache.flink.table.types.logical.DateType; 15 | import org.apache.flink.table.types.logical.IntType; 16 | import org.apache.flink.table.types.logical.VarCharType; 17 | import org.apache.flink.types.Row; 18 | 19 | /** 20 | * @Author yyb 21 | * @Description 22 | * @Date Create in 2020-07-27 23 | * @Time 16:14 24 | */ 25 | public class TemporalTableDemo { 26 | public static void main(String[] args) throws Exception { 27 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 30 | 31 | JDBCLookupOptions lookOption = JDBCLookupOptions.builder() 32 | .setCacheExpireMs(60 * 1000) 33 | .setCacheMaxSize(1024 * 1024) 34 | .setMaxRetryTimes(10) 35 | .build(); 36 | 37 | JDBCOptions jdbcOpition = JDBCOptions.builder() 38 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 39 | .setDriverName("com.mysql.jdbc.Driver") 40 | .setUsername("root") 41 | .setPassword("111111") 42 | .setTableName("RatesHistory") 43 | .build(); 44 | 45 | JDBCReadOptions jdbcReadOption = JDBCReadOptions.builder() 46 | .setFetchSize(5000) 47 | .build(); 48 | 49 | TableSchema tableSchema = TableSchema.builder() 50 | .field("rowtime", new AtomicDataType(new VarCharType(2147483647))) 51 | .field("currency", new AtomicDataType(new VarCharType(2147483647))) // Note: STRING corresponds to VarCharType(2147483647), i.e. the maximum length 52 | .field("rate", new AtomicDataType(new IntType())) 53 | .build(); 54 | 55 | JDBCTableSource jdbcTableSource = JDBCTableSource.builder() 56 | .setLookupOptions(lookOption) 57 | .setOptions(jdbcOpition) 58 | .setReadOptions(jdbcReadOption) 59 | .setSchema(tableSchema) 60 | .build(); 61 | 62 | blinkTableEnv.registerTableSource("LatestRates", jdbcTableSource); 63 | 64 | blinkTableEnv.registerFunction("jdbcLookup", jdbcTableSource.getLookupFunction(new String[]{"currency"})); 65 | 66 | // Note: the data of a temporal table cannot be queried directly, so the FOR SYSTEM_TIME query is replaced by a plain select below 67 | String sql = "SELECT * FROM LatestRates FOR SYSTEM_TIME AS OF Timestamp '2020-07-27 16:30:15'"; 68 | sql = "select * from LatestRates"; 69 | Table rs1 = blinkTableEnv.sqlQuery(sql); 70 | DataStream<Row> rs1DataStream = blinkTableEnv.toAppendStream(rs1, Row.class); 71 | rs1DataStream.print().setParallelism(1); 72 | 73 | blinkTableEnv.execute("TemporalTableDemo"); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/join/temporaltable/TemporalTableFunction.java: -------------------------------------------------------------------------------- 1 | package 
com.yyb.flink10.table.blink.stream.join.temporaltable; 2 | 3 | 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStream; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.table.api.EnvironmentSettings; 9 | import org.apache.flink.table.api.Table; 10 | import org.apache.flink.table.api.java.StreamTableEnvironment; 11 | import org.apache.flink.types.Row; 12 | 13 | import java.util.ArrayList; 14 | import java.util.List; 15 | 16 | /** 17 | * @Author yyb 18 | * @Description Temporal table function 19 | * @Date Create in 2020-07-27 20 | * @Time 15:44 21 | */ 22 | public class TemporalTableFunction { 23 | public static void main(String[] args) throws Exception { 24 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 26 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 27 | List<Tuple2<String, Long>> ratesHistoryData = new ArrayList<>(); 28 | ratesHistoryData.add(Tuple2.of("US Dollar", 102L)); 29 | ratesHistoryData.add(Tuple2.of("Euro", 114L)); 30 | ratesHistoryData.add(Tuple2.of("Yen", 1L)); 31 | ratesHistoryData.add(Tuple2.of("Euro", 116L)); 32 | ratesHistoryData.add(Tuple2.of("Euro", 119L)); 33 | 34 | DataStreamSource<Tuple2<String, Long>> ratesHistoryStream = env.fromCollection(ratesHistoryData); 35 | // Append a processing-time attribute 36 | Table ratesHistory = blinkTableEnv.fromDataStream(ratesHistoryStream, "r_currency, r_rate, r_proctime.proctime"); 37 | blinkTableEnv.createTemporaryView("RatesHistory", ratesHistory); 38 | 39 | // Create the Temporal Table Function 40 | org.apache.flink.table.functions.TemporalTableFunction rates = ratesHistory.createTemporalTableFunction("r_proctime", "r_currency"); 41 | blinkTableEnv.registerFunction("Rates", rates); 42 | 43 | // Note: the data behind a temporal table function cannot be queried directly like this 44 | String sql = "SELECT * FROM RatesHistory FOR SYSTEM_TIME AS OF TIME '16:01:15'"; 45 | 46 | Table rs1 = blinkTableEnv.sqlQuery(sql); 47 | DataStream<Row> rs1DataStream = blinkTableEnv.toAppendStream(rs1, Row.class); 48 | rs1DataStream.print().setParallelism(1); 49 | 50 | blinkTableEnv.execute("TemporalTableFunction"); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/EventTimeDemo.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.yyb.flink10.commonEntity.Current1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.streaming.api.CheckpointingMode; 7 | import org.apache.flink.streaming.api.TimeCharacteristic; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.streaming.api.functions.ProcessFunction; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 14 | import org.apache.flink.table.api.EnvironmentSettings; 15 | import org.apache.flink.table.api.Table; 16 | import 
org.apache.flink.table.api.java.StreamTableEnvironment; 17 | import org.apache.flink.types.Row; 18 | import org.apache.flink.util.Collector; 19 | 20 | import java.io.InputStream; 21 | import java.util.Properties; 22 | 23 | /** 24 | * @Author yyb 25 | * @Description 26 | * @Date Create in 2020-08-10 27 | * @Time 18:04 28 | */ 29 | public class EventTimeDemo { 30 | public static void main(String[] args) throws Exception { 31 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 34 | 35 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 36 | env.enableCheckpointing(3000); 37 | env.getCheckpointConfig().setTolerableCheckpointFailureNumber(3); 38 | env.getCheckpointConfig().setMaxConcurrentCheckpoints(1); 39 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 40 | 41 | env.getConfig().setAutoWatermarkInterval(1000); 42 | 43 | InputStream in_env = ClassLoader.getSystemResourceAsStream("env.properties"); 44 | Properties prop = new Properties(); 45 | prop.load(in_env); 46 | 47 | 48 | 49 | Properties properties = new Properties(); 50 | properties.setProperty("bootstrap.servers", prop.getProperty("bootstrap.servers")); 51 | properties.setProperty("zookeeper.connect", prop.getProperty("zookeeper.connect")); 52 | properties.setProperty("group.id", "test"); 53 | 54 | FlinkKafkaConsumer011<String> kafkaSource = new FlinkKafkaConsumer011<>("eventsource_yyb", new SimpleStringSchema(), properties); 55 | DataStream<String> stream = env.addSource(kafkaSource); 56 | 57 | 58 | DataStream<Current1> currentDS = stream.process(new ProcessFunction<String, Current1>() { 59 | @Override 60 | public void processElement(String value, Context ctx, Collector<Current1> out) throws Exception { 61 | Current1 current1 = JSON.parseObject(value, Current1.class); 62 | out.collect(current1); 63 | } 64 | }); 65 | 66 | currentDS = currentDS.assignTimestampsAndWatermarks(new TimestampExtractor(Time.seconds(0))); // reassign: assignTimestampsAndWatermarks returns a new stream rather than modifying currentDS in place 67 | 68 | currentDS.print().setParallelism(1); 69 | 70 | // sql rowtime 71 | // Note: the first rowtime is just the POJO's own field; user_action_time.rowtime is the actual event-time attribute 72 | Table t = blinkTableEnv.fromDataStream(currentDS, "rowtime,amount,currency,user_action_time.rowtime"); 73 | 74 | DataStream<Row> tRow = blinkTableEnv.toAppendStream(t, Row.class); 75 | tRow.print().setParallelism(1); 76 | env.execute("EventTimeDemo"); 77 | 78 | 79 | } 80 | 81 | static class TimestampExtractor extends BoundedOutOfOrdernessTimestampExtractor<Current1> { 82 | 83 | public TimestampExtractor(Time maxOutOfOrderness){ 84 | super(maxOutOfOrderness); 85 | } 86 | @Override 87 | public long extractTimestamp(Current1 element) { 88 | return Long.parseLong(element.getRowtime()); 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/ReadDataFromKafkaConnector.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka 2 | 3 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 4 | import org.apache.flink.streaming.api.scala._ 5 | import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table, TableSchema} 6 | import org.apache.flink.table.api.scala.StreamTableEnvironment 7 | import org.apache.flink.table.descriptors.{ConnectTableDescriptor, Json, Kafka, 
Schema} 8 | 9 | /** 10 | * @Author yyb 11 | * @Description 12 | * @Date Create in 2020-06-10 13 | * @Time 09:32 14 | */ 15 | object ReadDataFromKafkaConnector { 16 | def main(args: Array[String]): Unit = { 17 | val settings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build() 18 | val env = StreamExecutionEnvironment.getExecutionEnvironment 19 | val flinkTableEnv = StreamTableEnvironment.create(env, settings) 20 | 21 | val kafka = new Kafka() 22 | kafka.version("0.11") 23 | .topic("eventsource_yhj") 24 | .property("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 25 | .property("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 26 | .property("group.id", "yyb_dev") 27 | .startFromEarliest() 28 | 29 | val schema = new Schema() 30 | val tableSchema = TableSchema.builder() 31 | .field("id", DataTypes.STRING()) 32 | .field("time", DataTypes.STRING()) 33 | .build() 34 | schema.schema(tableSchema) 35 | val tableSource: ConnectTableDescriptor = flinkTableEnv.connect(kafka) 36 | .withFormat( new Json().failOnMissingField(true) ) 37 | .withSchema(schema) 38 | tableSource.createTemporaryTable("test") 39 | var sql = "select * from test" 40 | 41 | val test: Table = flinkTableEnv.from("test") 42 | test.printSchema() 43 | 44 | 45 | val testDataStream: DataStream[Pi] = flinkTableEnv.toAppendStream[Pi](test) 46 | 47 | testDataStream.print().setParallelism(1) 48 | 49 | flinkTableEnv.execute("ReadDataFromKafkaConnector") 50 | 51 | 52 | } 53 | 54 | case class Pi( 55 | id:String, 56 | time:String 57 | ) 58 | } 59 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/ReadDataFromKafkaConnectorJava.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka; 2 | 3 | 4 | import com.yyb.flink10.util1.GeneratorClassByASM; 5 | import net.sf.cglib.core.ReflectUtils; 6 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 7 | import org.apache.flink.api.java.typeutils.TupleTypeInfo; 8 | import org.apache.flink.core.fs.Path; 9 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink; 13 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy; 14 | import org.apache.flink.table.api.DataTypes; 15 | import org.apache.flink.table.api.EnvironmentSettings; 16 | import org.apache.flink.table.api.Table; 17 | import org.apache.flink.table.api.TableSchema; 18 | import org.apache.flink.table.api.java.StreamTableEnvironment; 19 | import org.apache.flink.table.descriptors.ConnectTableDescriptor; 20 | import org.apache.flink.table.descriptors.Json; 21 | import org.apache.flink.table.descriptors.Kafka; 22 | import org.apache.flink.table.descriptors.Schema; 23 | import org.apache.flink.types.Row; 24 | 25 | /** 26 | * 注意 这里 涉及到了 ASM 动态产生 class 并加载的 内容,可以参考 https://blog.csdn.net/u010374412/article/details/106714721 博文 27 | * @Author yyb 28 | * @Description 29 | * @Date Create in 2020-06-10 30 | * @Time 09:32 31 | */ 32 | public class ReadDataFromKafkaConnectorJava { 33 | public static void main(String[] args) throws Exception { 34 | 35 | /** 36 | * 这里是 ASM 生产 动态 class 类,不用理会。 37 | */ 38 | String 
packageName = "com.yyb.flink10.xxx."; 39 | String className = "Pi"; 40 | byte[] byteOfClass = GeneratorClassByASM.geneClassMain(packageName, className); 41 | Class piCLass = ReflectUtils.defineClass(packageName + className, byteOfClass, ReadDataFromKafkaConnectorJava.class.getClassLoader()); 42 | Class xx = Class.forName(packageName + className); 43 | System.out.println(xx.newInstance()); 44 | 45 | 46 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build(); 47 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 48 | env.registerType(piCLass); 49 | 50 | StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env, settings); 51 | 52 | 53 | Kafka kafka = new Kafka(); 54 | kafka.version("0.11") 55 | .topic("eventsource_yhj") 56 | .property("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 57 | .property("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 58 | .property("group.id", "yyb_dev") 59 | .startFromEarliest(); 60 | 61 | Schema schema = new Schema(); 62 | TableSchema tableSchema = TableSchema.builder() 63 | .field("id", DataTypes.STRING()) 64 | .field("time", DataTypes.STRING()) 65 | .build(); 66 | schema.schema(tableSchema); 67 | ConnectTableDescriptor tableSource = flinkTableEnv.connect(kafka) 68 | .withFormat(new Json().failOnMissingField(true)) 69 | .withSchema(schema); 70 | tableSource.createTemporaryTable("test"); 71 | String sql = "select * from test"; 72 | 73 | Table test = flinkTableEnv.from("test"); 74 | test.printSchema(); 75 | 76 | 77 | /** 78 | * 注意 TupleTypeInfoBase 这个 抽象类 有3个直接实现 79 | * BaseRowTypeInfo, RowTypeInfo, TupleTypeInfo 80 | * 目前这个程序 只是用了 TupleTypeInfo 这个类 81 | */ 82 | TupleTypeInfo tupleTypeInfo = new TupleTypeInfo(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO); 83 | // DataStream testDataStream = flinkTableEnv.toAppendStream(test, piCLass); //使用 Class 的方式 84 | DataStream testDataStream = flinkTableEnv.toAppendStream(test, tupleTypeInfo); //使用 TypeInformation 的方式 85 | testDataStream.print().setParallelism(1); 86 | 87 | // DataStream testDataStream1 = flinkTableEnv.toAppendStream(test, Row.class); 88 | // 89 | // String fileSinkPath = "./xxx.text/rs7/"; 90 | // StreamingFileSink sink = StreamingFileSink.forBulkFormat( 91 | // new Path(fileSinkPath), 92 | // ParquetAvroWriters.forReflectRecord(Row.class)) 93 | // .withRollingPolicy(OnCheckpointRollingPolicy.build()).build(); 94 | // testDataStream1.addSink(sink); 95 | flinkTableEnv.execute("ReadDataFromKafkaConnector"); 96 | } 97 | 98 | } 99 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/ReadDataFromKafkaSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka 2 | 3 | import java.util 4 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 5 | import org.apache.flink.streaming.connectors.kafka.Kafka010TableSourceSinkFactory 6 | import org.apache.flink.table.api.EnvironmentSettings 7 | import org.apache.flink.table.api.scala.StreamTableEnvironment 8 | 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-06-09 14 | * @Time 22:21 15 | */ 16 | object ReadDataFromKafkaSource { 17 | def main(args:Array[String])={ 18 | val settings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build() 19 | val env = 
StreamExecutionEnvironment.getExecutionEnvironment 20 | val flinkTableEnv = StreamTableEnvironment.create(env, settings) 21 | 22 | val kafkaSourceFactory = new Kafka010TableSourceSinkFactory() 23 | val proper = new util.HashMap[String, String]() 24 | val kafkaSource = kafkaSourceFactory.createStreamTableSource(proper) 25 | 26 | flinkTableEnv.registerTableSource( "kafka", kafkaSource) 27 | 28 | env.execute("") 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/ReadDataFromKafkaSourceJava.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka; 2 | 3 | import org.apache.flink.api.common.serialization.DeserializationSchema; 4 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 5 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.connectors.kafka.Kafka010TableSource; 9 | import org.apache.flink.table.api.EnvironmentSettings; 10 | import org.apache.flink.table.api.TableSchema; 11 | import org.apache.flink.table.api.java.StreamTableEnvironment; 12 | import java.util.Properties; 13 | 14 | /** 15 | * @Author yyb 16 | * @Description 17 | * @Date Create in 2020-06-09 18 | * @Time 22:46 19 | */ 20 | public class ReadDataFromKafkaSourceJava { 21 | public static void main(String[] args){ 22 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build(); 23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env, settings); 25 | BasicTypeInfo field1 = BasicTypeInfo.STRING_TYPE_INFO; 26 | BasicTypeInfo field2 = BasicTypeInfo.STRING_TYPE_INFO; 27 | TableSchema schema = new TableSchema(new String[]{"field1", "field2"}, new TypeInformation[]{BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO}); 28 | String topic = "topic"; 29 | Properties properties = new Properties(); 30 | DeserializationSchema deserializationSchema = new SimpleStringSchema(); 31 | Kafka010TableSource kafkaSource = new Kafka010TableSource(schema, topic, properties, deserializationSchema); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/WriteToKafkaByKafkaConnectorOfOrder.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka; 2 | 3 | import com.yyb.flink10.commonEntity.Current1; 4 | import com.yyb.flink10.commonEntity.Current2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.table.api.DataTypes; 8 | import org.apache.flink.table.api.EnvironmentSettings; 9 | import org.apache.flink.table.api.Table; 10 | import org.apache.flink.table.api.TableSchema; 11 | import org.apache.flink.table.api.java.StreamTableEnvironment; 12 | import org.apache.flink.table.descriptors.ConnectTableDescriptor; 13 | import org.apache.flink.table.descriptors.Json; 14 | import org.apache.flink.table.descriptors.Kafka; 15 | import org.apache.flink.table.descriptors.Schema; 16 | 17 | 
import java.io.IOException; 18 | import java.io.InputStream; 19 | import java.util.ArrayList; 20 | import java.util.Date; 21 | import java.util.Properties; 22 | 23 | /** 24 | * @Author yyb 25 | * @Description 注意 在 join的时候,是由 水印 触发的 (即 每当 新的水印大于 旧的水印 才会触发计算, join 的时候,由所有流中的 min 的水印决定 这个 join 的水印) 26 | * @Date Create in 2020-08-03 27 | * @Time 08:53 28 | */ 29 | public class WriteToKafkaByKafkaConnectorOfOrder { 30 | public static void main(String [] args) throws Exception { 31 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 34 | InputStream in_env = ClassLoader.getSystemResourceAsStream("env.properties"); 35 | Properties prop = new Properties(); 36 | prop.load(in_env); 37 | System.out.println(prop.getProperty("zookeeper.connect")); 38 | 39 | Kafka kafka = new Kafka(); 40 | kafka.version("0.11") 41 | .topic("eventsource_yyb_order") 42 | .property("zookeeper.connect", prop.getProperty("zookeeper.connect")) 43 | .property("bootstrap.servers", prop.getProperty("bootstrap.servers")). 44 | property("group.id", "yyb_dev") 45 | .startFromLatest(); 46 | Schema schema = new Schema(); 47 | TableSchema tableSchema1 = TableSchema.builder() 48 | .field("rowtime", DataTypes.STRING()) 49 | .field("amount", DataTypes.INT()) 50 | .field("currency", DataTypes.STRING()) 51 | .field("eventTime", DataTypes.BIGINT()) 52 | .build(); 53 | schema.schema(tableSchema1); 54 | ConnectTableDescriptor tableSource = blinkTableEnv.connect(kafka) 55 | .withFormat(new Json().failOnMissingField(true)) 56 | .withSchema(schema); 57 | tableSource.createTemporaryTable("Orders"); 58 | 59 | ArrayList data = new ArrayList(); 60 | data.add(new Current2( "2016-01-01 00:00:00",3, "Euro", 0L)); 61 | 62 | DataStreamSource dataDS = env.fromCollection(data); 63 | Table dataTable = blinkTableEnv.fromDataStream(dataDS); 64 | blinkTableEnv.registerTable("source", dataTable); 65 | 66 | String sql = "insert into Orders select rowtime, amount, currency, eventTime from source"; 67 | 68 | blinkTableEnv.sqlUpdate(sql); 69 | 70 | env.execute("WriteToKafkaByKafkaConnector"); 71 | } 72 | 73 | 74 | 75 | 76 | } 77 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/WriteToKafkaByKafkaConnectorOfRates.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka; 2 | 3 | import com.yyb.flink10.commonEntity.Current1; 4 | import com.yyb.flink10.commonEntity.Rate; 5 | import com.yyb.flink10.commonEntity.Rate2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.table.api.DataTypes; 9 | import org.apache.flink.table.api.EnvironmentSettings; 10 | import org.apache.flink.table.api.Table; 11 | import org.apache.flink.table.api.TableSchema; 12 | import org.apache.flink.table.api.java.StreamTableEnvironment; 13 | import org.apache.flink.table.descriptors.ConnectTableDescriptor; 14 | import org.apache.flink.table.descriptors.Json; 15 | import org.apache.flink.table.descriptors.Kafka; 16 | import org.apache.flink.table.descriptors.Schema; 17 | 18 | import java.io.InputStream; 19 | import java.text.SimpleDateFormat; 20 | import 
java.util.ArrayList; 21 | import java.util.Date; 22 | import java.util.Properties; 23 | 24 | /** 25 | * @Author yyb 26 | * @Description 27 | * @Date Create in 2020-08-03 28 | * @Time 08:53 29 | */ 30 | public class WriteToKafkaByKafkaConnectorOfRates { 31 | public static void main(String [] args) throws Exception { 32 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 33 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 34 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 35 | InputStream in_env = ClassLoader.getSystemResourceAsStream("env.properties"); 36 | Properties prop = new Properties(); 37 | prop.load(in_env); 38 | System.out.println(prop.getProperty("zookeeper.connect")); 39 | 40 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 41 | 42 | Kafka kafka = new Kafka(); 43 | kafka.version("0.11") 44 | .topic("eventsource_yyb_rate") 45 | .property("zookeeper.connect", prop.getProperty("zookeeper.connect")) 46 | .property("bootstrap.servers", prop.getProperty("bootstrap.servers")). 47 | property("group.id", "yyb_dev") 48 | .startFromLatest(); 49 | 50 | Schema schema = new Schema(); 51 | TableSchema tableSchema1 = TableSchema.builder() 52 | .field("rowtime", DataTypes.STRING()) 53 | .field("currency", DataTypes.STRING()) 54 | .field("rate", DataTypes.INT()) 55 | .field("eventTime", DataTypes.BIGINT()) 56 | .build(); 57 | schema.schema(tableSchema1); 58 | ConnectTableDescriptor tableSource = blinkTableEnv.connect(kafka) 59 | .withFormat(new Json().failOnMissingField(true)) 60 | .withSchema(schema); 61 | tableSource.createTemporaryTable("Rates"); 62 | 63 | ArrayList data = new ArrayList(); 64 | data.add(new Rate2("2016-01-01 00:00:02", "Euro", 120, 0L)); 65 | 66 | DataStreamSource dataDS = env.fromCollection(data); 67 | Table dataTable = blinkTableEnv.fromDataStream(dataDS); 68 | blinkTableEnv.registerTable("source", dataTable); 69 | 70 | String sql = "insert into Rates select rowtime,currency,rate,eventTime from source"; 71 | 72 | blinkTableEnv.sqlUpdate(sql); 73 | 74 | env.execute("WriteToKafkaByKafkaConnector"); 75 | } 76 | 77 | 78 | 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/BatchQuery.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch 2 | 3 | import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment} 4 | import org.apache.flink.api.scala._ 5 | import org.apache.flink.table.api.Table 6 | import org.apache.flink.table.api.scala.BatchTableEnvironment 7 | 8 | /** 9 | * @Author yyb 10 | * @Description 11 | * @Date Create in 2020-04-18 12 | * @Time 21:05 13 | */ 14 | object BatchQuery { 15 | def main(args: Array[String]): Unit = { 16 | val env = ExecutionEnvironment.getExecutionEnvironment 17 | val batchTableEnv: BatchTableEnvironment = BatchTableEnvironment.create(env) 18 | 19 | val words = "hello flink hello lagou" 20 | val WDS = words.split("\\W+").map(WD(_, 1)) 21 | 22 | val input: DataSet[WD] = env.fromCollection(WDS) 23 | 24 | val table: Table = batchTableEnv.fromDataSet(input) 25 | 26 | 27 | batchTableEnv.createTemporaryView("wordcount", table) 28 | 29 | batchTableEnv.sqlQuery("select * from wordcount").printSchema() 30 | 31 | val datasetOfTable: DataSet[WD] = batchTableEnv.toDataSet[WD](table) 32 | 33 | datasetOfTable.printToErr() 34 | 35 
| 36 | 37 | batchTableEnv.execute("BatchQuery") 38 | 39 | } 40 | 41 | case class WD(word:String, count:Int) 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/BatchReadFromParquetQuery.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch 2 | 3 | import org.apache.flink.api.scala.ExecutionEnvironment 4 | import org.apache.flink.api.scala._ 5 | import org.apache.flink.formats.parquet.ParquetTableSource 6 | import org.apache.flink.table.api.Table 7 | import org.apache.flink.table.api.scala.BatchTableEnvironment 8 | import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName 9 | import org.apache.parquet.schema.{MessageType, PrimitiveType} 10 | import org.apache.parquet.schema.Type.Repetition 11 | 12 | /** 13 | * @Author yyb 14 | * @Description 15 | * @Date Create in 2020-04-23 16 | * @Time 17:40 17 | */ 18 | object BatchReadFromParquetQuery { 19 | def main(args: Array[String]): Unit = { 20 | val env = ExecutionEnvironment.getExecutionEnvironment 21 | val batchTableEnv = BatchTableEnvironment.create(env) 22 | 23 | 24 | 25 | 26 | val word = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "word") 27 | val count = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "count") 28 | val schema = new MessageType("word", word, count) 29 | 30 | val parquetFile = new ParquetTableSource.Builder() 31 | .path("./xxx.text/rs2/2020-04-23--21/.part-0-0.parquet.inprogress.a6a2a7cd-98bf-4397-8e7d-558b1bb932aa") 32 | .forParquetSchema(schema) 33 | .build() 34 | 35 | batchTableEnv.registerTableSource("xx", parquetFile) 36 | 37 | val sql = 38 | """ 39 | |select * from xx 40 | """.stripMargin 41 | val q1: Table = batchTableEnv.sqlQuery(sql) 42 | 43 | q1.printSchema() 44 | 45 | batchTableEnv.toDataSet[WC](q1).print() 46 | } 47 | 48 | case class WC(word:String, ct:Int) 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/BatchReadFromSequenceQuery.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch 2 | 3 | import org.apache.flink.api.scala.ExecutionEnvironment 4 | import org.apache.flink.api.scala._ 5 | import org.apache.flink.table.api.scala.BatchTableEnvironment 6 | import org.datanucleus.store.rdbms.valuegenerator.SequenceTable 7 | 8 | /** 9 | * @Author yyb 10 | * @Description 11 | * @Date Create in 2020-04-24 12 | * @Time 09:29 13 | */ 14 | object BatchReadFromSequenceQuery { 15 | def main(args: Array[String]): Unit = { 16 | val env = ExecutionEnvironment.getExecutionEnvironment 17 | val batchTableEnv = BatchTableEnvironment.create(env) 18 | 19 | // env.readFileOfPrimitives() 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/JDBC/BatchJDBCReadByInputformat2TableSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch.JDBC 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation 4 | import org.apache.flink.api.java.io.jdbc.JDBCInputFormat 5 | import org.apache.flink.api.java.io.jdbc.JDBCInputFormat.JDBCInputFormatBuilder 6 | import org.apache.flink.api.java.typeutils.RowTypeInfo 7 | import org.apache.flink.api.scala.typeutils.Types 8 | import 
org.apache.flink.api.scala.{DataSet, ExecutionEnvironment, _} 9 | import org.apache.flink.table.api.Table 10 | import org.apache.flink.table.api.scala.BatchTableEnvironment 11 | import org.apache.flink.table.descriptors.{BatchTableDescriptor, FileSystem} 12 | import org.apache.flink.table.sinks.CsvTableSink 13 | import org.apache.flink.types.Row 14 | 15 | /** 16 | * @Author yyb 17 | * @Description 18 | * @Date Create in 2020-04-21 19 | * @Time 14:15 20 | */ 21 | object BatchJDBCReadByInputformat2TableSource { 22 | def main(args: Array[String]): Unit = { 23 | val env = ExecutionEnvironment.getExecutionEnvironment 24 | val batchTableEnv = BatchTableEnvironment.create(env) 25 | 26 | val types = Array[TypeInformation[_]](Types.STRING, Types.LONG, Types.STRING) 27 | val fields = Array[String]("MT_KEY1", "MT_KEY2", "MT_COMMENT") 28 | val typeInfo = new RowTypeInfo(types, fields) 29 | 30 | 31 | val jdbc: JDBCInputFormat = new JDBCInputFormatBuilder() 32 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/hive?useSSL=false&serverTimezone=UTC") 33 | .setDrivername("com.mysql.jdbc.Driver") 34 | .setUsername("hive") 35 | .setPassword("hive") 36 | .setQuery("select * from AUX_TABLE") 37 | .setRowTypeInfo(typeInfo) 38 | .finish() 39 | val mysqlSource : DataSet[Row] = env.createInput(jdbc) 40 | 41 | mysqlSource.print() 42 | 43 | // val table: ParquetTableSource = new ParquetTableSource() 44 | // batchTableEnv.registerTableSource("table", table) 45 | 46 | val file = new FileSystem() //注意这里只是 实验性质 47 | val fs: BatchTableDescriptor = batchTableEnv.connect(file) 48 | 49 | // val jdbcTableSink = new JDBCAppendTableSink() 50 | // batchTableEnv.registerTableSink("jdbcTableSink", jdbcTableSink) 51 | 52 | 53 | val csvTableSink = new CsvTableSink("") 54 | // batchTableEnv.registerTableSink("csvTableSink", csvTableSink) 55 | 56 | val AUX_TABLE: Table = batchTableEnv.fromDataSet(mysqlSource) 57 | batchTableEnv.createTemporaryView("AUX_TABLE", AUX_TABLE) 58 | 59 | val sql = 60 | s""" 61 | |select * from AUX_TABLE 62 | """.stripMargin 63 | batchTableEnv.sqlQuery(sql).printSchema() 64 | 65 | 66 | 67 | //目前来看,只有在 有 sink的情况下,需要 加 execute 68 | // batchTableEnv.execute("ConnectJDBCBatch") 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/JDBC/BatchJobReadFromJDBCTableSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch.JDBC 2 | 3 | import org.apache.flink.api.java.io.jdbc.{JDBCLookupOptions, JDBCOptions, JDBCReadOptions, JDBCTableSource} 4 | import org.apache.flink.api.scala.ExecutionEnvironment 5 | import org.apache.flink.table.api.{Table, TableSchema} 6 | import org.apache.flink.table.api.scala.BatchTableEnvironment 7 | import org.apache.flink.table.types.AtomicDataType 8 | import org.apache.flink.table.types.logical.{DateType, IntType, VarCharType} 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-04-26 14 | * @Time 17:33 15 | */ 16 | object BatchJobReadFromJDBCTableSource { 17 | def main(args: Array[String]): Unit = { 18 | val env = ExecutionEnvironment.getExecutionEnvironment 19 | val batchTableEnv = BatchTableEnvironment.create(env) 20 | 21 | val lookOption = JDBCLookupOptions.builder() 22 | .setCacheExpireMs(60*1000) 23 | .setCacheMaxSize(1024*1024) 24 | .setMaxRetryTimes(10) 25 | .build() 26 | 27 | val jdbcOpition = JDBCOptions.builder() 28 | 
.setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 29 | .setDriverName("com.mysql.jdbc.Driver") 30 | .setUsername("root") 31 | .setPassword("111111") 32 | .setTableName("t_order") 33 | .build() 34 | 35 | val jdbcReadOption = JDBCReadOptions.builder() 36 | .setFetchSize(5000) 37 | .build() 38 | 39 | val tableSchema = TableSchema.builder() 40 | .field("id", new AtomicDataType(new IntType)) 41 | .field("name", new AtomicDataType(new VarCharType(2147483647))) //注意 String 就是 2147483647 42 | .field("time", new AtomicDataType(new DateType)) 43 | .build() 44 | 45 | val jdbcTableSource: JDBCTableSource = JDBCTableSource.builder() 46 | .setLookupOptions(lookOption) 47 | .setOptions(jdbcOpition) 48 | .setReadOptions(jdbcReadOption) 49 | .setSchema(tableSchema) 50 | .build() 51 | 52 | val t_order: Table = batchTableEnv.fromTableSource(jdbcTableSource) 53 | 54 | batchTableEnv.createTemporaryView("t_order", t_order) 55 | 56 | val sql = 57 | s""" 58 | |select * from t_order 59 | """.stripMargin 60 | batchTableEnv.sqlQuery(sql).printSchema() 61 | 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/JDBC/WriteJDBCByTableSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch.JDBC 2 | 3 | import com.yyb.flink10.table.flink.batch.BatchQuery.WD 4 | import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeInformation} 5 | import org.apache.flink.api.java.io.jdbc.JDBCAppendTableSink 6 | import org.apache.flink.api.java.typeutils.RowTypeInfo 7 | import org.apache.flink.api.scala.typeutils.Types 8 | import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment, _} 9 | import org.apache.flink.table.api.Table 10 | import org.apache.flink.table.api.scala.BatchTableEnvironment 11 | 12 | /** 13 | * @Author yyb 14 | * @Description 15 | * @Date Create in 2020-04-29 16 | * @Time 17:25 17 | */ 18 | object WriteJDBCByTableSink { 19 | def main(args: Array[String]): Unit = { 20 | val env = ExecutionEnvironment.getExecutionEnvironment 21 | val batchTableEnv = BatchTableEnvironment.create(env) 22 | 23 | val words = "hello flink hello lagou" 24 | val WDS = words.split("\\W+").map(WD(_, 1)) 25 | 26 | val input: DataSet[WD] = env.fromCollection(WDS) 27 | 28 | val table: Table = batchTableEnv.fromDataSet(input) 29 | 30 | 31 | batchTableEnv.createTemporaryView("wordcount", table) 32 | 33 | 34 | val jdbcAppendTableSink: JDBCAppendTableSink = JDBCAppendTableSink.builder() 35 | .setBatchSize(2000) 36 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 37 | .setDrivername("com.mysql.jdbc.Driver") 38 | .setUsername("root") 39 | .setPassword("111111") 40 | .setQuery("insert into wordcount (word, count) values(?, ?)") 41 | .setParameterTypes(java.sql.Types.VARCHAR, java.sql.Types.INTEGER) 42 | .build() 43 | 44 | 45 | batchTableEnv.registerTableSink("wordcount_jdbc", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcAppendTableSink) 46 | 47 | table.insertInto("wordcount_jdbc") 48 | 49 | batchTableEnv.execute("WriteJDBCByTableSink") 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/kafka/SendData2KafkaByKafkaBatchSink.scala: -------------------------------------------------------------------------------- 1 | package 
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/batch/kafka/SendData2KafkaByKafkaBatchSink.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.batch.kafka

import java.io.InputStream
import java.util.Properties

import com.yyb.flink10.OutputFormat.KafkaOutputFormat
import com.yyb.flink10.sink.KafkaBatchTableSink
import org.apache.flink.api.scala._
import org.apache.flink.formats.json.JsonRowSerializationSchema
import org.apache.flink.kafka011.shaded.org.apache.kafka.clients.producer.ProducerRecord
import org.apache.flink.kafka011.shaded.org.apache.kafka.common.serialization.StringSerializer
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.internal.FlinkKafkaProducer
import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table, TableEnvironment, TableSchema}
import org.apache.flink.table.api.scala.{BatchTableEnvironment, StreamTableEnvironment}
import org.apache.flink.table.descriptors.{ConnectTableDescriptor, Json, Kafka, Schema}

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-07-28
 * @Time 16:12
 */
object SendData2KafkaByKafkaBatchSink {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    val blinkTableEnv = BatchTableEnvironment.create(env)
    val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties")
    val prop: Properties = new Properties()
    prop.load(in_env)
    println(prop.getProperty("zookeeper.connect"))

    val kafka = new Kafka
    kafka.version("0.11")
      .topic("eventsource_yhj")
      .property("zookeeper.connect", prop.getProperty("zookeeper.connect"))
      .property("bootstrap.servers", prop.getProperty("bootstrap.servers"))
      .property("group.id", "yyb_dev")
      .startFromLatest

    val schema = new Schema
    val tableSchema1 = TableSchema.builder
      .field("amount", DataTypes.INT)
      .field("currency", DataTypes.STRING).build
    schema.schema(tableSchema1)

    val tableSource = blinkTableEnv.connect(kafka)
      .withFormat(new Json().failOnMissingField(true))
      .withSchema(schema)
    tableSource.createTemporaryTable("Orders_tmp")

    val schemaString = new JsonRowSerializationSchema.Builder(tableSchema1.toRowType)
    val kafkaProp = new Properties()
    kafkaProp.put("key.serializer", classOf[StringSerializer])
    kafkaProp.put("value.serializer", classOf[StringSerializer])
    kafkaProp.put("zookeeper.connect", prop.getProperty("zookeeper.connect"))
    kafkaProp.put("bootstrap.servers", prop.getProperty("bootstrap.servers"))
    kafkaProp.put("topic", "eventsource_yhj")

    val kafkaProducer = new FlinkKafkaProducer[String, String](kafkaProp)
    val data = Array(Current(1, "Euro"))

    val dataDS = env.fromCollection(data)

    val datasOfRecord: DataSet[ProducerRecord[String, String]] = dataDS.map(x => {
      val record: ProducerRecord[String, String] = new ProducerRecord[String, String]("eventsource_yhj", x.toString)
      record
    })

    /**
     * Sending the data to Kafka this way first collects the records to the driver and only then sends them,
     * so it is not a distributed approach and needs tuning.
     */
    // datasOfRecord.collect().foreach(kafkaProducer.send(_))
    // kafkaProducer.flush()

    /**
     * This variant uses the DataSet KafkaOutputFormat instead.
     */
    val kafkaOutputFormat = new KafkaOutputFormat(kafkaProp)
    // dataDS.map(x => x.toString).output(kafkaOutputFormat)

    val dataTable: Table = blinkTableEnv.fromDataSet(dataDS.map(_.toString))

    blinkTableEnv.registerTable("dataSource", dataTable)

    val kafkaBatchTableSink = new KafkaBatchTableSink(kafkaOutputFormat)
    blinkTableEnv.registerTableSink("kafkaBatchTableSink", kafkaBatchTableSink)

    var sql =
      """
        |insert into kafkaBatchTableSink select * from dataSource
        |""".stripMargin
    // Since Kafka is unbounded, a batch-mode Kafka table sink cannot be used directly:
    // BatchTableSink or OutputFormatTableSink required to emit batch Table.
    blinkTableEnv.sqlUpdate(sql)

    // dataTable.insertInto("Orders_tmp")

    env.execute("SendData2KafkaByKafkaConnector")
  }

  case class Current(amount: Int, currency: String) {
    override def toString: String = {
      s"""{"amount":${amount},"currency":"${currency}"}""".stripMargin
    }

    def toBytes(): Array[Byte] = {
      toString.getBytes()
    }
  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/InsetMode/AppendOnly.java:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC.InsetMode;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-08-04
 * @Time 14:57
 */
public class AppendOnly {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env);

        DataStream<Tuple2<String, String>> data = env.fromElements(
                new Tuple2<>("Mary", "./home"),
                new Tuple2<>("Bob", "./cart"),
                new Tuple2<>("Mary", "./prod?id=1"),
                new Tuple2<>("Liz", "./home"),
                new Tuple2<>("Bob", "./prod?id=3")
        );

        Table clicksTable = flinkTableEnv.fromDataStream(data, "user,url");

        flinkTableEnv.registerTable("clicks", clicksTable);
        Table rs = flinkTableEnv.sqlQuery("select user , url from clicks where user='Mary'");
        DataStream<Row> rs_ds = flinkTableEnv.toAppendStream(rs, Row.class);
        rs_ds.print().setParallelism(1);

        env.execute("AppendOnly");

        /**
         * Result:
         * Mary,./prod?id=1
         * Mary,./home
         */
    }
}
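toAppendStream works here because the filtered query is insert-only; as soon as the query aggregates (see RetractStream right below), the conversion has to go through toRetractStream instead. A small Scala sketch of the same contrast, assuming a Scala StreamTableEnvironment named streamTableEnv with the same "clicks" table registered and the usual org.apache.flink.streaming.api.scala._ import:

// Insert-only projection: an append stream is fine.
val appendStream: DataStream[Row] =
  streamTableEnv.toAppendStream[Row](streamTableEnv.sqlQuery("select user, url from clicks"))

// Aggregating query: results get updated, so a retract stream is required;
// toAppendStream would fail at planning time for this query.
val retractStream: DataStream[(Boolean, Row)] =
  streamTableEnv.toRetractStream[Row](streamTableEnv.sqlQuery("select user, count(url) from clicks group by user"))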
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/InsetMode/RetractStream.java:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC.InsetMode;

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-08-04
 * @Time 14:57
 */
public class RetractStream {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env);

        DataStream<Tuple2<String, String>> data = env.fromElements(
                new Tuple2<>("Mary", "./home"),
                new Tuple2<>("Bob", "./cart"),
                new Tuple2<>("Mary", "./prod?id=1"),
                new Tuple2<>("Liz", "./home"),
                new Tuple2<>("Bob", "./prod?id=3")
        );

        Table clicksTable = flinkTableEnv.fromDataStream(data, "user,url");

        flinkTableEnv.registerTable("clicks", clicksTable);
        Table rs = flinkTableEnv.sqlQuery("select user , count(url) url_ct from clicks group by user");
        // note: toRetractStream is used here
        DataStream<Tuple2<Boolean, Tuple2<String, Long>>> rs_ds = flinkTableEnv.toRetractStream(rs, TypeInformation.of(new TypeHint<Tuple2<String, Long>>() {
        }));
        rs_ds.print().setParallelism(1);

        env.execute("RetractStream");

        /**
         * Result:
         * (true,(Liz,1))
         * (true,(Bob,1))
         * (false,(Bob,1))
         * (true,(Bob,2))
         * (true,(Mary,1))
         * (false,(Mary,1))
         * (true,(Mary,2))
         *
         * A first element of true marks a new row to be inserted; false marks an old row to be deleted.
         * In other words, updating a row in the table is decomposed into deleting the old row and then inserting the new one.
         */
    }
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/StreamJDBCReadByInputformat2TableSource.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC

import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.io.jdbc.JDBCInputFormat.JDBCInputFormatBuilder
import org.apache.flink.api.java.typeutils.RowTypeInfo
import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _}
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.types.Row

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-04-21
 * @Time 14:15
 */
object StreamJDBCReadByInputformat2TableSource {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val streamTableEnv = StreamTableEnvironment.create(env)

    val types = Array[TypeInformation[_]](Types.STRING, Types.LONG, Types.STRING)
    val fields = Array[String]("MT_KEY1", "MT_KEY2", "MT_COMMENT")
    val typeInfo = new RowTypeInfo(types, fields)

    val jdbc = new JDBCInputFormatBuilder()
      .setDBUrl("jdbc:mysql://127.0.0.1:3306/hive?useSSL=false&serverTimezone=UTC")
      .setDrivername("com.mysql.jdbc.Driver")
      .setUsername("hive")
      .setPassword("hive")
      .setQuery("select * from AUX_TABLE")
      .setRowTypeInfo(typeInfo)
      .finish()
    val mysqlSource: DataStream[Row] = env.createInput(jdbc)

    mysqlSource.print()

    val table: Table = streamTableEnv.fromDataStream(mysqlSource)

    streamTableEnv.createTemporaryView("AUX_TABLE", table)

    val table_q: Table = streamTableEnv.sqlQuery("select * from AUX_TABLE")
    table_q.printSchema()

    // so far, execute only needs to be called when the job actually has a sink
    streamTableEnv.execute("ConnectJDBCBatch")
  }
}
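Since the comment notes that execute only matters once the job has a sink, the simplest way to give this pipeline one is to convert the query result back to a DataStream and print it, the same pattern StreamQuery.scala uses further below. A sketch that could drop into the file above:

// Sketch: materialize the SQL result so execute("ConnectJDBCBatch") has something to run.
val resultStream: DataStream[Row] = streamTableEnv.toAppendStream[Row](table_q)
resultStream.print()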
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/StreamJobReadFromJDBCTableSource.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC

import org.apache.flink.api.java.io.jdbc.{JDBCLookupOptions, JDBCOptions, JDBCReadOptions, JDBCTableSource}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{Table, TableSchema}
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.types.AtomicDataType
import org.apache.flink.table.types.logical.{DateType, IntType, VarCharType}

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-04-26
 * @Time 21:39
 */
object StreamJobReadFromJDBCTableSource {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val streamTableEnv = StreamTableEnvironment.create(env)

    val lookOption = JDBCLookupOptions.builder()
      .setCacheExpireMs(60 * 1000)
      .setCacheMaxSize(1024 * 1024)
      .setMaxRetryTimes(10)
      .build()

    val jdbcOpition = JDBCOptions.builder()
      .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC")
      .setDriverName("com.mysql.jdbc.Driver")
      .setUsername("root")
      .setPassword("111111")
      .setTableName("t_order")
      .build()

    val jdbcReadOption = JDBCReadOptions.builder()
      .setFetchSize(5000)
      .build()

    val tableSchema = TableSchema.builder()
      .field("id", new AtomicDataType(new IntType))
      .field("name", new AtomicDataType(new VarCharType(2147483647))) // note: STRING corresponds to VarCharType(2147483647)
      .field("time", new AtomicDataType(new DateType))
      .build()

    val jdbcTableSource = JDBCTableSource.builder()
      .setLookupOptions(lookOption)
      .setOptions(jdbcOpition)
      .setReadOptions(jdbcReadOption)
      .setSchema(tableSchema)
      .build()

    val t_order: Table = streamTableEnv.fromTableSource(jdbcTableSource)

    streamTableEnv.registerTableSource("t_order1", jdbcTableSource)

    streamTableEnv.createTemporaryView("t_order", t_order)

    val sql =
      """
        |select * from t_order
      """.stripMargin
    streamTableEnv.sqlQuery(sql).printSchema()

    // streamTableEnv.registerTableSink()
  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/WriteDataByJDBCTableUpsertSink.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC

import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.api.java.io.jdbc.{JDBCOptions, JDBCUpsertTableSink}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _}
import org.apache.flink.table.api.{Table, TableSchema}
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.types.AtomicDataType
import org.apache.flink.table.types.logical.{IntType, VarCharType}

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-05-05
 * @Time 13:04
 */
object WriteDataByJDBCTableUpsertSink {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val flinkSteramTableEnv = StreamTableEnvironment.create(env)

    val fileSourcePath = "./data/data.txt"

    val wcStream: DataStream[(String, Int)] = env.readTextFile(fileSourcePath)
      .flatMap(_.split("\\W+"))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0)
      .sum(1)

    val source: Table = flinkSteramTableEnv.fromDataStream(wcStream)

    flinkSteramTableEnv.registerTable("word_flink", source)

    val jdbcOpition = JDBCOptions.builder()
      .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC")
      .setDriverName("com.mysql.jdbc.Driver")
      .setUsername("root")
      .setPassword("111111")
      .setTableName("wordcount")
      .build()

    val tableSchema = TableSchema.builder()
      .field("word", new AtomicDataType(new VarCharType(2147483647))) // note: STRING corresponds to VarCharType(2147483647)
      .field("count", new AtomicDataType(new IntType))
      .build()

    val jdbcUpsertTableSink = JDBCUpsertTableSink.builder()
      .setOptions(jdbcOpition)
      .setFlushIntervalMills(1000)
      .setFlushMaxSize(1024 * 1024 * 12)
      .setTableSchema(tableSchema)
      .build()
    jdbcUpsertTableSink.setKeyFields(Array("word"))
    jdbcUpsertTableSink.setIsAppendOnly(false)
    flinkSteramTableEnv.registerTableSink("word_mysql", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcUpsertTableSink)
    // Note: the query written into this sink should contain an aggregation; otherwise the result table ends up
    // with multiple records, because the IsAppendOnly and KeyFields of the JDBCUpsertTableSink are inferred
    // from the Flink execution plan.
    source.insertInto("word_mysql")

    flinkSteramTableEnv.execute("WriteDataByJDBCTableUpsertSink")
  }
}
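The note above says the inserted query should carry an aggregation so the planner can infer the upsert key and the append-only flag on its own. A sketch of that variant, assuming the default tuple field names _1/_2 that fromDataStream assigns here (the backticks are needed because count is a reserved word; the query itself is not part of the original file):

// Sketch: an explicit aggregation lets the planner derive the unique key for the upsert sink.
val aggregated: Table = flinkSteramTableEnv.sqlQuery(
  "select _1 as word, sum(_2) as `count` from word_flink group by _1")
aggregated.insertInto("word_mysql")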
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/WriteDataByTableSink.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC

import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.api.java.io.jdbc.JDBCAppendTableSink
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _}
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.scala.StreamTableEnvironment

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-04-30
 * @Time 09:16
 */
object WriteDataByTableSink {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val streamTableEnv = StreamTableEnvironment.create(env)

    val fileSourcePath = "./data/data.txt"

    val wcStream: DataStream[(String, Int)] = env.readTextFile(fileSourcePath)
      .flatMap(_.split("\\W+"))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0)
      .sum(1)

    val table: Table = streamTableEnv.fromDataStream(wcStream)

    streamTableEnv.createTemporaryView("wd", table)

    streamTableEnv.sqlQuery("select * from wd").printSchema()

    val jdbcAppendTableSink = JDBCAppendTableSink.builder()
      .setBatchSize(2000)
      .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC")
      .setDrivername("com.mysql.jdbc.Driver")
      .setUsername("root")
      .setPassword("111111")
      .setQuery("insert into wordcount (word, count) values(?, ?)")
      .setParameterTypes(java.sql.Types.VARCHAR, java.sql.Types.INTEGER)
      .build()

    streamTableEnv.registerTableSink("mysql_wordcount", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcAppendTableSink)
    table.insertInto("mysql_wordcount")

    streamTableEnv.execute("WriteDataByTableSink")
  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/StreamQuery.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{EnvironmentSettings, Table}
import org.apache.flink.table.api.scala.StreamTableEnvironment

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-04-19
 * @Time 12:44
 */
object StreamQuery {
  def main(args: Array[String]): Unit = {
    // note: an EnvironmentSettings instance is newly added here
    val flinkStreamSettings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build()
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val streamTableEnv = StreamTableEnvironment.create(env, flinkStreamSettings)

    val fileSourcePath = "/Users/yyb/Downloads/1.txt"

    val wcStream: DataStream[(String, Int)] = env.readTextFile(fileSourcePath)
      .flatMap(_.split("\\W+"))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0)
      .sum(1)

    val table: Table = streamTableEnv.fromDataStream(wcStream)

    streamTableEnv.createTemporaryView("wd", table)

    streamTableEnv.sqlQuery("select * from wd").printSchema()

    val appendDateStream: DataStream[WD] = streamTableEnv.toAppendStream[WD](table)

    appendDateStream.print()

    env.execute("StreamQuery")
  }

  case class WD(word: String, count: Int)
}
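StreamQuery pins the old planner explicitly through EnvironmentSettings. The Blink-planner variant of the same setup differs only in the builder call; a sketch for comparison:

// Old planner (as above) vs. Blink planner settings in Flink 1.10.
val oldPlannerSettings   = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build()
val blinkPlannerSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
val blinkStreamTableEnv  = StreamTableEnvironment.create(env, blinkPlannerSettings)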
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/kafka/SendData2KafkaByKafkaConnector.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.kafka

import java.io.InputStream
import java.util.Properties

import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table, TableSchema}
import org.apache.flink.table.descriptors.{Json, Kafka, Schema}

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-07-28
 * @Time 16:12
 */
object SendData2KafkaByKafkaConnector {
  def main(args: Array[String]): Unit = {
    val settings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build()
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val blinkTableEnv = StreamTableEnvironment.create(env, settings)

    val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties")
    val prop: Properties = new Properties()
    prop.load(in_env)
    println(prop.getProperty("zookeeper.connect"))

    val kafka = new Kafka
    kafka.version("0.11")
      .topic("eventsource_yhj")
      .property("zookeeper.connect", prop.getProperty("zookeeper.connect"))
      .property("bootstrap.servers", prop.getProperty("bootstrap.servers"))
      .property("group.id", "yyb_dev")
      .startFromLatest

    val schema = new Schema
    val tableSchema1 = TableSchema.builder
      .field("amount", DataTypes.INT)
      .field("currency", DataTypes.STRING).build
    schema.schema(tableSchema1)
    val tableSource = blinkTableEnv.connect(kafka)
      .withFormat(new Json().failOnMissingField(true))
      .withSchema(schema)
    tableSource.createTemporaryTable("Orders_tmp")

    val data = Array(Current(1, "Euro"))

    val dataDS = env.fromCollection(data)

    val dataTable: Table = blinkTableEnv.fromDataStream(dataDS)

    // blinkTableEnv.registerTable("dataSource", dataTable)

    var sql =
      """
        |insert into Orders_tmp select * from dataSource
        |""".stripMargin
    // Since Kafka is unbounded, a batch-mode Kafka table sink cannot be used.
    // blinkTableEnv.sqlUpdate(sql)

    dataTable.insertInto("Orders_tmp")

    env.execute("SendData2KafkaByKafkaConnector")
  }

  case class Current(amount: Int, currency: String)
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/util/ParquetAvroWritersSelf.java:
--------------------------------------------------------------------------------
package com.yyb.flink10.util;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.reflect.ReflectData;
import org.apache.flink.formats.parquet.ParquetBuilder;
import org.apache.flink.formats.parquet.ParquetWriterFactory;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.io.OutputFile;

import java.io.IOException;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-17
 * @Time 14:37
 */
public class ParquetAvroWritersSelf {
    public static ParquetWriterFactory<GenericData.Record> forGenericRecord(Schema schema) {
        final String schemaString = schema.toString();
        final ParquetBuilder<GenericData.Record> builder = (out) -> createAvroParquetWriter(schemaString, GenericData.get(), out);
        return new ParquetWriterFactory<>(builder);
    }

    private static ParquetWriter<GenericData.Record> createAvroParquetWriter(
            String schemaString,
            GenericData dataModel,
            OutputFile out) throws IOException {

        final Schema schema = new Schema.Parser().parse(schemaString);

        return AvroParquetWriter.<GenericData.Record>builder(out)
                .withSchema(schema)
                .withDataModel(dataModel)
                .build();
    }
}
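ParquetAvroWritersSelf produces a ParquetWriterFactory, which is the bulk format accepted by StreamingFileSink.forBulkFormat. A usage sketch in Scala (the record stream, the Avro schema and the output path are assumptions, not part of the original class):

import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink

// Sketch: write a DataStream of Avro GenericData.Record as Parquet files.
val parquetSink = StreamingFileSink
  .forBulkFormat(new Path("/tmp/parquet-out"), ParquetAvroWritersSelf.forGenericRecord(schema))
  .build()
records.addSink(parquetSink)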
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/util/RecordTypeInfo.java:
--------------------------------------------------------------------------------
package com.yyb.flink10.util;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.CompositeType;
import org.apache.flink.api.common.typeutils.TypeSerializer;

import java.util.List;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-17
 * @Time 15:15
 */
public class RecordTypeInfo extends CompositeType<GenericData.Record> implements GenericRecord, Comparable<GenericData.Record> {

    // private final Object[] fields;

    public RecordTypeInfo(Class<GenericData.Record> typeClass, TypeInformation<?>... types) {
        super(typeClass);
    }

    @Override
    public int compareTo(GenericData.Record o) {
        return 0;
    }

    @Override
    public void put(String key, Object v) {

    }

    @Override
    public Object get(String key) {
        return null;
    }

    @Override
    public void put(int i, Object v) {

    }

    @Override
    public Object get(int i) {
        return null;
    }

    @Override
    public Schema getSchema() {
        return null;
    }

    @Override
    public void getFlatFields(String fieldExpression, int offset, List<FlatFieldDescriptor> result) {

    }

    @Override
    public <X> TypeInformation<X> getTypeAt(String fieldExpression) {
        return null;
    }

    @Override
    public <X> TypeInformation<X> getTypeAt(int pos) {
        return null;
    }

    @Override
    protected TypeComparatorBuilder<GenericData.Record> createTypeComparatorBuilder() {
        return null;
    }

    @Override
    public String[] getFieldNames() {
        return new String[0];
    }

    @Override
    public int getFieldIndex(String fieldName) {
        return 0;
    }

    @Override
    public boolean isBasicType() {
        return false;
    }

    @Override
    public boolean isTupleType() {
        return false;
    }

    @Override
    public int getArity() {
        return 0;
    }

    @Override
    public int getTotalFields() {
        return 0;
    }

    @Override
    public TypeSerializer<GenericData.Record> createSerializer(ExecutionConfig config) {
        return null;
    }
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/util1/Demo.java:
--------------------------------------------------------------------------------
package com.yyb.flink10.util1;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-08-20
 * @Time 16:09
 */
public class Demo {
    public static void main(String[] args) {
        System.out.println(("LSJA24W96KS001123".hashCode() & Integer.MAX_VALUE) % 1080);
    }
}
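Demo masks the sign bit with Integer.MAX_VALUE so the hash becomes non-negative before the modulo, i.e. it maps the key into one of 1080 buckets. The same idea as a small Scala helper (the function name is made up for illustration):

// Equivalent of ("LSJA24W96KS001123".hashCode() & Integer.MAX_VALUE) % 1080, generalised.
def bucketFor(key: String, buckets: Int = 1080): Int =
  (key.hashCode & Int.MaxValue) % buckets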
--------------------------------------------------------------------------------
/src/main/scala/flink/api/java/RecordAvroTypeInfo.java:
--------------------------------------------------------------------------------
package flink.api.java;

import org.apache.avro.generic.GenericData;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-29
 * @Time 11:21
 */
public class RecordAvroTypeInfo extends TypeInformation<GenericData.Record> {
    @Override
    public boolean isBasicType() {
        return false;
    }

    @Override
    public boolean isTupleType() {
        return false;
    }

    @Override
    public int getArity() {
        return 0;
    }

    @Override
    public int getTotalFields() {
        return 0;
    }

    @Override
    public Class<GenericData.Record> getTypeClass() {
        return null;
    }

    @Override
    public boolean isKeyType() {
        return false;
    }

    @Override
    public TypeSerializer<GenericData.Record> createSerializer(ExecutionConfig config) {
        return null;
    }

    @Override
    public String toString() {
        return null;
    }

    @Override
    public boolean equals(Object obj) {
        return false;
    }

    @Override
    public int hashCode() {
        return 0;
    }

    @Override
    public boolean canEqual(Object obj) {
        return false;
    }
}
--------------------------------------------------------------------------------
/src/main/scala/flink/api/java/Tuple0.java:
--------------------------------------------------------------------------------
package flink.api.java;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-18
 * @Time 14:07
 */
public class Tuple0 extends org.apache.flink.api.java.tuple.Tuple0 implements GenericRecord, Comparable<GenericData.Record> {
    @Override
    public int compareTo(GenericData.Record o) {
        return 0;
    }

    @Override
    public void put(String key, Object v) {

    }

    @Override
    public Object get(String key) {
        return null;
    }

    @Override
    public void put(int i, Object v) {

    }

    @Override
    public Object get(int i) {
        return null;
    }

    @Override
    public Schema getSchema() {
        return null;
    }
}
--------------------------------------------------------------------------------
/src/main/scala/flink/api/java/Tuple1.java:
--------------------------------------------------------------------------------
package flink.api.java;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-18
 * @Time 14:07
 */
public class Tuple1<T0> extends org.apache.flink.api.java.tuple.Tuple1<T0> implements GenericRecord, Comparable<GenericData.Record> {
    @Override
    public int compareTo(GenericData.Record o) {
        return 0;
    }

    @Override
    public void put(String key, Object v) {

    }

    @Override
    public Object get(String key) {
        return null;
    }

    @Override
    public void put(int i, Object v) {

    }

    @Override
    public Object get(int i) {
        return null;
    }

    @Override
    public Schema getSchema() {
        return null;
    }
}
--------------------------------------------------------------------------------
/src/main/scala/flink/api/java/Tuple2.java:
--------------------------------------------------------------------------------
package flink.api.java;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-18
 * @Time 14:07
 */
public class Tuple2<T0, T1> extends org.apache.flink.api.java.tuple.Tuple2<T0, T1> implements GenericRecord, Comparable<GenericData.Record> {
    private Object[] values = new Object[2];
    private Schema schema = null;

    public Tuple2() {
        super();
    }

    public Tuple2(Schema schema) {
        super();
        if (schema == null || !Schema.Type.RECORD.equals(schema.getType()))
            throw new AvroRuntimeException("Not a record schema: " + schema);
        this.schema = schema;
        this.values = new Object[schema.getFields().size()];
    }

    public Tuple2(Schema schema, T0 value0, T1 value1) {
        super(value0, value1);
        if (schema == null || !Schema.Type.RECORD.equals(schema.getType()))
            throw new AvroRuntimeException("Not a record schema: " + schema);
        this.schema = schema;
        this.values = new Object[2];
        this.values[0] = value0;
        this.values[1] = value1;
    }

    public Tuple2(T0 value0, T1 value1) {
        super(value0, value1);
        this.values = new Object[2];
        this.values[0] = value0;
        this.values[1] = value1;
    }

    @Override
    public int compareTo(GenericData.Record o) {
        return GenericData.get().compare(this, o, schema);
    }

    @Override
    public void put(String key, Object v) {
        Schema.Field field = schema.getField(key);
        if (field == null)
            throw new AvroRuntimeException("Not a valid schema field: " + key);

        values[field.pos()] = v;
    }

    @Override
    public Object get(String key) {
        Schema.Field field = schema.getField(key);
        if (field == null) return null;
        return values[field.pos()];
    }

    @Override
    public void put(int i, Object v) {
        values[i] = v;
    }

    @Override
    public Object get(int i) {
        return values[i];
    }

    @Override
    public Schema getSchema() {
        return schema;
    }

    @Override
    public <T> void setField(T value, int pos) {
        super.setField(value, pos);
        this.values[pos] = value;
    }

    @Override
    public void setFields(T0 value0, T1 value1) {
        super.setFields(value0, value1);
        this.values[0] = value0;
        this.values[1] = value1;
    }

    public static <T0, T1> org.apache.flink.api.java.tuple.Tuple2<T0, T1> of(T0 value0, T1 value1) {
        return new Tuple2<>(value0, value1);
    }
}
--------------------------------------------------------------------------------
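The schema-aware Tuple2 above can stand in for an Avro GenericRecord, for example as input to the Parquet writer factory earlier in this module. A construction sketch in Scala (the record name and fields are made up for illustration):

import org.apache.avro.SchemaBuilder

// Sketch: a two-field Avro schema matching the tuple's arity.
val wcSchema = SchemaBuilder.record("WordCount").fields()
  .requiredString("word")
  .requiredInt("count")
  .endRecord()

val rec = new flink.api.java.Tuple2[String, Int](wcSchema, "hello", 1)
rec.get("word") // resolved through the schema's field positions -> "hello"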