├── .gitignore ├── DeplyREADME.md ├── FLINKSQL.md ├── FlinkOnYarnDeploy.md ├── LICENSE ├── README.md ├── data ├── data.txt └── t_order.sql ├── pom.xml └── src └── main ├── TestResources ├── saic_cdh6_dev │ ├── core-site.xml │ ├── hdfs-site.xml │ └── hive-site.xml ├── saic_dev │ ├── core-site.xml │ ├── hdfs-site.xml │ ├── hive-site.xml │ ├── mapred-site.xml │ └── yarn-site.xml └── weekend │ ├── core-site.xml │ ├── hdfs-site.xml │ ├── hive-site.xml │ ├── mapred-site.xml │ └── yarn-site.xml ├── resources ├── core-site.xml ├── env.properties ├── hdfs-site.xml ├── hive-site.xml └── log4j.properties └── scala ├── com └── yyb │ └── flink10 │ ├── DataSet │ ├── JDBC │ │ ├── ReadFromJDBCInputFormat.scala │ │ └── WriteToMysqlByOutputformat.scala │ ├── State │ │ └── StateOfCountWindowAverage.java │ ├── WordCount.scala │ └── kafka │ │ └── SendData2KafkaByKafkaOutputFormat.scala │ ├── DataStream │ ├── ProcessFunction │ │ ├── KeyedProcessFunctionDemo.java │ │ └── KeyedProcessFunctionOnNOKeyStreamDemo.java │ ├── State │ │ └── StateOfCountWindowAverage.java │ ├── data │ │ └── WordCountData.java │ ├── kafka │ │ ├── EventTimeDemo.java │ │ ├── SendData2KafkaByKafkaConnector.scala │ │ ├── SendData2KafkaByKafkaConnectorBrowse.scala │ │ └── SendData2KafkaByKafkaConnectorProduct.scala │ ├── parquet │ │ └── WriteParquetWordCount.scala │ └── sink │ │ ├── JDBC │ │ ├── ReadFromInputFormat.scala │ │ └── WriteToMysqlByJDBCOutputformat.scala │ │ └── StreamingFileSink │ │ ├── BulkEncodedSink │ │ ├── WordCountFileSourceStreamFileSinkOfParquet.scala │ │ ├── WordCountFileSourceStreamFileSinkOfParquetToHDFS.scala │ │ └── WordCountFileSourceStreamFileSinkOfSequence.scala │ │ └── RowEncodedSink │ │ ├── WordCountElementsSourceStreamFileSink.scala │ │ ├── WordCountElementsSourceStreamFileSinkJava.java │ │ └── WordCountFileSourceStreamFileSink.scala │ ├── OutputFormat │ └── KafkaOutputFormat.java │ ├── commonEntity │ ├── Current1.java │ ├── Current2.java │ ├── Pi.java │ ├── ProductInfo.java │ ├── Rate.java │ ├── Rate2.java │ └── UserBrowseLog.java │ ├── sink │ ├── KafkaBatchTableSink.java │ ├── ParquetSinkFunction.scala │ └── ParquetWriterSink.scala │ ├── table │ ├── blink │ │ ├── batch │ │ │ ├── BatchQuery.scala │ │ │ ├── BlinkHiveBatchDemo.scala │ │ │ ├── JDBC │ │ │ │ ├── BlinkBatchReadFromJDBCTableSource.scala │ │ │ │ └── BlinkBatchWriteToJDBCTableSink.scala │ │ │ ├── hive │ │ │ │ └── Fromkafka2HiveUseCatalog.java │ │ │ └── kafka │ │ │ │ └── WriteJsonDataByKafkaConnector.java │ │ └── stream │ │ │ ├── FileSystem │ │ │ ├── ReadFromKafkaConnectorWriteToLocalParquetFileJava.java │ │ │ ├── ReadFromKafkaConnectorWriteToLocalParquetFilePiJava.java │ │ │ └── ReadFromKafkaConnectorWriteToLocalTextFileJava.java │ │ │ ├── JDBC │ │ │ ├── ReadDataFromJDBCTableSource.scala │ │ │ ├── WriteDataByJDBCTableSink.scala │ │ │ └── WriteDataByJDBCTableUpsertSink.scala │ │ │ ├── StreamQuery.scala │ │ │ ├── TemporalTable │ │ │ └── MysqlTemporalTable.java │ │ │ ├── elasticsearch │ │ │ ├── WriteData2EsByConnectorTestEnv.java │ │ │ ├── WriteData2EsBySink.java │ │ │ ├── WriteData2EsBySinkTestEnv.java │ │ │ └── WriteData2EsByTableDesc.java │ │ │ ├── hive │ │ │ ├── Fromkafka2HiveUseCatalog.java │ │ │ ├── WriteData2HiveJavaReadFromkafkaTableSource.java │ │ │ └── WriteData2HiveReadFromkafkaTableSource.scala │ │ │ ├── join │ │ │ └── temporaltable │ │ │ │ ├── JoinWithKafkaConsumerTeporalTableFunction.java │ │ │ │ ├── JoinWithKafkaTeporalTableFunction.java │ │ │ │ ├── JoinWithLookupFunctionCheckpoint.java │ │ │ │ ├── JoinWithTeporalTableFunction.java │ │ │ │ ├── 
JoinWithTeporalTableFunctionWithJDBCConnection.java │ │ │ │ ├── TemporalTableDemo.java │ │ │ │ ├── TemporalTableFunction.java │ │ │ │ └── TemporalTableFunctionDemo.java │ │ │ └── kafka │ │ │ ├── EventTimeDemo.java │ │ │ ├── ReadDataFromKafkaConnector.scala │ │ │ ├── ReadDataFromKafkaConnectorJava.java │ │ │ ├── ReadDataFromKafkaSource.scala │ │ │ ├── ReadDataFromKafkaSourceJava.java │ │ │ ├── WriteToKafkaByKafkaConnectorOfOrder.java │ │ │ ├── WriteToKafkaByKafkaConnectorOfRates.java │ │ │ └── watermark │ │ │ └── UseProcesstimeAsWatermark.java │ └── flink │ │ ├── batch │ │ ├── BatchQuery.scala │ │ ├── BatchReadFromParquetQuery.scala │ │ ├── BatchReadFromSequenceQuery.scala │ │ ├── JDBC │ │ │ ├── BatchJDBCReadByInputformat2TableSource.scala │ │ │ ├── BatchJobReadFromJDBCTableSource.scala │ │ │ └── WriteJDBCByTableSink.scala │ │ └── kafka │ │ │ └── SendData2KafkaByKafkaBatchSink.scala │ │ └── stream │ │ ├── JDBC │ │ ├── InsetMode │ │ │ ├── AppendOnly.java │ │ │ ├── RetractStream.java │ │ │ └── UpsertStream.java │ │ ├── StreamJDBCReadByInputformat2TableSource.scala │ │ ├── StreamJobReadFromJDBCTableSource.scala │ │ ├── WriteDataByJDBCTableUpsertSink.scala │ │ ├── WriteDataByTableSink.scala │ │ └── WriteMysqlByJDBCConnectorUpsertMode.java │ │ ├── StreamQuery.scala │ │ └── kafka │ │ └── SendData2KafkaByKafkaConnector.scala │ ├── util │ ├── ParquetAvroWritersSelf.java │ └── RecordTypeInfo.java │ └── util1 │ ├── Demo.java │ └── GeneratorClassByASM.java ├── flink └── api │ └── java │ ├── RecordAvroTypeInfo.java │ ├── Tuple0.java │ ├── Tuple1.java │ └── Tuple2.java └── org └── apache └── flink └── streaming └── connectors └── Elasticsearch7UpsertTableSinkPlus.java /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | *.iml 4 | xxx.text/ 5 | metastore_db/ 6 | *.log 7 | derby.log 8 | dependency-reduced-pom.xml 9 | .DS_Store -------------------------------------------------------------------------------- /DeplyREADME.md: -------------------------------------------------------------------------------- 1 | # Deploy 2 | ## StandAlone 3 | bin/flink run --class com.... -classpath xxx xxx.jar 4 | ## Flink On Yarn 5 | bin/flink run -m yarn-cluster --class xxx --classpath xxx xxx.jar -------------------------------------------------------------------------------- /FlinkOnYarnDeploy.md: -------------------------------------------------------------------------------- 1 | #FlinkOnYarkDeploy -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 
14 | -------------------------------------------------------------------------------- /data/data.txt: -------------------------------------------------------------------------------- 1 | "To be, or not to be,--that is the question:--", 2 | "Whether 'tis nobler in the mind to suffer", 3 | "The slings and arrows of outrageous fortune", 4 | "Or to take arms against a sea of troubles,", 5 | "And by opposing end them?--To die,--to sleep,--", 6 | "No more; and by a sleep to say we end", 7 | "The heartache, and the thousand natural shocks", 8 | "That flesh is heir to,--'tis a consummation", 9 | "Devoutly to be wish'd. To die,--to sleep;--", 10 | "To sleep! perchance to dream:--ay, there's the rub;", 11 | "For in that sleep of death what dreams may come,", 12 | "When we have shuffled off this mortal coil,", 13 | "Must give us pause: there's the respect", 14 | "That makes calamity of so long life;", 15 | "For who would bear the whips and scorns of time,", 16 | "The oppressor's wrong, the proud man's contumely,", 17 | "The pangs of despis'd love, the law's delay,", 18 | "The insolence of office, and the spurns", 19 | "That patient merit of the unworthy takes,", 20 | "When he himself might his quietus make", 21 | "With a bare bodkin? who would these fardels bear,", 22 | "To grunt and sweat under a weary life,", 23 | "But that the dread of something after death,--", 24 | "The undiscover'd country, from whose bourn", 25 | "No traveller returns,--puzzles the will,", 26 | "And makes us rather bear those ills we have", 27 | "Than fly to others that we know not of?", 28 | "Thus conscience does make cowards of us all;", 29 | "And thus the native hue of resolution", 30 | "Is sicklied o'er with the pale cast of thought;", 31 | "And enterprises of great pith and moment,", 32 | "With this regard, their currents turn awry,", 33 | "And lose the name of action.--Soft you now!", 34 | "The fair Ophelia!--Nymph, in thy orisons", 35 | "Be all my sins remember'd." 
-------------------------------------------------------------------------------- /data/t_order.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Navicat Premium Data Transfer 3 | 4 | Source Server : persona-test-new 5 | Source Server Type : MySQL 6 | Source Server Version : 50721 7 | Source Host : 172.16.11.82 8 | Source Database : persona 9 | 10 | Target Server Type : MySQL 11 | Target Server Version : 50721 12 | File Encoding : utf-8 13 | 14 | Date: 04/26/2020 10:15:29 AM 15 | */ 16 | 17 | SET NAMES utf8; 18 | SET FOREIGN_KEY_CHECKS = 0; 19 | 20 | -- ---------------------------- 21 | -- Table structure for `t_order` 22 | -- ---------------------------- 23 | DROP TABLE IF EXISTS `t_order`; 24 | CREATE TABLE `t_order` ( 25 | `id` int(11) DEFAULT NULL, 26 | `name` varchar(50) DEFAULT NULL, 27 | `time` date DEFAULT NULL 28 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 29 | 30 | -- ---------------------------- 31 | -- Records of `t_order` 32 | -- ---------------------------- 33 | BEGIN; 34 | INSERT INTO `t_order` VALUES ('1001', '用券', '2019-04-01'), ('1002', '不用券', '2019-05-10'), ('1001', '不用券', '2019-05-01'), ('1003', '不用券', '2019-04-12'), ('1001', '不用券', '2019-05-11'), ('1002', '用券', '2019-05-30'), ('1001', '不用券', '2019-05-22'), ('1003', '用券', '2019-05-24'); 35 | COMMIT; 36 | 37 | SET FOREIGN_KEY_CHECKS = 1; 38 | -------------------------------------------------------------------------------- /src/main/TestResources/saic_cdh6_dev/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | fs.defaultFS 7 | hdfs://nameservice1 8 | 9 | 10 | fs.trash.interval 11 | 1 12 | 13 | 14 | io.compression.codecs 15 | org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec 16 | 17 | 18 | hadoop.security.authentication 19 | simple 20 | 21 | 22 | hadoop.security.authorization 23 | false 24 | 25 | 26 | hadoop.rpc.protection 27 | authentication 28 | 29 | 30 | hadoop.security.auth_to_local 31 | DEFAULT 32 | 33 | 34 | hadoop.proxyuser.oozie.hosts 35 | * 36 | 37 | 38 | hadoop.proxyuser.oozie.groups 39 | * 40 | 41 | 42 | hadoop.proxyuser.flume.hosts 43 | * 44 | 45 | 46 | hadoop.proxyuser.flume.groups 47 | * 48 | 49 | 50 | hadoop.proxyuser.HTTP.hosts 51 | * 52 | 53 | 54 | hadoop.proxyuser.HTTP.groups 55 | * 56 | 57 | 58 | hadoop.proxyuser.hive.hosts 59 | * 60 | 61 | 62 | hadoop.proxyuser.hive.groups 63 | * 64 | 65 | 66 | hadoop.proxyuser.hue.hosts 67 | * 68 | 69 | 70 | hadoop.proxyuser.hue.groups 71 | * 72 | 73 | 74 | hadoop.proxyuser.httpfs.hosts 75 | * 76 | 77 | 78 | hadoop.proxyuser.httpfs.groups 79 | * 80 | 81 | 82 | hadoop.proxyuser.hdfs.groups 83 | * 84 | 85 | 86 | hadoop.proxyuser.hdfs.hosts 87 | * 88 | 89 | 90 | hadoop.proxyuser.yarn.hosts 91 | * 92 | 93 | 94 | hadoop.proxyuser.yarn.groups 95 | * 96 | 97 | 98 | hadoop.security.group.mapping 99 | org.apache.hadoop.security.ShellBasedUnixGroupsMapping 100 | 101 | 102 | hadoop.security.instrumentation.requires.admin 103 | false 104 | 105 | 106 | net.topology.script.file.name 107 | /etc/hadoop/conf.cloudera.yarn/topology.py 108 | 109 | 110 | io.file.buffer.size 111 | 65536 112 | 113 | 114 | hadoop.ssl.enabled 115 | false 116 | 117 | 118 | hadoop.ssl.require.client.cert 119 | false 120 | true 121 | 122 
| 123 | hadoop.ssl.keystores.factory.class 124 | org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory 125 | true 126 | 127 | 128 | hadoop.ssl.server.conf 129 | ssl-server.xml 130 | true 131 | 132 | 133 | hadoop.ssl.client.conf 134 | ssl-client.xml 135 | true 136 | 137 | 138 | fs.protected.directories 139 | /testdir 140 | 141 | 142 | ipc.maximum.data.length 143 | 268435456 144 | 145 | 146 | -------------------------------------------------------------------------------- /src/main/TestResources/saic_cdh6_dev/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | dfs.client.failover.proxy.provider.nameservice1 8 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 9 | 10 | 11 | dfs.ha.automatic-failover.enabled.nameservice1 12 | true 13 | 14 | 15 | ha.zookeeper.quorum 16 | njdev-nn01.nj:2181,njdev-nn02.nj:2181,njdev-nn03.nj:2181 17 | 18 | 19 | dfs.ha.namenodes.nameservice1 20 | namenode82,namenode89 21 | 22 | 23 | dfs.namenode.rpc-address.nameservice1.namenode82 24 | njdev-nn01.nj:8020 25 | 26 | 27 | dfs.namenode.servicerpc-address.nameservice1.namenode82 28 | njdev-nn01.nj:8022 29 | 30 | 31 | dfs.namenode.http-address.nameservice1.namenode82 32 | njdev-nn01.nj:9870 33 | 34 | 35 | dfs.namenode.https-address.nameservice1.namenode82 36 | njdev-nn01.nj:9871 37 | 38 | 39 | dfs.namenode.rpc-address.nameservice1.namenode89 40 | njdev-nn02.nj:8020 41 | 42 | 43 | dfs.namenode.servicerpc-address.nameservice1.namenode89 44 | njdev-nn02.nj:8022 45 | 46 | 47 | dfs.namenode.http-address.nameservice1.namenode89 48 | njdev-nn02.nj:9870 49 | 50 | 51 | dfs.namenode.https-address.nameservice1.namenode89 52 | njdev-nn02.nj:9871 53 | 54 | 55 | dfs.replication 56 | 2 57 | 58 | 59 | dfs.blocksize 60 | 134217728 61 | 62 | 63 | dfs.client.use.datanode.hostname 64 | false 65 | 66 | 67 | fs.permissions.umask-mode 68 | 022 69 | 70 | 71 | dfs.client.block.write.locateFollowingBlock.retries 72 | 7 73 | 74 | 75 | dfs.namenode.acls.enabled 76 | true 77 | 78 | 79 | dfs.client.read.shortcircuit 80 | true 81 | 82 | 83 | dfs.domain.socket.path 84 | /var/run/hdfs-sockets/dn 85 | 86 | 87 | dfs.client.read.shortcircuit.skip.checksum 88 | false 89 | 90 | 91 | dfs.client.domain.socket.data.traffic 92 | false 93 | 94 | 95 | dfs.datanode.hdfs-blocks-metadata.enabled 96 | true 97 | 98 | 99 | dfs.nameservices 100 | nameservice1,njdev-cdh5 101 | 102 | 103 | dfs.ha.namenodes.njdev-cdh5 104 | nn1,nn2 105 | 106 | 107 | dfs.namenode.rpc-address.njdev-cdh5.nn1 108 | 172.16.10.148:8020 109 | 110 | 111 | dfs.namenode.rpc-address.njdev-cdh5.nn2 112 | 172.16.10.149:8020 113 | 114 | 115 | dfs.namenode.http-address.njdev-cdh5.nn1 116 | 172.16.10.148:50070 117 | 118 | 119 | dfs.namenode.http-address.njdev-cdh5.nn2 120 | 172.16.10.149:50070 121 | 122 | 123 | dfs.client.failover.proxy.provider.njdev-cdh5 124 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 125 | 126 | 127 | -------------------------------------------------------------------------------- /src/main/TestResources/saic_dev/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | fs.defaultFS 7 | hdfs://nameservice1 8 | 9 | 10 | fs.trash.interval 11 | 1 12 | 13 | 14 | io.compression.codecs 15 | 
org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec 16 | 17 | 18 | hadoop.security.authentication 19 | simple 20 | 21 | 22 | hadoop.security.authorization 23 | false 24 | 25 | 26 | hadoop.rpc.protection 27 | authentication 28 | 29 | 30 | hadoop.security.auth_to_local 31 | DEFAULT 32 | 33 | 34 | hadoop.proxyuser.oozie.hosts 35 | * 36 | 37 | 38 | hadoop.proxyuser.oozie.groups 39 | * 40 | 41 | 42 | hadoop.proxyuser.mapred.hosts 43 | * 44 | 45 | 46 | hadoop.proxyuser.mapred.groups 47 | * 48 | 49 | 50 | hadoop.proxyuser.flume.hosts 51 | * 52 | 53 | 54 | hadoop.proxyuser.flume.groups 55 | * 56 | 57 | 58 | hadoop.proxyuser.HTTP.hosts 59 | * 60 | 61 | 62 | hadoop.proxyuser.HTTP.groups 63 | * 64 | 65 | 66 | hadoop.proxyuser.hive.hosts 67 | * 68 | 69 | 70 | hadoop.proxyuser.hive.groups 71 | * 72 | 73 | 74 | hadoop.proxyuser.hue.hosts 75 | * 76 | 77 | 78 | hadoop.proxyuser.hue.groups 79 | * 80 | 81 | 82 | hadoop.proxyuser.httpfs.hosts 83 | * 84 | 85 | 86 | hadoop.proxyuser.httpfs.groups 87 | * 88 | 89 | 90 | hadoop.proxyuser.hdfs.groups 91 | * 92 | 93 | 94 | hadoop.proxyuser.hdfs.hosts 95 | * 96 | 97 | 98 | hadoop.proxyuser.yarn.hosts 99 | * 100 | 101 | 102 | hadoop.proxyuser.yarn.groups 103 | * 104 | 105 | 106 | hadoop.security.group.mapping 107 | org.apache.hadoop.security.ShellBasedUnixGroupsMapping 108 | 109 | 110 | hadoop.security.instrumentation.requires.admin 111 | false 112 | 113 | 114 | net.topology.script.file.name 115 | /etc/hadoop/conf.cloudera.yarn/topology.py 116 | 117 | 118 | io.file.buffer.size 119 | 65536 120 | 121 | 122 | hadoop.ssl.enabled 123 | false 124 | 125 | 126 | hadoop.ssl.require.client.cert 127 | false 128 | true 129 | 130 | 131 | hadoop.ssl.keystores.factory.class 132 | org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory 133 | true 134 | 135 | 136 | hadoop.ssl.server.conf 137 | ssl-server.xml 138 | true 139 | 140 | 141 | hadoop.ssl.client.conf 142 | ssl-client.xml 143 | true 144 | 145 | 146 | -------------------------------------------------------------------------------- /src/main/TestResources/saic_dev/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | dfs.nameservices 7 | nameservice1 8 | 9 | 10 | dfs.client.failover.proxy.provider.nameservice1 11 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 12 | 13 | 14 | dfs.ha.automatic-failover.enabled.nameservice1 15 | true 16 | 17 | 18 | ha.zookeeper.quorum 19 | njdev-cdh5-dn01.nj:2181,njdev-cdh5-nn01.nj:2181,njdev-cdh5-nn02.nj:2181 20 | 21 | 22 | dfs.ha.namenodes.nameservice1 23 | namenode98,namenode62 24 | 25 | 26 | dfs.namenode.rpc-address.nameservice1.namenode98 27 | njdev-cdh5-nn01.nj:8020 28 | 29 | 30 | dfs.namenode.servicerpc-address.nameservice1.namenode98 31 | njdev-cdh5-nn01.nj:8022 32 | 33 | 34 | dfs.namenode.http-address.nameservice1.namenode98 35 | njdev-cdh5-nn01.nj:50070 36 | 37 | 38 | dfs.namenode.https-address.nameservice1.namenode98 39 | njdev-cdh5-nn01.nj:50470 40 | 41 | 42 | dfs.namenode.rpc-address.nameservice1.namenode62 43 | njdev-cdh5-nn02.nj:8020 44 | 45 | 46 | dfs.namenode.servicerpc-address.nameservice1.namenode62 47 | njdev-cdh5-nn02.nj:8022 48 | 49 | 50 | dfs.namenode.http-address.nameservice1.namenode62 51 | njdev-cdh5-nn02.nj:50070 52 | 53 | 54 | dfs.namenode.https-address.nameservice1.namenode62 55 | 
njdev-cdh5-nn02.nj:50470 56 | 57 | 58 | dfs.replication 59 | 2 60 | 61 | 62 | dfs.blocksize 63 | 134217728 64 | 65 | 66 | dfs.client.use.datanode.hostname 67 | false 68 | 69 | 70 | fs.permissions.umask-mode 71 | 022 72 | 73 | 74 | dfs.namenode.acls.enabled 75 | false 76 | 77 | 78 | dfs.client.use.legacy.blockreader 79 | false 80 | 81 | 82 | dfs.client.read.shortcircuit 83 | true 84 | 85 | 86 | dfs.domain.socket.path 87 | /var/run/hdfs-sockets/dn 88 | 89 | 90 | dfs.client.read.shortcircuit.skip.checksum 91 | false 92 | 93 | 94 | dfs.client.domain.socket.data.traffic 95 | false 96 | 97 | 98 | dfs.datanode.hdfs-blocks-metadata.enabled 99 | true 100 | 101 | 102 | -------------------------------------------------------------------------------- /src/main/TestResources/weekend/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | fs.defaultFS 22 | hdfs://ns1 23 | 24 | 25 | ha.zookeeper.quorum 26 | weekend110:2181,weekend111:2181,weekend112:2181 27 | 28 | 29 | io.file.buffer.size 30 | 131072 31 | 32 | 33 | 34 | hadoop.tmp.dir 35 | /app/hadoop-2.9.2/tmp 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/main/TestResources/weekend/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | dfs.nameservices 23 | ns1 24 | 25 | 26 | 27 | dfs.ha.namenodes.ns1 28 | nn1,nn2 29 | 30 | 31 | 32 | dfs.namenode.rpc-address.ns1.nn1 33 | weekend110:9000 34 | 35 | 36 | 37 | dfs.namenode.http-address.ns1.nn1 38 | weekend110:50070 39 | 40 | 41 | 42 | dfs.namenode.rpc-address.ns1.nn2 43 | weekend111:9000 44 | 45 | 46 | 47 | dfs.namenode.http-address.ns1.nn2 48 | weekend111:50070 49 | 50 | 51 | 52 | dfs.namenode.shared.edits.dir 53 | qjournal://weekend110:8485;weekend111:8485;weekend112:8485/ns1 54 | 55 | 56 | 57 | dfs.journalnode.edits.dir 58 | /app/hadoop-2.9.2/hdfs/journal 59 | 60 | 61 | dfs.namenode.name.dir 62 | /app/hadoop-2.9.2/hdfs/name 63 | 64 | 65 | dfs.datanode.data.dir 66 | /app/hadoop-2.9.2/hdfs/data 67 | 68 | 69 | 70 | dfs.ha.automatic-failover.enabled 71 | true 72 | 73 | 74 | 75 | dfs.client.failover.proxy.provider.ns1 76 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 77 | 78 | 79 | 80 | dfs.ha.fencing.methods 81 | 82 | sshfence 83 | shell(/bin/true) 84 | 85 | 86 | 87 | 88 | dfs.ha.fencing.ssh.private-key-files 89 | /root/.ssh/id_rsa 90 | 91 | 92 | 93 | dfs.ha.fencing.ssh.connect-timeout 94 | 30000 95 | 96 | 97 | -------------------------------------------------------------------------------- /src/main/TestResources/weekend/mapred-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | 17 | 18 | 19 | 20 | 21 | mapreduce.framework.name 22 | yarn 23 | 24 | 25 | 26 | mapreduce.jobhistory.address 27 | weekend111:10020 28 | 29 | 30 | 31 | mapreduce.jobhistory.webapp.address 32 | weekend111:19888 33 | 34 | 35 | 36 | yarn.app.mapreduce.am.staging-dir 37 | /history 38 | 39 | 40 | 41 | mapreduce.map.log.level 42 | INFO 43 | 44 | 45 | mapreduce.reduce.log.level 46 | INFO 47 | 48 | 49 | -------------------------------------------------------------------------------- /src/main/TestResources/weekend/yarn-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 17 | 18 | yarn.resourcemanager.ha.enabled 19 | true 20 | 21 | 22 | 23 | yarn.resourcemanager.cluster-id 24 | yrc 
25 | 26 | 27 | 28 | yarn.resourcemanager.ha.rm-ids 29 | rm1,rm2 30 | 31 | 32 | 33 | yarn.resourcemanager.hostname.rm1 34 | weekend111 35 | 36 | 37 | yarn.resourcemanager.hostname.rm2 38 | weekend112 39 | 40 | 41 | yarn.resourcemanager.webapp.address.rm1 42 | weekend111:8088 43 | 44 | 45 | yarn.resourcemanager.webapp.address.rm2 46 | weekend112:8088 47 | 48 | 49 | 50 | yarn.resourcemanager.zk-address 51 | weekend110:2181,weekend111:2181,weekend112:2181 52 | 53 | 54 | yarn.nodemanager.aux-services 55 | mapreduce_shuffle 56 | 57 | 58 | yarn.nodemanager.vmem-check-enabled 59 | false 60 | Whether virtual memory limits will be enforced for containers 61 | 62 | 63 | yarn.nodemanager.vmem-pmem-ratio 64 | 4 65 | Ratio between virtual memory to physical memory when setting memory limits for containers 66 | 67 | 68 | -------------------------------------------------------------------------------- /src/main/resources/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | fs.defaultFS 7 | hdfs://nameservice1 8 | 9 | 10 | fs.trash.interval 11 | 1 12 | 13 | 14 | io.compression.codecs 15 | org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec 16 | 17 | 18 | hadoop.security.authentication 19 | simple 20 | 21 | 22 | hadoop.security.authorization 23 | false 24 | 25 | 26 | hadoop.rpc.protection 27 | authentication 28 | 29 | 30 | hadoop.security.auth_to_local 31 | DEFAULT 32 | 33 | 34 | hadoop.proxyuser.oozie.hosts 35 | * 36 | 37 | 38 | hadoop.proxyuser.oozie.groups 39 | * 40 | 41 | 42 | hadoop.proxyuser.flume.hosts 43 | * 44 | 45 | 46 | hadoop.proxyuser.flume.groups 47 | * 48 | 49 | 50 | hadoop.proxyuser.HTTP.hosts 51 | * 52 | 53 | 54 | hadoop.proxyuser.HTTP.groups 55 | * 56 | 57 | 58 | hadoop.proxyuser.hive.hosts 59 | * 60 | 61 | 62 | hadoop.proxyuser.hive.groups 63 | * 64 | 65 | 66 | hadoop.proxyuser.hue.hosts 67 | * 68 | 69 | 70 | hadoop.proxyuser.hue.groups 71 | * 72 | 73 | 74 | hadoop.proxyuser.httpfs.hosts 75 | * 76 | 77 | 78 | hadoop.proxyuser.httpfs.groups 79 | * 80 | 81 | 82 | hadoop.proxyuser.hdfs.groups 83 | * 84 | 85 | 86 | hadoop.proxyuser.hdfs.hosts 87 | * 88 | 89 | 90 | hadoop.proxyuser.yarn.hosts 91 | * 92 | 93 | 94 | hadoop.proxyuser.yarn.groups 95 | * 96 | 97 | 98 | hadoop.security.group.mapping 99 | org.apache.hadoop.security.ShellBasedUnixGroupsMapping 100 | 101 | 102 | hadoop.security.instrumentation.requires.admin 103 | false 104 | 105 | 106 | net.topology.script.file.name 107 | /etc/hadoop/conf.cloudera.yarn/topology.py 108 | 109 | 110 | io.file.buffer.size 111 | 65536 112 | 113 | 114 | hadoop.ssl.enabled 115 | false 116 | 117 | 118 | hadoop.ssl.require.client.cert 119 | false 120 | true 121 | 122 | 123 | hadoop.ssl.keystores.factory.class 124 | org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory 125 | true 126 | 127 | 128 | hadoop.ssl.server.conf 129 | ssl-server.xml 130 | true 131 | 132 | 133 | hadoop.ssl.client.conf 134 | ssl-client.xml 135 | true 136 | 137 | 138 | fs.protected.directories 139 | /testdir 140 | 141 | 142 | ipc.maximum.data.length 143 | 268435456 144 | 145 | 146 | -------------------------------------------------------------------------------- /src/main/resources/env.properties: 
-------------------------------------------------------------------------------- 1 | env=dev 2 | path=/root/script/jars/Pi.class 3 | zookeeper.connect=${zookeeper.connect} 4 | bootstrap.servers=${bootstrap.servers} 5 | es.protocol=http 6 | es.hosts=172.16.10.89:9200,172.16.10.75:9200,172.16.10.90:9200 7 | es.username=elastic 8 | es.password=Es#172.10 9 | #es.hosts=172.16.11.104:9200,172.16.11.66:9200,172.16.11.67:9200 10 | # Authorization: Basic ZWxhc3RpYzpFcyMxNzIuMTA= -------------------------------------------------------------------------------- /src/main/resources/hdfs-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | dfs.client.failover.proxy.provider.nameservice1 8 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 9 | 10 | 11 | dfs.ha.automatic-failover.enabled.nameservice1 12 | true 13 | 14 | 15 | ha.zookeeper.quorum 16 | njdev-nn01.nj:2181,njdev-nn02.nj:2181,njdev-nn03.nj:2181 17 | 18 | 19 | dfs.ha.namenodes.nameservice1 20 | namenode82,namenode89 21 | 22 | 23 | dfs.namenode.rpc-address.nameservice1.namenode82 24 | njdev-nn01.nj:8020 25 | 26 | 27 | dfs.namenode.servicerpc-address.nameservice1.namenode82 28 | njdev-nn01.nj:8022 29 | 30 | 31 | dfs.namenode.http-address.nameservice1.namenode82 32 | njdev-nn01.nj:9870 33 | 34 | 35 | dfs.namenode.https-address.nameservice1.namenode82 36 | njdev-nn01.nj:9871 37 | 38 | 39 | dfs.namenode.rpc-address.nameservice1.namenode89 40 | njdev-nn02.nj:8020 41 | 42 | 43 | dfs.namenode.servicerpc-address.nameservice1.namenode89 44 | njdev-nn02.nj:8022 45 | 46 | 47 | dfs.namenode.http-address.nameservice1.namenode89 48 | njdev-nn02.nj:9870 49 | 50 | 51 | dfs.namenode.https-address.nameservice1.namenode89 52 | njdev-nn02.nj:9871 53 | 54 | 55 | dfs.replication 56 | 2 57 | 58 | 59 | dfs.blocksize 60 | 134217728 61 | 62 | 63 | dfs.client.use.datanode.hostname 64 | false 65 | 66 | 67 | fs.permissions.umask-mode 68 | 022 69 | 70 | 71 | dfs.client.block.write.locateFollowingBlock.retries 72 | 7 73 | 74 | 75 | dfs.namenode.acls.enabled 76 | true 77 | 78 | 79 | dfs.client.read.shortcircuit 80 | true 81 | 82 | 83 | dfs.domain.socket.path 84 | /var/run/hdfs-sockets/dn 85 | 86 | 87 | dfs.client.read.shortcircuit.skip.checksum 88 | false 89 | 90 | 91 | dfs.client.domain.socket.data.traffic 92 | false 93 | 94 | 95 | dfs.datanode.hdfs-blocks-metadata.enabled 96 | true 97 | 98 | 99 | dfs.nameservices 100 | nameservice1,njdev-cdh5 101 | 102 | 103 | dfs.ha.namenodes.njdev-cdh5 104 | nn1,nn2 105 | 106 | 107 | dfs.namenode.rpc-address.njdev-cdh5.nn1 108 | 172.16.10.148:8020 109 | 110 | 111 | dfs.namenode.rpc-address.njdev-cdh5.nn2 112 | 172.16.10.149:8020 113 | 114 | 115 | dfs.namenode.http-address.njdev-cdh5.nn1 116 | 172.16.10.148:50070 117 | 118 | 119 | dfs.namenode.http-address.njdev-cdh5.nn2 120 | 172.16.10.149:50070 121 | 122 | 123 | dfs.client.failover.proxy.provider.njdev-cdh5 124 | org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider 125 | 126 | 127 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. 
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | 22 | 23 | log4j.appender.console=org.apache.log4j.ConsoleAppender 24 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 25 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 26 | 27 | log4j.logger.org.apache.flink=INFO -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataSet/JDBC/ReadFromJDBCInputFormat.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataSet.JDBC 2 | 3 | import java.sql.Date 4 | 5 | import org.apache.flink.api.common.typeinfo.TypeInformation 6 | import org.apache.flink.api.java.io.jdbc.JDBCInputFormat 7 | import org.apache.flink.api.java.typeutils.RowTypeInfo 8 | import org.apache.flink.api.scala.ExecutionEnvironment 9 | import org.apache.flink.api.scala._ 10 | import org.apache.flink.api.scala.typeutils.Types 11 | import org.apache.flink.types.Row 12 | 13 | /** 14 | * @Author yyb 15 | * @Description 16 | * @Date Create in 2020-04-26 17 | * @Time 09:52 18 | */ 19 | object ReadFromJDBCInputFormat { 20 | def main(args: Array[String]): Unit = { 21 | val env = ExecutionEnvironment.getExecutionEnvironment 22 | 23 | val types = Array[TypeInformation[_]](Types.INT, Types.STRING, Types.SQL_DATE) 24 | val fields = Array[String]("id", "name", "time") 25 | val typeInfo = new RowTypeInfo(types, fields) 26 | 27 | val jdbcInformat: JDBCInputFormat = JDBCInputFormat.buildJDBCInputFormat() 28 | .setAutoCommit(false) 29 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 30 | .setDrivername("com.mysql.jdbc.Driver") 31 | .setUsername("root") 32 | .setPassword("111111") 33 | .setQuery("select * from t_order") 34 | .setRowTypeInfo(typeInfo) 35 | .finish() 36 | 37 | val AUX_TABLE: DataSet[Row] = env.createInput(jdbcInformat) 38 | 39 | AUX_TABLE.print() 40 | 41 | val orderDataSet: DataSet[Order] = AUX_TABLE.map(x => Order(x.getField(0).asInstanceOf[Int], x.getField(1).asInstanceOf[String], x.getField(2).asInstanceOf[Date])) 42 | orderDataSet.print() 43 | 44 | // env.execute("ReadFromJDBCInputFormat") 45 | 46 | } 47 | 48 | case class Order(id:Int, name:String, time:Date) 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataSet/JDBC/WriteToMysqlByOutputformat.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataSet.JDBC 2 | 3 | import com.yyb.flink10.DataStream.data.WordCountData 4 | import com.yyb.flink10.DataStream.parquet.WriteParquetWordCount.WORDCOUNT 5 | import 
org.apache.flink.api.java.io.jdbc.JDBCOutputFormat 6 | import org.apache.flink.api.scala.ExecutionEnvironment 7 | import org.apache.flink.api.scala._ 8 | import org.apache.flink.types.Row 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-04-26 14 | * @Time 10:32 15 | */ 16 | object WriteToMysqlByOutputformat { 17 | def main(args: Array[String]): Unit = { 18 | val env = ExecutionEnvironment.getExecutionEnvironment 19 | 20 | val text = env.fromElements(WordCountData.WORDS: _*) 21 | val counts: DataSet[WORDCOUNT] = text.flatMap(_.toLowerCase.split("\\W+")) 22 | .filter(_.nonEmpty) 23 | .map(WORDCOUNT(_, 1)) 24 | .groupBy(0) 25 | .sum(1) 26 | val countRecord: DataSet[Row] = counts.map(x => Row.of(x.word, x.count.asInstanceOf[Integer])) 27 | 28 | val mysqlOutput: JDBCOutputFormat = JDBCOutputFormat.buildJDBCOutputFormat() 29 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 30 | .setDrivername("com.mysql.jdbc.Driver") 31 | .setUsername("root") 32 | .setPassword("111111") 33 | .setQuery("insert into wordcount (word, count) values(?, ?)") //注意这里是 mysql 的插入语句 34 | .setSqlTypes(Array(java.sql.Types.VARCHAR, java.sql.Types.INTEGER)) //这里是每行数据的 类型 35 | .finish() 36 | 37 | countRecord.output(mysqlOutput) 38 | 39 | 40 | env.execute("WriteToMysqlByOutputformat") 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataSet/WordCount.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataSet 2 | 3 | import org.apache.flink.api.scala.ExecutionEnvironment 4 | import org.apache.flink.api.scala._ 5 | 6 | /** 7 | * @Author yyb 8 | * @Description 9 | * @Date Create in 2020-04-15 10 | * @Time 17:00 11 | */ 12 | object WordCount { 13 | def main(args: Array[String]): Unit = { 14 | val env = ExecutionEnvironment.getExecutionEnvironment 15 | 16 | val filePtah = "/Users/yyb/Downloads/1.txt" 17 | val filepathtosave = "/Users/yyb/Downloads/1_rs.csv" 18 | val text = env.readTextFile(filePtah) 19 | val wordCounts = text.flatMap(_.toLowerCase.split("\\W+") filter { _.nonEmpty}) 20 | .map((_, 1)) 21 | .groupBy(0) 22 | .sum(1) 23 | wordCounts.setParallelism(1).print() 24 | wordCounts.setParallelism(1).writeAsCsv(filepathtosave) 25 | 26 | 27 | env.execute("WordCount") 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataSet/kafka/SendData2KafkaByKafkaOutputFormat.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataSet.kafka 2 | 3 | import java.io.InputStream 4 | import java.util.Properties 5 | 6 | import com.yyb.flink10.OutputFormat.KafkaOutputFormat 7 | import com.yyb.flink10.sink.KafkaBatchTableSink 8 | import org.apache.flink.api.scala._ 9 | import org.apache.flink.formats.json.JsonRowSerializationSchema 10 | import org.apache.flink.kafka011.shaded.org.apache.kafka.clients.producer.ProducerRecord 11 | import org.apache.flink.kafka011.shaded.org.apache.kafka.common.serialization.StringSerializer 12 | import org.apache.flink.streaming.connectors.kafka.internal.FlinkKafkaProducer 13 | import org.apache.flink.table.api.scala.BatchTableEnvironment 14 | import org.apache.flink.table.api.{DataTypes, Table, TableSchema} 15 | import org.apache.flink.table.descriptors.{Json, Kafka, Schema} 16 | 17 | 18 | /** 19 | * @Author yyb 20 | * @Description 21 | * @Date Create in 2020-07-28 22 
| * @Time 16:12 23 | */ 24 | object SendData2KafkaByKafkaOutputFormat { 25 | def main(args: Array[String]): Unit = { 26 | val env = ExecutionEnvironment.getExecutionEnvironment 27 | // val blinkTableEnv = BatchTableEnvironment.create(env) 28 | val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties") 29 | val prop: Properties = new Properties() 30 | prop.load(in_env) 31 | println(prop.getProperty("zookeeper.connect")) 32 | 33 | 34 | 35 | val kafka = new Kafka 36 | kafka.version("0.11") 37 | .topic("eventsource_yhj") 38 | .property("zookeeper.connect", prop.getProperty("zookeeper.connect")) 39 | .property("bootstrap.servers", prop.getProperty("bootstrap.servers")). 40 | property("group.id", "yyb_dev") 41 | .startFromLatest 42 | 43 | 44 | val schema = new Schema 45 | val tableSchema1 = TableSchema.builder 46 | .field("amount", DataTypes.INT) 47 | .field("currency", DataTypes.STRING).build 48 | schema.schema(tableSchema1) 49 | 50 | // val tableSource = blinkTableEnv.connect(kafka) 51 | // .withFormat(new Json().failOnMissingField(true)) 52 | // .withSchema(schema) 53 | // tableSource.createTemporaryTable("Orders_tmp") 54 | 55 | val schemaString = new JsonRowSerializationSchema.Builder(tableSchema1.toRowType) 56 | val kafkaProp = new Properties(); 57 | kafkaProp.put("key.serializer", classOf[StringSerializer]) 58 | kafkaProp.put("value.serializer", classOf[StringSerializer]) 59 | kafkaProp.put("zookeeper.connect", prop.getProperty("zookeeper.connect")) 60 | kafkaProp.put("bootstrap.servers", prop.getProperty("bootstrap.servers")) 61 | kafkaProp.put("topic", "eventsource_yhj") 62 | 63 | val kafkaProducer = new FlinkKafkaProducer[String, String](kafkaProp) 64 | val data = Array(Current(1, "Euro")) 65 | 66 | val dataDS = env.fromCollection(data) 67 | 68 | val datasOfRecord: DataSet[ProducerRecord[String, String]] = dataDS.map(x => { 69 | val record: ProducerRecord[String, String] = new ProducerRecord[String, String]("eventsource_yhj", x.toString) 70 | record 71 | }) 72 | 73 | 74 | 75 | 76 | /** 77 | * 这里的 发送数据 到 kafka是 先 collect 到 driver 才 发送的,所以不是 分布式的处理方法 78 | * 需要调优 79 | */ 80 | // datasOfRecord.collect().foreach(kafkaProducer.send(_)) 81 | // kafkaProducer.flush() 82 | 83 | /** 84 | * 这里使用的是 dataset 的 kafkaOutputFormat 85 | */ 86 | val kafkaOutputFormat = new KafkaOutputFormat(kafkaProp); 87 | dataDS.map(x => x.toString).output(kafkaOutputFormat) 88 | 89 | // val dataTable: Table = blinkTableEnv.fromDataSet(dataDS.map(_.toString)) 90 | 91 | // blinkTableEnv.registerTable("dataSource", dataTable) 92 | 93 | // val kafkaBatchTableSink = new KafkaBatchTableSink(kafkaOutputFormat); 94 | // blinkTableEnv.registerTableSink("kafkaBatchTableSink", kafkaBatchTableSink) 95 | 96 | var sql = 97 | """ 98 | |insert into kafkaBatchTableSink select * from dataSource 99 | |""".stripMargin 100 | //因为 kafka 是 无界的, 所以不能使用 batch 模式 的 kafkatablesink 101 | //BatchTableSink or OutputFormatTableSink required to emit batch Table. 
102 | // blinkTableEnv.sqlUpdate(sql) 103 | 104 | // dataTable.insertInto("Orders_tmp") 105 | 106 | 107 | env.execute("SendData2KafkaByKafkaConnector") 108 | } 109 | 110 | case class Current(amount:Int, currency:String){ 111 | override def toString: String = { 112 | s"""{"amount":"${amount}",currency:"${currency}"}""".stripMargin 113 | } 114 | 115 | def toBytes(): Array[Byte] ={ 116 | toString.getBytes() 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/State/StateOfCountWindowAverage.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.State; 2 | 3 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 4 | import org.apache.flink.api.common.state.ValueState; 5 | import org.apache.flink.api.common.state.ValueStateDescriptor; 6 | import org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | import org.apache.flink.api.java.tuple.Tuple2; 9 | import org.apache.flink.api.java.tuple.Tuple3; 10 | import org.apache.flink.configuration.Configuration; 11 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 12 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 14 | import org.apache.flink.util.Collector; 15 | 16 | /** 17 | * @Author yyb 18 | * @Description 19 | * @Date Create in 2020-08-03 20 | * @Time 14:52 21 | */ 22 | public class StateOfCountWindowAverage { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | DataStreamSource> soure = env.fromElements( 27 | Tuple2.of(1L, 3L), Tuple2.of(1L, 5L), Tuple2.of(1L, 7L), 28 | Tuple2.of(1L, 4L), Tuple2.of(1L, 2L), Tuple2.of(2L, 2L), 29 | Tuple2.of(2L, 4L) 30 | 31 | ); 32 | 33 | // soure.keyBy(0).flatMap(new CountWindowAverage()) 34 | // .print(); 35 | 36 | SingleOutputStreamOperator> rs1 = soure.keyBy(0).flatMap(new GroupSum()); 37 | rs1.print(); 38 | 39 | 40 | env.execute("StateOfCountWindowAverage"); 41 | 42 | 43 | } 44 | 45 | /** 46 | * 为什么要用 flatMap ,flatMap 有 抽取一层数据的意思,这个 把 2 个元素 计算 输出了 1个元素 47 | */ 48 | static class CountWindowAverage extends RichFlatMapFunction, Tuple2>{ 49 | private transient ValueState> sum; 50 | 51 | @Override 52 | public void flatMap(Tuple2 value, Collector> out) throws Exception { 53 | Tuple2 currentSum = sum.value(); 54 | currentSum.f0 = currentSum.f0 + 1; 55 | currentSum.f1 = currentSum.f1 + value.f1; 56 | sum.update(currentSum); 57 | if(currentSum.f0 >=2){ //这里遇到 2 个 一组的元素 求均值后 ,输出 ,清空 sum;如果需要求这一组的 平均值,则不需要这里 58 | out.collect(new Tuple2<>(value.f0, currentSum.f1 / currentSum.f0)); 59 | sum.clear(); 60 | } 61 | } 62 | 63 | @Override 64 | public void open(Configuration parameters) throws Exception { 65 | //初始化 和 get sum ValueState 66 | ValueStateDescriptor> descriptor = 67 | new ValueStateDescriptor<>( 68 | "average", // the state name 69 | TypeInformation.of(new TypeHint>() {}), // type information 70 | Tuple2.of(0L, 0L)); // default value of the state, if nothing was set 71 | sum = getRuntimeContext().getState(descriptor); 72 | } 73 | } 74 | 75 | static class GroupSum extends RichFlatMapFunction, Tuple3>{ 76 | //这里好输出的类型对应 77 | private transient ValueState> sum; 78 | 79 | //第一个字段 输入的类型 80 | //第二个字段 输出的类型 81 | @Override 82 | public void 
flatMap(Tuple2 value, Collector> out) throws Exception { 83 | Tuple3 currentSum = sum.value(); 84 | currentSum.f0 = value.f0; 85 | currentSum.f1 = currentSum.f0 + 1; 86 | currentSum.f2 = currentSum.f2 + value.f1; 87 | sum.update(currentSum); 88 | out.collect(Tuple3.of(currentSum.f0, currentSum.f1, currentSum.f2)); //注意这里每一条数据都会返回出去,所以这个不适合 RichFlatMapFunction 做 聚合的 操作的 89 | } 90 | 91 | @Override 92 | public void open(Configuration parameters) throws Exception { 93 | ValueStateDescriptor> descriptor = new ValueStateDescriptor<>( 94 | "sum", 95 | TypeInformation.of(new TypeHint>() { //和输出类型对应 96 | }), 97 | Tuple3.of(0L, 0L, 0L) 98 | ); 99 | 100 | sum = getRuntimeContext().getState(descriptor); 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/data/WordCountData.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.data; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-04-15 7 | * @Time 21:52 8 | */ 9 | public class WordCountData { 10 | public static final String[] WORDS = new String[] { 11 | "To be, or not to be,--that is the question:--", 12 | "Whether 'tis nobler in the mind to suffer", 13 | "The slings and arrows of outrageous fortune", 14 | "Or to take arms against a sea of troubles,", 15 | "And by opposing end them?--To die,--to sleep,--", 16 | "No more; and by a sleep to say we end", 17 | "The heartache, and the thousand natural shocks", 18 | "That flesh is heir to,--'tis a consummation", 19 | "Devoutly to be wish'd. To die,--to sleep;--", 20 | "To sleep! perchance to dream:--ay, there's the rub;", 21 | "For in that sleep of death what dreams may come,", 22 | "When we have shuffled off this mortal coil,", 23 | "Must give us pause: there's the respect", 24 | "That makes calamity of so long life;", 25 | "For who would bear the whips and scorns of time,", 26 | "The oppressor's wrong, the proud man's contumely,", 27 | "The pangs of despis'd love, the law's delay,", 28 | "The insolence of office, and the spurns", 29 | "That patient merit of the unworthy takes,", 30 | "When he himself might his quietus make", 31 | "With a bare bodkin? who would these fardels bear,", 32 | "To grunt and sweat under a weary life,", 33 | "But that the dread of something after death,--", 34 | "The undiscover'd country, from whose bourn", 35 | "No traveller returns,--puzzles the will,", 36 | "And makes us rather bear those ills we have", 37 | "Than fly to others that we know not of?", 38 | "Thus conscience does make cowards of us all;", 39 | "And thus the native hue of resolution", 40 | "Is sicklied o'er with the pale cast of thought;", 41 | "And enterprises of great pith and moment,", 42 | "With this regard, their currents turn awry,", 43 | "And lose the name of action.--Soft you now!", 44 | "The fair Ophelia!--Nymph, in thy orisons", 45 | "Be all my sins remember'd." 
46 | }; 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/kafka/EventTimeDemo.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.kafka; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.yyb.flink10.commonEntity.Current1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.streaming.api.CheckpointingMode; 7 | import org.apache.flink.streaming.api.TimeCharacteristic; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.streaming.api.functions.ProcessFunction; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 14 | import org.apache.flink.table.api.EnvironmentSettings; 15 | import org.apache.flink.table.api.Table; 16 | import org.apache.flink.table.api.java.StreamTableEnvironment; 17 | import org.apache.flink.types.Row; 18 | import org.apache.flink.util.Collector; 19 | 20 | import java.io.InputStream; 21 | import java.util.Properties; 22 | 23 | /** 24 | * @Author yyb 25 | * @Description 26 | * @Date Create in 2020-08-10 27 | * @Time 18:04 28 | */ 29 | public class EventTimeDemo { 30 | public static void main(String[] args) throws Exception { 31 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 34 | 35 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 36 | env.enableCheckpointing(3000); 37 | env.getCheckpointConfig().setTolerableCheckpointFailureNumber(3); 38 | env.getCheckpointConfig().setMaxConcurrentCheckpoints(1); 39 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 40 | 41 | env.getConfig().setAutoWatermarkInterval(1000); 42 | 43 | InputStream in_env = ClassLoader.getSystemResourceAsStream("env.properties"); 44 | Properties prop = new Properties(); 45 | prop.load(in_env); 46 | 47 | 48 | 49 | Properties properties = new Properties(); 50 | properties.setProperty("bootstrap.servers", prop.getProperty("bootstrap.servers")); 51 | properties.setProperty("zookeeper.connect", prop.getProperty("zookeeper.connect")); 52 | properties.setProperty("group.id", "test"); 53 | 54 | FlinkKafkaConsumer011 kafkaSource = new FlinkKafkaConsumer011("eventsource_yyb", new SimpleStringSchema(), properties); 55 | DataStream stream = env.addSource(kafkaSource); 56 | 57 | 58 | DataStream currentDS = stream.process(new ProcessFunction() { 59 | @Override 60 | public void processElement(String value, Context ctx, Collector out) throws Exception { 61 | Current1 current1 = JSON.parseObject(value, Current1.class); 62 | out.collect(current1); 63 | } 64 | }); 65 | 66 | currentDS.assignTimestampsAndWatermarks(new TimestampExtractor(Time.seconds(0))); 67 | 68 | currentDS.print().setParallelism(1); 69 | 70 | // sql rowtime 71 | //注意 第一个 rowtime 是自己的 rowtime,user_action_time.rowtime才是 真正的 eventTime 72 | Table t = blinkTableEnv.fromDataStream(currentDS, "rowtime,amount,currency,user_action_time.rowtime"); 73 | 74 | 
DataStream tRow = blinkTableEnv.toAppendStream(t, Row.class); 75 | tRow.print().setParallelism(1); 76 | env.execute("EventTimeDemo"); 77 | 78 | 79 | } 80 | 81 | static class TimestampExtractor extends BoundedOutOfOrdernessTimestampExtractor { 82 | 83 | public TimestampExtractor(Time maxOutOfOrderness){ 84 | super(maxOutOfOrderness); 85 | } 86 | @Override 87 | public long extractTimestamp(Current1 element) { 88 | return Long.parseLong(element.getRowtime()); 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/kafka/SendData2KafkaByKafkaConnector.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.kafka 2 | 3 | import java.io.InputStream 4 | import java.util.Properties 5 | 6 | import org.apache.flink.api.scala._ 7 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 8 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011 9 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema 10 | import org.apache.flink.table.api.scala.StreamTableEnvironment 11 | import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table, TableSchema} 12 | import org.apache.flink.table.descriptors.{Json, Kafka, Schema} 13 | 14 | 15 | /** 16 | * @Author yyb 17 | * @Description 18 | * @Date Create in 2020-07-28 19 | * @Time 16:12 20 | */ 21 | object SendData2KafkaByKafkaConnector { 22 | def main(args: Array[String]): Unit = { 23 | val env = StreamExecutionEnvironment.getExecutionEnvironment 24 | 25 | val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties") 26 | val prop: Properties = new Properties() 27 | prop.load(in_env) 28 | 29 | val data = Array(Current(1, "Euro")) 30 | 31 | val dataDS: DataStream[Current] = env.fromCollection(data) 32 | 33 | 34 | val kafkaSink = new FlinkKafkaProducer011[String]( 35 | prop.getProperty("bootstrap.servers"), // broker list 36 | "eventsource_yyb", // target topic 37 | new SimpleStringSchema()); // serialization schema 38 | kafkaSink.setWriteTimestampToKafka(true) 39 | 40 | dataDS.map(_.toString).addSink(kafkaSink) 41 | 42 | 43 | env.execute("SendData2KafkaByKafkaConnector") 44 | } 45 | 46 | case class Current(amount:Int, currency:String) 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/kafka/SendData2KafkaByKafkaConnectorBrowse.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.kafka 2 | 3 | import java.io.InputStream 4 | import java.util.Properties 5 | 6 | import com.alibaba.fastjson.JSON 7 | import com.yyb.flink10.commonEntity.UserBrowseLog 8 | import org.apache.flink.api.scala._ 9 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 10 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011 11 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema 12 | 13 | 14 | /** 15 | * @Author yyb 16 | * @Description 17 | * @Date Create in 2020-07-28 18 | * @Time 16:12 19 | */ 20 | object SendData2KafkaByKafkaConnectorBrowse { 21 | def main(args: Array[String]): Unit = { 22 | val env = StreamExecutionEnvironment.getExecutionEnvironment 23 | 24 | val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties") 25 | val prop: Properties = new Properties() 26 | 
prop.load(in_env) 27 | 28 | val data = Array( 29 | new UserBrowseLog("user_1", "2016-01-01 00:00:00", "browse", "product_5", 20, 0L), 30 | new UserBrowseLog("user_1", "2016-01-01 00:00:01", "browse", "product_5", 20, 0L), 31 | new UserBrowseLog("user_1", "2016-01-01 00:00:02", "browse", "product_5", 20, 0L), 32 | new UserBrowseLog("user_1", "2016-01-01 00:00:03", "browse", "product_5", 20, 0L), 33 | new UserBrowseLog("user_1", "2016-01-01 00:00:04", "browse", "product_5", 20, 0L), 34 | new UserBrowseLog("user_1", "2016-01-01 00:00:05", "browse", "product_5", 20, 0L), 35 | new UserBrowseLog("user_1", "2016-01-01 00:00:06", "browse", "product_5", 20, 0L), 36 | new UserBrowseLog("user_2", "2016-01-01 00:00:01", "browse", "product_3", 20, 0L), 37 | new UserBrowseLog("user_2", "2016-01-01 00:00:02", "browse", "product_3", 20, 0L), 38 | new UserBrowseLog("user_2", "2016-01-01 00:00:05", "browse", "product_3", 20, 0L), 39 | new UserBrowseLog("user_2", "2016-01-01 00:00:06", "browse", "product_3", 20, 0L) 40 | ) 41 | 42 | val dataDS: DataStream[UserBrowseLog] = env.fromCollection(data) 43 | 44 | 45 | val kafkaSink = new FlinkKafkaProducer011[String]( 46 | prop.getProperty("bootstrap.servers"), // broker list 47 | "eventsource_yyb_browse", // target topic 48 | new SimpleStringSchema()); // serialization schema 49 | kafkaSink.setWriteTimestampToKafka(true) 50 | 51 | dataDS.map(JSON.toJSON(_).toString).addSink(kafkaSink) 52 | 53 | 54 | env.execute("SendData2KafkaByKafkaConnectorBrowse") 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/kafka/SendData2KafkaByKafkaConnectorProduct.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.kafka 2 | 3 | import java.io.InputStream 4 | import java.util.Properties 5 | 6 | import com.alibaba.fastjson.JSON 7 | import com.yyb.flink10.commonEntity.{ProductInfo, UserBrowseLog} 8 | import org.apache.flink.api.scala._ 9 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 10 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011 11 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema 12 | 13 | 14 | /** 15 | * @Author yyb 16 | * @Description 17 | * @Date Create in 2020-07-28 18 | * @Time 16:12 19 | */ 20 | object SendData2KafkaByKafkaConnectorProduct { 21 | def main(args: Array[String]): Unit = { 22 | val env = StreamExecutionEnvironment.getExecutionEnvironment 23 | 24 | val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties") 25 | val prop: Properties = new Properties() 26 | prop.load(in_env) 27 | 28 | val data = Array( 29 | new ProductInfo("product_5", "name50", "category50", "2016-01-01 00:00:00", 0L), 30 | new ProductInfo("product_5", "name52", "category52", "2016-01-01 00:00:02", 0L), 31 | new ProductInfo("product_5", "name55", "category55", "2016-01-01 00:00:05", 0L), 32 | new ProductInfo("product_3", "name32", "category32", "2016-01-01 00:00:02", 0L), 33 | new ProductInfo("product_3", "name35", "category35", "2016-01-01 00:00:05", 0L) 34 | ) 35 | 36 | val dataDS: DataStream[ProductInfo] = env.fromCollection(data) 37 | 38 | 39 | val kafkaSink = new FlinkKafkaProducer011[String]( 40 | prop.getProperty("bootstrap.servers"), // broker list 41 | "eventsource_yyb_product", // target topic 42 | new SimpleStringSchema()); // serialization schema 43 | kafkaSink.setWriteTimestampToKafka(true) 44 | 45 | 
dataDS.map(JSON.toJSON(_).toString).addSink(kafkaSink) 46 | 47 | 48 | env.execute("SendData2KafkaByKafkaConnectorBrowse") 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/parquet/WriteParquetWordCount.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.parquet 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import com.yyb.flink10.DataStream.data.WordCountData 6 | import org.apache.flink.api.common.serialization.SimpleStringEncoder 7 | import org.apache.flink.api.java.utils.ParameterTool 8 | import org.apache.flink.core.fs.Path 9 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters 10 | import org.apache.flink.runtime.state.StateBackend 11 | import org.apache.flink.runtime.state.filesystem.FsStateBackend 12 | import org.apache.flink.streaming.api.CheckpointingMode 13 | import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup 14 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner 15 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy 16 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 17 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 18 | import org.apache.flink.streaming.api.scala._ 19 | 20 | /** 21 | * @Author yyb 22 | * @Description 23 | * @Date Create in 2020-04-16 24 | * @Time 09:53 25 | */ 26 | object WriteParquetWordCount { 27 | def main(args: Array[String]): Unit = { 28 | val env = StreamExecutionEnvironment.getExecutionEnvironment 29 | 30 | val params = ParameterTool.fromArgs(args) 31 | 32 | env.getConfig.setGlobalJobParameters(params) 33 | 34 | 35 | 36 | // env.enableCheckpointing(1000) 37 | // env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE) 38 | // env.getCheckpointConfig.enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION) 39 | // // checkpoint错误次数,是否任务会失败 40 | // env.getCheckpointConfig.setTolerableCheckpointFailureNumber(3) 41 | // env.setStateBackend(new FsStateBackend("/tmp/xxx").asInstanceOf[StateBackend]) 42 | 43 | 44 | 45 | 46 | val config = OutputFileConfig 47 | .builder() 48 | .withPartPrefix("wordcount") 49 | .withPartSuffix(".exe") 50 | .build() 51 | 52 | val text = 53 | if(params.has("--input")){ 54 | env.readTextFile(params.get("--input")) 55 | }else{ 56 | println("Executing WordCount example with default inputs data set.") 57 | println("Use --input to specify file input.") 58 | // get default test text data 59 | env.fromElements(WordCountData.WORDS: _*) 60 | } 61 | 62 | val counts: DataStream[WORDCOUNT] = text.flatMap(_.toLowerCase.split("\\W+")) 63 | .filter(_.nonEmpty) 64 | .map(WORDCOUNT(_, 1)) 65 | .keyBy(0) 66 | .sum(1) 67 | 68 | if(params.has("--output")){ 69 | counts.writeAsText(params.get("--output")) 70 | //注意这里的 范型 要和 counts 的范型 一致 71 | val filesink: StreamingFileSink[WORDCOUNT] = StreamingFileSink 72 | .forBulkFormat(new Path(params.get("--output")), ParquetAvroWriters.forReflectRecord(classOf[WORDCOUNT])) 73 | .withBucketAssigner(new DateTimeBucketAssigner()) 74 | // .withRollingPolicy(OnCheckpointRollingPolicy.build()) 75 | // .withOutputFileConfig(config) // 设置输出文件的 前后缀 76 | .build() 77 | 78 | val sink: StreamingFileSink[WORDCOUNT] = StreamingFileSink 79 | 80 | 
.forRowFormat(new Path(""), new SimpleStringEncoder[WORDCOUNT]("UTF-8")) 81 | // .withBucketAssigner(new DateTimeBucketAssigner()) 82 | // .withRollingPolicy(OnCheckpointRollingPolicy.build()) 83 | 84 | // .withOutputFileConfig(config) 85 | .build() 86 | 87 | 88 | 89 | counts.addSink(filesink) 90 | 91 | }else{ 92 | println("Printing result to stdout. Use --output to specify output path.") 93 | counts.print() 94 | } 95 | 96 | env.execute("StreamWordCount") 97 | } 98 | 99 | case class WORDCOUNT(word:String, count:Int) 100 | } 101 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/JDBC/ReadFromInputFormat.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.JDBC 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation 4 | import org.apache.flink.api.java.io.jdbc.JDBCInputFormat 5 | import org.apache.flink.api.java.typeutils.RowTypeInfo 6 | import org.apache.flink.api.scala.typeutils.Types 7 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 8 | import org.apache.flink.streaming.api.scala._ 9 | import org.apache.flink.types.Row 10 | 11 | /** 12 | * @Author yyb 13 | * @Description 14 | * @Date Create in 2020-04-29 15 | * @Time 16:25 16 | */ 17 | object ReadFromInputFormat { 18 | def main(args: Array[String]): Unit = { 19 | val env = StreamExecutionEnvironment.getExecutionEnvironment 20 | 21 | val types = Array[TypeInformation[_]](Types.INT, Types.STRING, Types.SQL_DATE) 22 | val fields = Array[String]("id", "name", "time") 23 | val typeInfo = new RowTypeInfo(types, fields) 24 | val jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat() 25 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 26 | .setDrivername("com.mysql.jdbc.Driver") 27 | .setUsername("root") 28 | .setPassword("111111") 29 | .setQuery("select * from t_order") 30 | .setRowTypeInfo(typeInfo) 31 | .finish() 32 | 33 | val t_order: DataStream[Row] = env.createInput(jdbcInputFormat) 34 | t_order.print() 35 | 36 | env.execute("ReadFromInputFormat") 37 | 38 | // t_order.addSink() //flink-jdbc 的 sinkFunction 都是 非 public的,不可用的,里面的 sinkFunction 是在 tableSource 中使用的 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/JDBC/WriteToMysqlByJDBCOutputformat.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.JDBC 2 | 3 | import com.yyb.flink10.DataStream.data.WordCountData 4 | import org.apache.flink.api.java.io.jdbc.{JDBCAppendTableSink, JDBCOutputFormat, JDBCSinkFunction} 5 | import org.apache.flink.api.scala.DataSet 6 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _} 7 | import org.apache.flink.types.Row 8 | 9 | /** 10 | * @Author yyb 11 | * @Description 注意在 DataStream 的模式下 会出现 多条聚合数据 12 | * @Date Create in 2020-04-26 13 | * @Time 13:50 14 | */ 15 | object WriteToMysqlByJDBCOutputformat { 16 | def main(args: Array[String]): Unit = { 17 | val env = StreamExecutionEnvironment.getExecutionEnvironment 18 | 19 | val text: DataStream[String] = env.fromElements(WordCountData.WORDS: _*) 20 | val counts: DataStream[(String, Int)] = text.flatMap(_.toLowerCase.split("\\W+")) 21 | .filter(_.nonEmpty) 22 | .map((_, 1)) 23 | .keyBy(0) 24 | .sum(1) 25 | 26 | val mysqlOutput: JDBCOutputFormat = 
JDBCOutputFormat.buildJDBCOutputFormat() 27 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 28 | .setDrivername("com.mysql.jdbc.Driver") 29 | .setUsername("root") 30 | .setPassword("111111") 31 | .setQuery("insert into wordcount (word, count) values(?, ?)") //注意这里是 mysql 的插入语句 32 | .setSqlTypes(Array(java.sql.Types.VARCHAR, java.sql.Types.INTEGER)) //这里是每行数据的 类型 33 | .finish() 34 | 35 | // val jdbcSink = new JDBCSinkFunction(mysqlOutput) //注意这个 类不能这样实用化,因为它不是 public class 36 | 37 | 38 | val countRecord: DataStream[Row] = counts.map(x => Row.of(x._1, x._2.asInstanceOf[Integer])) 39 | 40 | 41 | countRecord.writeUsingOutputFormat(mysqlOutput) 42 | 43 | 44 | env.execute("WriteToMysqlByJDBCOutputformat") 45 | 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/BulkEncodedSink/WordCountFileSourceStreamFileSinkOfParquet.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.BulkEncodedSink 2 | 3 | import com.yyb.flink10.sink.ParquetWriterSink 4 | import org.apache.avro.reflect.ReflectData 5 | import org.apache.flink.api.common.serialization.SimpleStringEncoder 6 | import org.apache.flink.api.java.utils.ParameterTool 7 | import org.apache.flink.core.fs.Path 8 | import org.apache.flink.formats.parquet.ParquetWriterFactory 9 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters 10 | import org.apache.flink.streaming.api.CheckpointingMode 11 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 12 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 13 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner 14 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy 15 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 16 | import org.apache.flink.streaming.api.scala._ 17 | import org.apache.parquet.hadoop.metadata.CompressionCodecName 18 | 19 | /** 20 | * @Author yyb 21 | * @Description 22 | * @Date Create in 2020-04-18 23 | * @Time 17:40 24 | */ 25 | object WordCountFileSourceStreamFileSinkOfParquet { 26 | def main(args: Array[String]): Unit = { 27 | System.setProperty("HADOOP_USER_NAME", "yyb") 28 | val env = StreamExecutionEnvironment.getExecutionEnvironment 29 | val params = ParameterTool.fromArgs(args) 30 | 31 | env.getConfig.setGlobalJobParameters(params) 32 | 33 | env.enableCheckpointing(20) 34 | env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE) 35 | 36 | val fileOutputCofig = OutputFileConfig 37 | .builder() 38 | .withPartSuffix(".parquet") 39 | .build() 40 | 41 | val fileSourcePath = "./data/data.txt" 42 | val fileSinkPath = "./xxx.text/rs2" 43 | val fileSinkPath1 = "./xxx.text/rs3" 44 | val fileSinkPath2 = "file:///Users/yyb/ScalaSource/flink10_learn/xxx.text/rs4" 45 | 46 | val wc = env.readTextFile(fileSourcePath) 47 | .flatMap(_.split("\\W+")) 48 | .filter(_.nonEmpty) 49 | .map(WC(_, 1)) 50 | .keyBy(0) 51 | .sum(1) 52 | 53 | val simleSink = StreamingFileSink.forRowFormat(new Path(fileSinkPath), new SimpleStringEncoder[WC]()) 54 | .build() 55 | 56 | // 注意在使用这种方式的 parquet sink 与 一次运行的 dataStream 配合的时候,一般会出现 parquet文件没有写完整的问题 57 | val parquetSink: StreamingFileSink[WC] = StreamingFileSink.forBulkFormat(new Path(fileSinkPath), 
58 | ParquetAvroWriters.forReflectRecord(classOf[WC])) 59 | // .withNewBucketAssigner(new BasePathBucketAssigner()) 60 | // .withOutputFileConfig(fileOutputCofig) 61 | // .withBucketCheckInterval(10) 62 | .withRollingPolicy(OnCheckpointRollingPolicy.build()) 63 | .build() 64 | 65 | wc.print() 66 | 67 | // wc.addSink(parquetSink).setParallelism(1) 68 | 69 | val parquetSinkmy = new ParquetWriterSink[WC](fileSinkPath2, 70 | ReflectData.get.getSchema(classOf[WC]).toString, 71 | CompressionCodecName.UNCOMPRESSED) 72 | 73 | wc.addSink(parquetSinkmy) 74 | 75 | // wc.addSink(txtSink).setParallelism(1) 76 | 77 | 78 | env.execute("WordCountFileSourceStreamFileSinkOfParquet") 79 | } 80 | 81 | case class WC(word:String, ct:Int) 82 | } 83 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/BulkEncodedSink/WordCountFileSourceStreamFileSinkOfParquetToHDFS.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.BulkEncodedSink 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringEncoder 4 | import org.apache.flink.api.java.utils.ParameterTool 5 | import org.apache.flink.core.fs.Path 6 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters 7 | import org.apache.flink.streaming.api.CheckpointingMode 8 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy 9 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 10 | import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _} 11 | 12 | /** 13 | * @Author yyb 14 | * @Description 15 | * @Date Create in 2020-04-18 16 | * @Time 17:40 17 | */ 18 | object WordCountFileSourceStreamFileSinkOfParquetToHDFS { 19 | def main(args: Array[String]): Unit = { 20 | System.setProperty("HADOOP_USER_NAME", "root") 21 | val env = StreamExecutionEnvironment.getExecutionEnvironment 22 | val params = ParameterTool.fromArgs(args) 23 | 24 | env.getConfig.setGlobalJobParameters(params) 25 | 26 | env.enableCheckpointing(20) 27 | env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE) 28 | 29 | val fileOutputCofig = OutputFileConfig 30 | .builder() 31 | .withPartSuffix(".parquet") 32 | .build() 33 | 34 | val fileSourcePath = "/Users/yyb/Downloads/1.txt" 35 | val fileSinkPath = "hdfs://ns1/user/yyb/parquet" 36 | 37 | val wc = env.readTextFile(fileSourcePath) 38 | .flatMap(_.split("\\W+")) 39 | .filter(_.nonEmpty) 40 | .map(WC(_, 1)) 41 | .keyBy(0) 42 | .sum(1) 43 | 44 | val simleSink = StreamingFileSink.forRowFormat(new Path(fileSinkPath), new SimpleStringEncoder[WC]()) 45 | .build() 46 | 47 | val parquetSink = StreamingFileSink.forBulkFormat(new Path(fileSinkPath), 48 | ParquetAvroWriters.forReflectRecord(classOf[WC])) 49 | // .withNewBucketAssigner(new BasePathBucketAssigner()) 50 | // .withOutputFileConfig(fileOutputCofig) 51 | // .withBucketCheckInterval(10) 52 | .withRollingPolicy(OnCheckpointRollingPolicy.build()) 53 | .build() 54 | 55 | // wc.print() 56 | 57 | wc.addSink(parquetSink).setParallelism(1) 58 | 59 | 60 | env.execute("WordCountFileSourceStreamFileSinkOfParquet") 61 | } 62 | 63 | case class WC(word:String, ct:Int) 64 | 65 | } 66 | -------------------------------------------------------------------------------- 
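Editor's note: the two bulk-encoded parquet jobs above rely on the same rule: a bulk format only finalizes part files when a checkpoint completes, which is why they enable checkpointing and use OnCheckpointRollingPolicy, and why running such a sink without checkpoints tends to leave incomplete in-progress parquet files (the caveat noted in WordCountFileSourceStreamFileSinkOfParquet). Below is a minimal sketch of that wiring, not part of the original repo; the object name, input elements and output path are made up, and the same Flink 1.10-era dependencies are assumed. With such a tiny bounded input the job can still finish before the first checkpoint fires, so a real use would pair this with an unbounded or long-running source.

package com.yyb.flink10.DataStream.sink.StreamingFileSink.BulkEncodedSink

import org.apache.flink.core.fs.Path
import org.apache.flink.formats.parquet.avro.ParquetAvroWriters
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy
import org.apache.flink.streaming.api.scala._

object ParquetBulkSinkMinimalSketch {
  case class WC(word: String, ct: Int)

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Bulk formats are rolled on checkpoint only, so checkpointing must be enabled.
    env.enableCheckpointing(1000)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    val wc: DataStream[WC] = env
      .fromElements("to be or not to be")
      .flatMap(_.toLowerCase.split("\\W+"))
      .filter(_.nonEmpty)
      .map(WC(_, 1))
      .keyBy(_.word)
      .sum("ct")

    // Part files under this (made-up) path are finalized whenever a checkpoint completes.
    val parquetSink: StreamingFileSink[WC] = StreamingFileSink
      .forBulkFormat(new Path("./xxx.text/rs_demo"), ParquetAvroWriters.forReflectRecord(classOf[WC]))
      .withRollingPolicy(OnCheckpointRollingPolicy.build())
      .build()

    wc.addSink(parquetSink).setParallelism(1)
    env.execute("ParquetBulkSinkMinimalSketch")
  }
}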
/src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/BulkEncodedSink/WordCountFileSourceStreamFileSinkOfSequence.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.BulkEncodedSink 2 | 3 | import com.yyb.flink10.DataStream.sink.StreamingFileSink.BulkEncodedSink.WordCountFileSourceStreamFileSinkOfParquet.WC 4 | import org.apache.flink.api.java.tuple 5 | import org.apache.hadoop.conf.Configuration 6 | import org.apache.flink.api.java.utils.ParameterTool 7 | import org.apache.flink.configuration.GlobalConfiguration 8 | import org.apache.flink.core.fs.Path 9 | import org.apache.flink.formats.sequencefile.SequenceFileWriterFactory 10 | import org.apache.flink.runtime.util.HadoopUtils 11 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 12 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 13 | import org.apache.flink.streaming.api.scala._ 14 | import org.apache.hadoop.io.{LongWritable, Text} 15 | 16 | /** 17 | * @Author yyb 18 | * @Description 19 | * @Date Create in 2020-04-18 20 | * @Time 18:25 21 | */ 22 | object WordCountFileSourceStreamFileSinkOfSequence { 23 | def main(args: Array[String]): Unit = { 24 | val env = StreamExecutionEnvironment.getExecutionEnvironment 25 | 26 | env.generateSequence(1, 100) 27 | 28 | val params = ParameterTool.fromArgs(args) 29 | 30 | env.getConfig.setGlobalJobParameters(params) 31 | 32 | val fileOutputCofig = OutputFileConfig 33 | .builder() 34 | .withPartSuffix(".sequence") 35 | .build() 36 | 37 | val fileSinkPath = "./xxx.text/rs3" 38 | 39 | val wc: DataStream[tuple.Tuple2[LongWritable, Text]] = env.generateSequence(1, 100).map(x => new tuple.Tuple2(new LongWritable(x), new Text(x.toString))) 40 | 41 | 42 | 43 | 44 | /** 45 | * 这里的 LongWritable 和 Text 都是 org.apache.hadoop.io 下的包, 46 | * 是在 hadoop-common 依赖中的,你可以直接 依赖这个包, 47 | * 也可以 依赖 hadoop-client 这个包, hadoop-client 这个包里面 有 hadoop-common 这个依赖。 48 | */ 49 | val hadoopConf: Configuration = HadoopUtils.getHadoopConfiguration(GlobalConfiguration.loadConfiguration()) 50 | val parquetSink: StreamingFileSink[tuple.Tuple2[LongWritable, Text]] = StreamingFileSink.forBulkFormat(new Path(fileSinkPath), 51 | new SequenceFileWriterFactory(hadoopConf, classOf[LongWritable], classOf[Text])) 52 | // .withNewBucketAssigner(new BasePathBucketAssigner()) 53 | .withOutputFileConfig(fileOutputCofig) 54 | .build() 55 | 56 | wc.addSink(parquetSink).setParallelism(1) 57 | 58 | env.execute("WordCountFileSourceStreamFileSinkOfParquet") 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/RowEncodedSink/WordCountElementsSourceStreamFileSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.RowEncodedSink 2 | 3 | import com.yyb.flink10.DataStream.data.WordCountData 4 | import org.apache.flink.api.common.serialization.SimpleStringEncoder 5 | import org.apache.flink.api.java.utils.ParameterTool 6 | import org.apache.flink.core.fs.Path 7 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner 8 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 9 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 10 | import 
org.apache.flink.streaming.api.scala._ 11 | 12 | /** 13 | * @Author yyb 14 | * @Description 15 | * @Date Create in 2020-04-15 16 | * @Time 21:45 17 | */ 18 | object WordCountElementsSourceStreamFileSink { 19 | def main(args: Array[String]): Unit = { 20 | val env = StreamExecutionEnvironment.getExecutionEnvironment 21 | val params = ParameterTool.fromArgs(args) 22 | 23 | env.getConfig.setGlobalJobParameters(params) 24 | 25 | val config = OutputFileConfig 26 | .builder() 27 | .withPartPrefix("wordcount") 28 | .withPartSuffix("exe") 29 | .build() 30 | 31 | val text: DataStream[String] = 32 | if(params.has("input")){ // ParameterTool.fromArgs stores "--input"/"--output" without the leading dashes 33 | env.readTextFile(params.get("input")) 34 | }else{ 35 | println("Executing WordCount example with default inputs data set.") 36 | println("Use --input to specify file input.") 37 | // get default test text data 38 | env.fromElements(WordCountData.WORDS: _*) 39 | } 40 | 41 | val counts: DataStream[(String, Int)] = text.flatMap(_.toLowerCase.split("\\W+")) 42 | .filter(_.nonEmpty) 43 | .map((_, 1)) 44 | .keyBy(0) 45 | .sum(1) 46 | 47 | if(params.has("output")){ 48 | counts.writeAsText(params.get("output")) // writeAsText is deprecated; prefer the StreamingFileSink below 49 | // Note: the type parameter here must match the element type of counts 50 | /** 51 | * Note: chaining several .withXxx(...) calls before build() hits a bug in this Scala API version; either write it in Java or move to a newer Flink release 52 | */ 53 | val filesink: StreamingFileSink[(String, Int)] = StreamingFileSink 54 | .forRowFormat(new Path(params.get("output")), new SimpleStringEncoder[(String, Int)]("UTF-8")) 55 | // .withRollingPolicy( 56 | // DefaultRollingPolicy.builder() 57 | // .withRolloverInterval(TimeUnit.MINUTES.toMillis(15)) 58 | // .withInactivityInterval(TimeUnit.MINUTES.toMillis(5)) 59 | // .withMaxPartSize(1024 * 1024 * 1024) 60 | // .build()) 61 | // .withBucketAssigner(new DateTimeBucketAssigner()) // this assigner buckets by time as yyyy-MM-dd--HH; the format and time zone can be customized 62 | 63 | .withBucketAssigner(new BasePathBucketAssigner[(String, Int)]) // this assigner creates no bucket sub-directories; part files go directly under the base path 64 | // .withOutputFileConfig(config) // set the prefix/suffix of the output part files 65 | .build() 66 | 67 | counts.addSink(filesink) 68 | 69 | }else{ 70 | println("Printing result to stdout. 
Use --output to specify output path.") 71 | counts.print() 72 | } 73 | 74 | env.execute("StreamWordCount") 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/RowEncodedSink/WordCountElementsSourceStreamFileSinkJava.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.RowEncodedSink; 2 | 3 | import com.yyb.flink10.DataStream.data.WordCountData; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.serialization.SimpleStringEncoder; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.core.fs.Path; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig; 13 | import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink; 14 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner; 15 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.DefaultRollingPolicy; 16 | import org.apache.flink.util.Collector; 17 | 18 | import java.util.concurrent.TimeUnit; 19 | 20 | /** 21 | * @Author yyb 22 | * @Description 23 | * @Date Create in 2020-04-17 24 | * @Time 17:20 25 | */ 26 | public class WordCountElementsSourceStreamFileSinkJava { 27 | public static void main(String[] args) throws Exception { 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | ParameterTool params = ParameterTool.fromArgs(args); 30 | env.getConfig().setGlobalJobParameters(params); 31 | 32 | OutputFileConfig config = OutputFileConfig 33 | .builder() 34 | .withPartPrefix("wordcount") 35 | .withPartSuffix("exe") 36 | .build(); 37 | 38 | DataStream text = null; 39 | if(params.has("--input")){ 40 | text = env.readTextFile(params.get("--input")); 41 | }else{ 42 | System.out.println("Executing WordCount example with default inputs data set."); 43 | System.out.println("Use --input to specify file input."); 44 | // get default test text data 45 | text = env.fromElements(WordCountData.WORDS); 46 | } 47 | 48 | DataStream> counts = text.flatMap(new myFlatMap()); 49 | SingleOutputStreamOperator> rs = counts.keyBy(0).sum(1); 50 | 51 | StreamingFileSink streamingFileSink = StreamingFileSink.forRowFormat(new Path("./xxx.text/rs4"), 52 | new SimpleStringEncoder>("utf-8")) 53 | .withRollingPolicy(DefaultRollingPolicy.builder() 54 | .withRolloverInterval(TimeUnit.MINUTES.toMillis(15)) 55 | .withInactivityInterval(TimeUnit.MINUTES.toMillis(5)) 56 | .withMaxPartSize(1024 * 1024 * 1024) 57 | .build()) 58 | .withBucketAssigner(new BasePathBucketAssigner>() ) 59 | .withOutputFileConfig(config) 60 | .build(); 61 | 62 | rs.addSink(streamingFileSink); 63 | 64 | rs.setParallelism(1).print(); 65 | 66 | env.execute("xxx"); 67 | 68 | } 69 | public static final class myFlatMap implements FlatMapFunction>{ 70 | 71 | @Override 72 | public void flatMap(String s, Collector> collector) throws Exception { 73 | String[] tokens = s.toLowerCase().split("\\W+"); 74 | for(String token : tokens){ 75 | if(token.length() >0 ){ 76 | 
collector.collect(new Tuple2<>(token, 1)); 77 | } 78 | } 79 | } 80 | } 81 | } 82 | 83 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/DataStream/sink/StreamingFileSink/RowEncodedSink/WordCountFileSourceStreamFileSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.DataStream.sink.StreamingFileSink.RowEncodedSink 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringEncoder 4 | import org.apache.flink.api.java.utils.ParameterTool 5 | import org.apache.flink.core.fs.Path 6 | import org.apache.flink.streaming.api.functions.sink.filesystem.{OutputFileConfig, StreamingFileSink} 7 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 8 | import org.apache.flink.streaming.api.scala._ 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-04-18 14 | * @Time 17:17 15 | */ 16 | object WordCountFileSourceStreamFileSink { 17 | def main(args: Array[String]): Unit = { 18 | val env = StreamExecutionEnvironment.getExecutionEnvironment 19 | val params = ParameterTool.fromArgs(args) 20 | 21 | env.getConfig.setGlobalJobParameters(params) 22 | 23 | 24 | val fileSourcePath = "/Users/yyb/Downloads/1.txt" 25 | val fileSinkPath = "./xxx.text/rs1" 26 | 27 | val wc: DataStream[(String, Int)] = env.readTextFile(fileSourcePath) 28 | .flatMap(_.toLowerCase.split("\\W+")) 29 | .filter(_.nonEmpty) 30 | .map((_, 1)) 31 | .keyBy(0) 32 | .sum(1) 33 | 34 | val outputFileConfig = OutputFileConfig 35 | .builder() 36 | .withPartPrefix("filesource") 37 | .withPartSuffix(".finksink") 38 | .build() 39 | 40 | val fileSink: StreamingFileSink[(String, Int)] = StreamingFileSink.forRowFormat(new Path(fileSinkPath), 41 | new SimpleStringEncoder[(String, Int)]("UTF-8")) 42 | .withOutputFileConfig(outputFileConfig) 43 | .build() 44 | 45 | // wc.addSink(fileSink) 46 | wc.addSink(fileSink).setParallelism(1) //这样减少输出文件的个数,但是生产环境不建议使用,会影响性能 47 | 48 | env.execute("WordCountFileSourceStreamFileSink") 49 | 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/OutputFormat/KafkaOutputFormat.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.OutputFormat; 2 | 3 | import org.apache.flink.api.common.io.RichOutputFormat; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.kafka011.shaded.org.apache.kafka.clients.producer.ProducerRecord; 6 | import org.apache.flink.streaming.connectors.kafka.internal.FlinkKafkaProducer; 7 | 8 | import java.io.IOException; 9 | import java.util.Properties; 10 | 11 | /** 12 | * @Author yyb 13 | * @Description 14 | * @Date Create in 2020-07-29 15 | * @Time 17:45 16 | */ 17 | public class KafkaOutputFormat extends RichOutputFormat { 18 | private Properties properties; 19 | 20 | private FlinkKafkaProducer flinkKafkaProducer; 21 | public KafkaOutputFormat(Properties properties){ 22 | this.properties = properties; 23 | } 24 | 25 | 26 | @Override 27 | public void configure(Configuration parameters) { 28 | 29 | } 30 | 31 | @Override 32 | public void open(int taskNumber, int numTasks) throws IOException { 33 | flinkKafkaProducer = new FlinkKafkaProducer(properties); 34 | } 35 | 36 | @Override 37 | public void writeRecord(String record) throws IOException { 38 | ProducerRecord recordP = new ProducerRecord(this.properties.getProperty("topic"), 
record); 39 | flinkKafkaProducer.send(recordP); 40 | } 41 | 42 | @Override 43 | public void close() throws IOException { 44 | flinkKafkaProducer.flush(); 45 | flinkKafkaProducer.close(); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/Current1.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-03 7 | * @Time 09:25 8 | */ 9 | public class Current1 { 10 | private String rowtime; 11 | private int amount; 12 | private String currency; 13 | public Current1(){ 14 | 15 | } 16 | 17 | public Current1(String rowtime, int amount, String currency) { 18 | this.rowtime = rowtime; 19 | this.amount = amount; 20 | this.currency = currency; 21 | } 22 | 23 | public String getRowtime() { 24 | return rowtime; 25 | } 26 | 27 | public void setRowtime(String rowtime) { 28 | this.rowtime = rowtime; 29 | } 30 | 31 | public int getAmount() { 32 | return amount; 33 | } 34 | 35 | public void setAmount(int amount) { 36 | this.amount = amount; 37 | } 38 | 39 | public String getCurrency() { 40 | return currency; 41 | } 42 | 43 | public void setCurrency(String currency) { 44 | this.currency = currency; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return "Current1{" + 50 | "rowtime='" + rowtime + '\'' + 51 | ", amount=" + amount + 52 | ", currency='" + currency + '\'' + 53 | '}'; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/Current2.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-03 7 | * @Time 09:25 8 | */ 9 | public class Current2 { 10 | private String rowtime; 11 | private int amount; 12 | private String currency; 13 | private Long eventTime; 14 | public Current2(){ 15 | 16 | } 17 | 18 | public Current2(String rowtime, int amount, String currency, Long eventTime) { 19 | this.rowtime = rowtime; 20 | this.amount = amount; 21 | this.currency = currency; 22 | this.eventTime = eventTime; 23 | } 24 | 25 | public String getRowtime() { 26 | return rowtime; 27 | } 28 | 29 | public void setRowtime(String rowtime) { 30 | this.rowtime = rowtime; 31 | } 32 | 33 | public int getAmount() { 34 | return amount; 35 | } 36 | 37 | public void setAmount(int amount) { 38 | this.amount = amount; 39 | } 40 | 41 | public String getCurrency() { 42 | return currency; 43 | } 44 | 45 | public void setCurrency(String currency) { 46 | this.currency = currency; 47 | } 48 | 49 | public Long getEventTime() { 50 | return eventTime; 51 | } 52 | 53 | public void setEventTime(Long eventTime) { 54 | this.eventTime = eventTime; 55 | } 56 | 57 | @Override 58 | public String toString() { 59 | return "Current2{" + 60 | "rowtime='" + rowtime + '\'' + 61 | ", amount=" + amount + 62 | ", currency='" + currency + '\'' + 63 | ", eventTime=" + eventTime + 64 | '}'; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/Pi.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-06-10 7 | * @Time 11:38 8 | */ 9 | 
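// Editor's note: Pi is the plain POJO bound to the kafka JSON source in
// ReadFromKafkaConnectorWriteToLocalParquetFilePiJava (via toAppendStream(test, Pi.class))
// and written with ParquetAvroWriters.forReflectRecord(Pi.class); it therefore needs to stay
// a simple bean with a public no-arg constructor and getters/setters matching the "id"/"time" fields.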
public class Pi{ 10 | private String id; 11 | private String time; 12 | 13 | public String getId() { 14 | return id; 15 | } 16 | 17 | public void setId(String id) { 18 | this.id = id; 19 | } 20 | 21 | public String getTime() { 22 | return time; 23 | } 24 | 25 | public void setTime(String time) { 26 | this.time = time; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/ProductInfo.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-19 7 | * @Time 09:17 8 | */ 9 | public class ProductInfo { 10 | private String productID; 11 | private String productName; 12 | private String productCategory; 13 | private String updatedAt; 14 | private Long updatedAtTimestamp; 15 | 16 | public ProductInfo() { 17 | } 18 | 19 | public ProductInfo(String productID, String productName, String productCategory, String updatedAt, Long updatedAtTimestamp) { 20 | this.productID = productID; 21 | this.productName = productName; 22 | this.productCategory = productCategory; 23 | this.updatedAt = updatedAt; 24 | this.updatedAtTimestamp = updatedAtTimestamp; 25 | } 26 | 27 | public String getProductID() { 28 | return productID; 29 | } 30 | 31 | public void setProductID(String productID) { 32 | this.productID = productID; 33 | } 34 | 35 | public String getProductName() { 36 | return productName; 37 | } 38 | 39 | public void setProductName(String productName) { 40 | this.productName = productName; 41 | } 42 | 43 | public String getProductCategory() { 44 | return productCategory; 45 | } 46 | 47 | public void setProductCategory(String productCategory) { 48 | this.productCategory = productCategory; 49 | } 50 | 51 | public String getUpdatedAt() { 52 | return updatedAt; 53 | } 54 | 55 | public void setUpdatedAt(String updatedAt) { 56 | this.updatedAt = updatedAt; 57 | } 58 | 59 | public Long getUpdatedAtTimestamp() { 60 | return updatedAtTimestamp; 61 | } 62 | 63 | public void setUpdatedAtTimestamp(Long updatedAtTimestamp) { 64 | this.updatedAtTimestamp = updatedAtTimestamp; 65 | } 66 | 67 | @Override 68 | public String toString() { 69 | return "ProductInfo{" + 70 | "productID='" + productID + '\'' + 71 | ", productName='" + productName + '\'' + 72 | ", productCategory='" + productCategory + '\'' + 73 | ", updatedAt='" + updatedAt + '\'' + 74 | ", updatedAtTimestamp=" + updatedAtTimestamp + 75 | '}'; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/Rate.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-06 7 | * @Time 17:56 8 | */ 9 | public class Rate { 10 | private String rowtime; 11 | private String currency; 12 | private int rate; 13 | 14 | public Rate() { 15 | } 16 | 17 | public Rate(String rowtime, String currency, Integer rate) { 18 | this.rowtime = rowtime; 19 | this.currency = currency; 20 | this.rate = rate; 21 | } 22 | 23 | public String getRowtime() { 24 | return rowtime; 25 | } 26 | 27 | public void setRowtime(String rowtime) { 28 | this.rowtime = rowtime; 29 | } 30 | 31 | public String getCurrency() { 32 | return currency; 33 | } 34 | 35 | public void setCurrency(String currency) { 36 | this.currency = currency; 37 | } 38 | 
39 | public int getRate() { 40 | return rate; 41 | } 42 | 43 | public void setRate(int rate) { 44 | this.rate = rate; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return "Rate{" + 50 | "rowtime='" + rowtime + '\'' + 51 | ", currency='" + currency + '\'' + 52 | ", rate=" + rate + 53 | '}'; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/Rate2.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-06 7 | * @Time 17:56 8 | */ 9 | public class Rate2 { 10 | private String rowtime; 11 | private String currency; 12 | private int rate; 13 | private Long eventTime; 14 | 15 | public Rate2() { 16 | } 17 | 18 | public Rate2(String rowtime, String currency, int rate, Long eventTime) { 19 | this.rowtime = rowtime; 20 | this.currency = currency; 21 | this.rate = rate; 22 | this.eventTime = eventTime; 23 | } 24 | 25 | public String getRowtime() { 26 | return rowtime; 27 | } 28 | 29 | public void setRowtime(String rowtime) { 30 | this.rowtime = rowtime; 31 | } 32 | 33 | public String getCurrency() { 34 | return currency; 35 | } 36 | 37 | public void setCurrency(String currency) { 38 | this.currency = currency; 39 | } 40 | 41 | public int getRate() { 42 | return rate; 43 | } 44 | 45 | public void setRate(int rate) { 46 | this.rate = rate; 47 | } 48 | 49 | public Long getEventTime() { 50 | return eventTime; 51 | } 52 | 53 | public void setEventTime(Long eventTime) { 54 | this.eventTime = eventTime; 55 | } 56 | 57 | @Override 58 | public String toString() { 59 | return "Rate2{" + 60 | "rowtime='" + rowtime + '\'' + 61 | ", currency='" + currency + '\'' + 62 | ", rate=" + rate + 63 | ", eventTime=" + eventTime + 64 | '}'; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/commonEntity/UserBrowseLog.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.commonEntity; 2 | 3 | /** 4 | * @Author yyb 5 | * @Description 6 | * @Date Create in 2020-08-19 7 | * @Time 09:05 8 | */ 9 | public class UserBrowseLog { 10 | private String userID; 11 | private String eventTime; 12 | private String eventType; 13 | private String productID; 14 | private Integer productPrice; 15 | private Long eventTimeTimestamp; 16 | 17 | public UserBrowseLog() { 18 | } 19 | 20 | public UserBrowseLog(String userID, String eventTime, String eventType, String productID, Integer productPrice, Long eventTimeTimestamp) { 21 | this.userID = userID; 22 | this.eventTime = eventTime; 23 | this.eventType = eventType; 24 | this.productID = productID; 25 | this.productPrice = productPrice; 26 | this.eventTimeTimestamp = eventTimeTimestamp; 27 | } 28 | 29 | public String getUserID() { 30 | return userID; 31 | } 32 | 33 | public void setUserID(String userID) { 34 | this.userID = userID; 35 | } 36 | 37 | public String getEventTime() { 38 | return eventTime; 39 | } 40 | 41 | public void setEventTime(String eventTime) { 42 | this.eventTime = eventTime; 43 | } 44 | 45 | public String getEventType() { 46 | return eventType; 47 | } 48 | 49 | public void setEventType(String eventType) { 50 | this.eventType = eventType; 51 | } 52 | 53 | public String getProductID() { 54 | return productID; 55 | } 56 | 57 | public void setProductID(String productID) { 58 | 
this.productID = productID; 59 | } 60 | 61 | public Integer getProductPrice() { 62 | return productPrice; 63 | } 64 | 65 | public void setProductPrice(Integer productPrice) { 66 | this.productPrice = productPrice; 67 | } 68 | 69 | public Long getEventTimeTimestamp() { 70 | return eventTimeTimestamp; 71 | } 72 | 73 | public void setEventTimeTimestamp(Long eventTimeTimestamp) { 74 | this.eventTimeTimestamp = eventTimeTimestamp; 75 | } 76 | 77 | @Override 78 | public String toString() { 79 | return "UserBrowseLog{" + 80 | "userID='" + userID + '\'' + 81 | ", eventTime='" + eventTime + '\'' + 82 | ", eventType='" + eventType + '\'' + 83 | ", productID='" + productID + '\'' + 84 | ", productPrice=" + productPrice + 85 | ", eventTimeTimestamp=" + eventTimeTimestamp + 86 | '}'; 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/sink/KafkaBatchTableSink.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.sink; 2 | 3 | import com.yyb.flink10.OutputFormat.KafkaOutputFormat; 4 | import org.apache.flink.api.common.typeinfo.TypeInformation; 5 | import org.apache.flink.api.java.DataSet; 6 | import org.apache.flink.api.java.typeutils.RowTypeInfo; 7 | import org.apache.flink.table.sinks.BatchTableSink; 8 | import org.apache.flink.table.sinks.TableSink; 9 | import org.apache.flink.types.Row; 10 | 11 | /** 12 | * @Author yyb 13 | * @Description 14 | * @Date Create in 2020-07-30 15 | * @Time 13:36 16 | */ 17 | public class KafkaBatchTableSink implements BatchTableSink { 18 | 19 | private final KafkaOutputFormat kafkaOutputFormat; 20 | private String[] fieldNames = new String[]{"value"}; 21 | private TypeInformation[] fieldTypes = new TypeInformation[]{TypeInformation.of(String.class)}; 22 | 23 | public KafkaBatchTableSink(KafkaOutputFormat kafkaOutputFormat){ 24 | this.kafkaOutputFormat = kafkaOutputFormat; 25 | } 26 | 27 | @Override 28 | public void emitDataSet(DataSet dataSet) { 29 | dataSet.output(kafkaOutputFormat); 30 | } 31 | 32 | @Override 33 | public TableSink configure(String[] fieldNames, TypeInformation[] fieldTypes) { 34 | return null; 35 | } 36 | 37 | @Override 38 | public String[] getFieldNames() { 39 | return fieldNames; 40 | } 41 | 42 | @Override 43 | public TypeInformation[] getFieldTypes() { 44 | return fieldTypes; 45 | } 46 | 47 | @Override 48 | public TypeInformation getOutputType() { 49 | return TypeInformation.of(String.class); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/sink/ParquetSinkFunction.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.sink 2 | 3 | import org.apache.flink.api.common.functions.RuntimeContext 4 | import org.apache.flink.configuration.Configuration 5 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 6 | import org.apache.parquet.hadoop.ParquetWriter 7 | import org.apache.parquet.hadoop.metadata.CompressionCodecName 8 | 9 | /** 10 | * @Author yyb 11 | * @Description 12 | * @Date Create in 2020-04-25 13 | * @Time 21:47 14 | */ 15 | class ParquetSinkFunction[IN](val path: String, val schema: String, val compressionCodecName: CompressionCodecName) extends RichSinkFunction[IN]{ 16 | var parquetWriter: ParquetWriter[IN] = null 17 | 18 | override def close(): Unit = { 19 | parquetWriter.close() 20 | } 21 | 22 | override def invoke(value: IN, 
context: SinkFunction.Context[_]): Unit = { 23 | parquetWriter.write(value) 24 | } 25 | 26 | override def open(parameters: Configuration): Unit = { 27 | super.open(parameters) 28 | val ctx = getRuntimeContext 29 | parquetWriter 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/sink/ParquetWriterSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.sink 2 | 3 | import org.apache.avro.Schema 4 | import org.apache.avro.reflect.ReflectData 5 | import org.apache.flink.configuration.Configuration 6 | import org.apache.hadoop.fs.Path 7 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 8 | import org.apache.parquet.avro.AvroParquetWriter 9 | import org.apache.parquet.hadoop.{ParquetFileWriter, ParquetWriter} 10 | import org.apache.parquet.hadoop.metadata.CompressionCodecName 11 | 12 | class ParquetWriterSink[IN](val path: String, val schema: String, val compressionCodecName: CompressionCodecName) extends RichSinkFunction[IN] { 13 | var parquetWriter: ParquetWriter[IN] = null 14 | 15 | override def open(parameters: Configuration): Unit = { 16 | parquetWriter = AvroParquetWriter.builder[IN](new Path(path)) 17 | .withSchema(new Schema.Parser().parse(schema)) 18 | .withCompressionCodec(compressionCodecName) 19 | // .withPageSize(config.pageSize) 20 | // .withRowGroupSize(config.blockSize) 21 | // .withDictionaryEncoding(config.enableDictionary) 22 | .withWriteMode(ParquetFileWriter.Mode.OVERWRITE) 23 | // .withValidation(config.validating) 24 | .withDataModel(ReflectData.get) 25 | .build() 26 | } 27 | 28 | override def close(): Unit = { 29 | parquetWriter.close() 30 | } 31 | 32 | override def invoke(value: IN, context: SinkFunction.Context[_]): Unit = { 33 | parquetWriter.write(value) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/BatchQuery.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch 2 | 3 | import org.apache.flink.table.api.{EnvironmentSettings, Table, TableEnvironment} 4 | import org.apache.flink.table.sources.CsvTableSource 5 | import org.apache.flink.table.types.AtomicDataType 6 | import org.apache.flink.table.types.logical.{IntType, VarCharType} 7 | 8 | /** 9 | * @Author yyb 10 | * @Description 注意 Blink 11 | * @Date Create in 2020-04-18 12 | * @Time 21:05 13 | */ 14 | object BatchQuery { 15 | def main(args: Array[String]): Unit = { 16 | val bbSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build() 17 | val bbTableEnv: TableEnvironment = TableEnvironment.create(bbSettings) 18 | 19 | 20 | 21 | val sourceFIlePath = "/Users/yyb/Dwonloads/1_rs.csv" 22 | 23 | val stringField = new AtomicDataType(new VarCharType(50)) 24 | val intField = new AtomicDataType(new IntType) 25 | val csvTableSource: CsvTableSource = CsvTableSource.builder() 26 | .path(sourceFIlePath) 27 | .field("word", stringField) 28 | // .field("word", Types.STRING) //方法已压制 29 | .field("count", intField) 30 | // .field("count", Types.INT) //方法已压制 31 | .build() 32 | 33 | val input: Table = bbTableEnv.fromTableSource(csvTableSource) 34 | 35 | 36 | 37 | 38 | bbTableEnv.createTemporaryView("wordcount", input) 39 | 40 | bbTableEnv.sqlQuery("select * from wordcount").printSchema() 41 | 42 | 43 | bbTableEnv.execute("BatchQuery") 44 | 45 | 46 | } 
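  // Editor's note (sketch, not in the original file): main above only prints the schema of the
  // query result. To actually materialize it, a sink has to be registered and filled before the
  // bbTableEnv.execute("BatchQuery") call, e.g. with a CsvTableSink (hypothetical output path),
  // mirroring what BlinkBatchWriteToJDBCTableSink does:
  //   val csvSink = new CsvTableSink("/tmp/wordcount_out", ",")
  //   bbTableEnv.registerTableSink("wordcount_out",
  //     Array("word", "count"),
  //     Array[TypeInformation[_]](BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
  //     csvSink)
  //   bbTableEnv.sqlQuery("select * from wordcount").insertInto("wordcount_out")
  // (imports assumed: org.apache.flink.table.sinks.CsvTableSink,
  //  org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeInformation})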
47 | 48 | case class WORDCOUNT(word:String, count:Int) 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/BlinkHiveBatchDemo.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch 2 | 3 | import org.apache.flink.table.api.{EnvironmentSettings, TableEnvironment} 4 | import org.apache.flink.table.catalog.hive.HiveCatalog 5 | 6 | /** 7 | * @Author yyb 8 | * @Description 9 | * @Date Create in 2020-04-20 10 | * @Time 13:33 11 | */ 12 | object BlinkHiveBatchDemo { 13 | def main(args: Array[String]): Unit = { 14 | val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build() 15 | val bbTableEnv = TableEnvironment.create(settings) 16 | 17 | val name = "myhive" 18 | val defaultDatabase = "flink" 19 | // val hiveConfDir = "src/main/resources/" //hive-site.xml的本地目录 ,注意 当有 hive-site.xml 在 resources 下的时候 ,hiveConfDir 也需要设置,否则会提示 20 | //Required table missing : "DBS" in Catalog "" Schema "". DataNucleus requires this table to perform its persistence operations. Either your MetaData is incorrect, or you need to enable "datanucleus.schema.autoCreateTables" 21 | val hiveConfDir = this.getClass.getResource("/").getFile //可以通过这一种方式设置 hiveConfDir,这样的话,开发与测试和生产环境可以保持一致 22 | 23 | val version = "2.3.6" 24 | val hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, version) 25 | 26 | bbTableEnv.registerCatalog("myhive", hive) 27 | bbTableEnv.useCatalog("myhive") 28 | 29 | //注意 查询语句 myhive.flink.a myhive是你的Hcatalog的别称,flink是库名称,a是别名称 30 | bbTableEnv.sqlQuery("select * from myhive.flink.a").printSchema() 31 | 32 | 33 | // bbTableEnv.execute("BlinkHiveBatchDemo") 34 | 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/JDBC/BlinkBatchReadFromJDBCTableSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch.JDBC 2 | 3 | import org.apache.flink.api.java.io.jdbc.{JDBCLookupOptions, JDBCOptions, JDBCReadOptions, JDBCTableSource} 4 | import org.apache.flink.table.api.{EnvironmentSettings, Table, TableEnvironment, TableSchema} 5 | import org.apache.flink.table.types.AtomicDataType 6 | import org.apache.flink.table.types.logical.{DateType, IntType, VarCharType} 7 | 8 | /** 9 | * @Author yyb 10 | * @Description 11 | * @Date Create in 2020-04-27 12 | * @Time 11:09 13 | */ 14 | object BlinkBatchReadFromJDBCTableSource { 15 | def main(args: Array[String]): Unit = { 16 | val settings: EnvironmentSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build() 17 | val blinkBatchTableEnv = TableEnvironment.create(settings) 18 | 19 | val lookOption = JDBCLookupOptions.builder() 20 | .setCacheExpireMs(60*1000) 21 | .setCacheMaxSize(1024*1024) 22 | .setMaxRetryTimes(10) 23 | .build() 24 | 25 | val jdbcOpition = JDBCOptions.builder() 26 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 27 | .setDriverName("com.mysql.jdbc.Driver") 28 | .setUsername("root") 29 | .setPassword("111111") 30 | .setTableName("t_order") 31 | .build() 32 | 33 | val jdbcReadOption = JDBCReadOptions.builder() 34 | .setFetchSize(5000) 35 | .build() 36 | 37 | val tableSchema = TableSchema.builder() 38 | .field("id", new AtomicDataType(new IntType)) 39 | .field("name", new AtomicDataType(new VarCharType(2147483647))) //注意 String 就是 
2147483647 40 | .field("time", new AtomicDataType(new DateType)) 41 | .build() 42 | 43 | val jdbcTableSource: JDBCTableSource = JDBCTableSource.builder() 44 | .setLookupOptions(lookOption) 45 | .setOptions(jdbcOpition) 46 | .setReadOptions(jdbcReadOption) 47 | .setSchema(tableSchema) 48 | .build() 49 | 50 | 51 | val t_order: Table = blinkBatchTableEnv.fromTableSource(jdbcTableSource) 52 | 53 | blinkBatchTableEnv.createTemporaryView("t_order", t_order) 54 | 55 | blinkBatchTableEnv.sqlQuery("select * from t_order").printSchema() 56 | 57 | 58 | 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/JDBC/BlinkBatchWriteToJDBCTableSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch.JDBC 2 | 3 | 4 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo 5 | import org.apache.flink.api.java.io.jdbc.JDBCAppendTableSink 6 | import org.apache.flink.table.api.{EnvironmentSettings, Table, TableEnvironment} 7 | import org.apache.flink.table.sources.CsvTableSource 8 | import org.apache.flink.table.types.AtomicDataType 9 | import org.apache.flink.table.types.logical.{IntType, VarCharType} 10 | 11 | /** 12 | * @Author yyb 13 | * @Description 14 | * @Date Create in 2020-04-27 15 | * @Time 13:19 16 | */ 17 | object BlinkBatchWriteToJDBCTableSink { 18 | def main(args: Array[String]): Unit = { 19 | //blink env 20 | val settings = EnvironmentSettings.newInstance().inBatchMode().useBlinkPlanner().build() 21 | val blinkBatchTableEnv = TableEnvironment.create(settings) 22 | 23 | val sourceFIlePath = "/Users/yyb/Downloads/1_rs.csv" 24 | 25 | val stringField = new AtomicDataType(new VarCharType(50)) 26 | val intField = new AtomicDataType(new IntType) 27 | val csvTableSource: CsvTableSource = CsvTableSource.builder() 28 | .path(sourceFIlePath) 29 | .field("word", stringField) 30 | // .field("word", Types.STRING) //方法已压制 31 | .field("count", intField) 32 | // .field("count", Types.INT) //方法已压制 33 | .build() 34 | 35 | val word: Table = blinkBatchTableEnv.fromTableSource(csvTableSource) 36 | 37 | 38 | val jdbcAppendTableSink = JDBCAppendTableSink.builder() 39 | .setBatchSize(5000) 40 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 41 | .setDrivername("com.mysql.jdbc.Driver") 42 | .setUsername("root") 43 | .setPassword("111111") 44 | .setQuery("insert into wordcount (word, count) values(?, ?)") 45 | .setParameterTypes(java.sql.Types.VARCHAR, java.sql.Types.INTEGER) 46 | .build() 47 | 48 | 49 | blinkBatchTableEnv.createTemporaryView("word", word) 50 | 51 | val sql = 52 | s""" 53 | |select * from word 54 | """.stripMargin 55 | blinkBatchTableEnv.sqlQuery(sql).printSchema() 56 | 57 | blinkBatchTableEnv.registerTableSink("word1", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcAppendTableSink) 58 | 59 | blinkBatchTableEnv.sqlQuery(sql).insertInto("word1") //注意 这样 把数据 倒入到 sink 中去 60 | 61 | 62 | blinkBatchTableEnv.execute("BlinkBatchWriteToJDBCTableSink") 63 | 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/hive/Fromkafka2HiveUseCatalog.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch.hive; 2 | 3 | import com.yyb.flink10.commonEntity.Pi; 4 | import 
com.yyb.flink10.table.blink.stream.hive.WriteData2HiveJavaReadFromkafkaTableSource; 5 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.api.java.typeutils.RowTypeInfo; 8 | import org.apache.flink.formats.json.JsonRowDeserializationSchema; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.connectors.kafka.Kafka010TableSource; 12 | import org.apache.flink.streaming.connectors.kafka.config.StartupMode; 13 | import org.apache.flink.table.api.*; 14 | import org.apache.flink.table.api.java.StreamTableEnvironment; 15 | import org.apache.flink.table.catalog.ObjectPath; 16 | import org.apache.flink.table.catalog.hive.HiveCatalog; 17 | import org.apache.flink.table.descriptors.Schema; 18 | 19 | import java.util.Collections; 20 | import java.util.List; 21 | import java.util.Optional; 22 | import java.util.Properties; 23 | 24 | /** 25 | * @Author yyb 26 | * @Description 经过多次尝试,目前 flink 不支持 table insert hive table 27 | * @Date Create in 2020-07-07 28 | * @Time 16:22 29 | */ 30 | public class Fromkafka2HiveUseCatalog { 31 | public static void main(String[] args) throws Exception { 32 | // System.setProperty("HADOOP_USER_NAME", "center"); 33 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build(); 34 | TableEnvironment tableEnv = TableEnvironment.create(settings); 35 | 36 | String name = "myhive"; 37 | String defaultDatabase = "test"; 38 | String hiveConfDir = WriteData2HiveJavaReadFromkafkaTableSource.class.getResource("/").getFile(); //可以通过这一种方式设置 hiveConfDir,这样的话,开发与测试和生产环境可以保持一致 39 | String version = "2.1.1"; 40 | HiveCatalog hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, version); 41 | 42 | tableEnv.registerCatalog("myhive", hive); 43 | tableEnv.useCatalog("myhive"); 44 | 45 | /** 46 | * kafka start 47 | */ 48 | Schema schema = new Schema(); 49 | TableSchema tableSchema = TableSchema.builder() 50 | .field("id", DataTypes.STRING()) 51 | .field("time", DataTypes.STRING()) 52 | .build(); 53 | schema.schema(tableSchema); 54 | Properties prop = new Properties(); 55 | prop.put("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181"); 56 | prop.put("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092"); 57 | prop.put("group.id", "yyb_dev"); 58 | 59 | TypeInformation[] types = new TypeInformation[]{BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO}; 60 | String[] fields = new String[]{"id", "time"}; 61 | RowTypeInfo rowTypeINfo = new RowTypeInfo(types, fields); 62 | JsonRowDeserializationSchema jsonRowDeserializationSchema = new JsonRowDeserializationSchema.Builder(rowTypeINfo).build(); 63 | // Kafka010TableSource kafka = new Kafka010TableSource(tableSchema, "eventsource_yhj", prop, jsonRowDeserializationSchema); 64 | //指定 从 kafka 的 earliest 开始消费 65 | Kafka010TableSource kafka = new Kafka010TableSource(tableSchema, Optional.empty(), Collections.emptyList(), Optional.empty(),"eventsource_yhj", prop, jsonRowDeserializationSchema 66 | , StartupMode.EARLIEST, Collections.emptyMap()); 67 | 68 | Table kafkaTable = tableEnv.fromTableSource(kafka); 69 | 70 | tableEnv.createTemporaryView("kafkaTable", kafkaTable); 71 | 72 | /** 73 | * kafka end 74 | */ 75 | 76 | 77 | List dbs = hive.listDatabases(); 78 | for(String db : dbs){ 79 | System.out.println(db); 80 | } 
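        // Editor's note: the statements below are sanity checks against the registered HiveCatalog
        // (listing the tables in "test" and verifying that test.a exists) before the partitioned
        // "insert into test.a partition(dt=20200305) ..." statement further down is attempted.
        // As the class-level comment records, this kafka-to-Hive insert did not work with this
        // Flink/Hive combination in the author's tests.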
81 | 82 | System.out.println("------------------"); 83 | 84 | List tbs = hive.listTables("test"); 85 | for(String tb : tbs){ 86 | System.out.println(tb); 87 | } 88 | 89 | boolean xx = hive.tableExists(new ObjectPath("test", "a")); 90 | System.out.println(xx + " cvb--------------"); 91 | Table sink = tableEnv.from("test.a"); 92 | sink.printSchema(); 93 | 94 | 95 | // tableEnv.insertInto("test.a", kafkaTable); 96 | // kafkaTable.insertInto("test.a"); 97 | 98 | String sql = "insert into test.a partition(dt=20200305) select * from kafkaTable"; 99 | tableEnv.sqlUpdate(sql); 100 | 101 | tableEnv.execute("Fromkafka2HiveUseCatalog"); 102 | 103 | 104 | 105 | 106 | 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/batch/kafka/WriteJsonDataByKafkaConnector.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.batch.kafka; 2 | 3 | import org.apache.flink.table.api.DataTypes; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.TableEnvironment; 6 | import org.apache.flink.table.api.TableSchema; 7 | import org.apache.flink.table.descriptors.ConnectTableDescriptor; 8 | import org.apache.flink.table.descriptors.Json; 9 | import org.apache.flink.table.descriptors.Kafka; 10 | import org.apache.flink.table.descriptors.Schema; 11 | 12 | import java.util.ArrayList; 13 | import java.util.List; 14 | 15 | /** 16 | * @Author yyb 17 | * @Description 18 | * @Date Create in 2020-07-27 19 | * @Time 19:22 20 | */ 21 | public class WriteJsonDataByKafkaConnector { 22 | public static void main(String[] args){ 23 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build(); 24 | TableEnvironment blinkBatchTableEnv = TableEnvironment.create(settings); 25 | 26 | Kafka kafka = new Kafka(); 27 | kafka.version("0.11") 28 | .topic("eventsource_yhj") 29 | .property("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 30 | .property("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 31 | .property("group.id", "yyb_dev") 32 | .startFromEarliest(); 33 | 34 | Schema schema = new Schema(); 35 | TableSchema tableSchema = TableSchema.builder() 36 | .field("id", DataTypes.STRING()) 37 | .field("time", DataTypes.STRING()) 38 | .build(); 39 | schema.schema(tableSchema); 40 | ConnectTableDescriptor tableSink = blinkBatchTableEnv.connect(kafka) 41 | .withFormat(new Json().failOnMissingField(true)) 42 | .withSchema(schema); 43 | 44 | 45 | tableSink.createTemporaryTable("kafka_sink"); 46 | 47 | 48 | 49 | 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/FileSystem/ReadFromKafkaConnectorWriteToLocalParquetFilePiJava.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.FileSystem; 2 | 3 | import com.yyb.flink10.commonEntity.Pi; 4 | import org.apache.avro.JsonProperties; 5 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 6 | import org.apache.flink.api.java.typeutils.TupleTypeInfo; 7 | import org.apache.flink.core.fs.Path; 8 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters; 9 | import org.apache.flink.streaming.api.CheckpointingMode; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import 
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink; 13 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy; 14 | import org.apache.flink.table.api.DataTypes; 15 | import org.apache.flink.table.api.EnvironmentSettings; 16 | import org.apache.flink.table.api.Table; 17 | import org.apache.flink.table.api.TableSchema; 18 | import org.apache.flink.table.api.java.StreamTableEnvironment; 19 | import org.apache.flink.table.descriptors.*; 20 | 21 | import java.util.ArrayList; 22 | 23 | /** 24 | * @Author yyb 25 | * @Description 26 | * @Date Create in 2020-06-16 27 | * @Time 10:24 28 | */ 29 | public class ReadFromKafkaConnectorWriteToLocalParquetFilePiJava { 30 | public static void main(String[] args) throws Exception { 31 | EnvironmentSettings setttings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build(); 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env, setttings); 34 | 35 | env.enableCheckpointing(20); 36 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 37 | 38 | Kafka kafka = new Kafka(); 39 | kafka.version("0.11") 40 | .topic("eventsource_yhj") 41 | .property("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 42 | .property("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 43 | .property("group.id", "yyb_dev") 44 | .startFromEarliest(); 45 | 46 | Schema schema = new Schema(); 47 | TableSchema tableSchema = TableSchema.builder() 48 | .field("id", DataTypes.STRING()) 49 | .field("time", DataTypes.STRING()) 50 | .build(); 51 | schema.schema(tableSchema); 52 | ConnectTableDescriptor tableSource = flinkTableEnv.connect(kafka) 53 | .withFormat(new Json().failOnMissingField(true)) 54 | .withSchema(schema); 55 | tableSource.createTemporaryTable("test"); 56 | String sql = "select * from test"; 57 | 58 | Table test = flinkTableEnv.from("test"); 59 | test.printSchema(); 60 | 61 | 62 | //transfor 2 dataStream 63 | DataStream testDataStream = flinkTableEnv.toAppendStream(test, Pi.class); //使用 Class 的方式 64 | 65 | String fileSinkPath = "./xxx.text/rs6/"; 66 | 67 | StreamingFileSink parquetSink = StreamingFileSink. 
68 | forBulkFormat(new Path(fileSinkPath), 69 | ParquetAvroWriters.forReflectRecord(Pi.class)) 70 | .withRollingPolicy(OnCheckpointRollingPolicy.build()) 71 | .build(); 72 | 73 | testDataStream.addSink(parquetSink).setParallelism(1); 74 | 75 | flinkTableEnv.execute("ReadFromKafkaConnectorWriteToLocalFileJava"); 76 | 77 | 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/FileSystem/ReadFromKafkaConnectorWriteToLocalTextFileJava.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.FileSystem; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.DataTypes; 5 | import org.apache.flink.table.api.EnvironmentSettings; 6 | import org.apache.flink.table.api.Table; 7 | import org.apache.flink.table.api.TableSchema; 8 | import org.apache.flink.table.api.java.StreamTableEnvironment; 9 | import org.apache.flink.table.descriptors.*; 10 | 11 | /** 12 | * @Author yyb 13 | * @Description 14 | * @Date Create in 2020-06-16 15 | * @Time 10:24 16 | */ 17 | public class ReadFromKafkaConnectorWriteToLocalTextFileJava { 18 | public static void main(String[] args) throws Exception { 19 | EnvironmentSettings setttings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build(); 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env, setttings); 22 | 23 | Kafka kafka = new Kafka(); 24 | kafka.version("0.11") 25 | .topic("eventsource_yhj") 26 | .property("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 27 | .property("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 28 | .property("group.id", "yyb_dev") 29 | .startFromEarliest(); 30 | 31 | Schema schema = new Schema(); 32 | TableSchema tableSchema = TableSchema.builder() 33 | .field("id", DataTypes.STRING()) 34 | .field("time", DataTypes.STRING()) 35 | .build(); 36 | schema.schema(tableSchema); 37 | ConnectTableDescriptor tableSource = flinkTableEnv.connect(kafka) 38 | .withFormat(new Json().failOnMissingField(true)) 39 | .withSchema(schema); 40 | tableSource.createTemporaryTable("test"); 41 | String sql = "select * from test"; 42 | 43 | Table test = flinkTableEnv.from("test"); 44 | test.printSchema(); 45 | 46 | 47 | String path = "./xxx.text/rs5/"; 48 | FileSystem localFIle = new FileSystem(); 49 | localFIle.path(path); 50 | 51 | ConnectTableDescriptor tableSink = flinkTableEnv.connect(localFIle) 52 | .withFormat(new OldCsv()) 53 | .withSchema(schema); 54 | 55 | tableSink.createTemporaryTable("test_sink"); 56 | 57 | flinkTableEnv.insertInto(test, "test_sink"); 58 | 59 | flinkTableEnv.execute("ReadFromKafkaConnectorWriteToLocalFileJava"); 60 | 61 | 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/JDBC/ReadDataFromJDBCTableSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.JDBC 2 | 3 | import org.apache.flink.api.java.io.jdbc.{JDBCLookupOptions, JDBCOptions, JDBCReadOptions, JDBCTableSource} 4 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _} 5 | import org.apache.flink.table.api.{EnvironmentSettings, 
Table, TableSchema} 6 | import org.apache.flink.table.api.scala.StreamTableEnvironment 7 | import org.apache.flink.table.types.AtomicDataType 8 | import org.apache.flink.table.types.logical.{DateType, IntType, VarCharType} 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-04-30 14 | * @Time 10:01 15 | */ 16 | object ReadDataFromJDBCTableSource { 17 | def main(args: Array[String]): Unit = { 18 | val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build() 19 | val env = StreamExecutionEnvironment.getExecutionEnvironment 20 | val blinkStreamTable = StreamTableEnvironment.create(env, settings) 21 | 22 | val fileSourcePath = "/Users/yyb/Downloads/1.txt" 23 | 24 | val wcStream: DataStream[(String, Int)] = env.readTextFile(fileSourcePath) 25 | .flatMap(_.split("\\W+")) 26 | .filter(_.nonEmpty) 27 | .map((_, 1)) 28 | .keyBy(0) 29 | .sum(1) 30 | 31 | 32 | val table: Table = blinkStreamTable.fromDataStream(wcStream) 33 | 34 | blinkStreamTable.createTemporaryView("wd", table) 35 | 36 | var sql = 37 | """ 38 | |select * from wd 39 | """.stripMargin 40 | 41 | blinkStreamTable.sqlQuery(sql).printSchema() 42 | 43 | val lookOption = JDBCLookupOptions.builder() 44 | .setCacheExpireMs(60*1000) 45 | .setCacheMaxSize(1024*1024) 46 | .setMaxRetryTimes(10) 47 | .build() 48 | 49 | val jdbcOpition = JDBCOptions.builder() 50 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 51 | .setDriverName("com.mysql.jdbc.Driver") 52 | .setUsername("root") 53 | .setPassword("111111") 54 | .setTableName("t_order") 55 | .build() 56 | 57 | val jdbcReadOption = JDBCReadOptions.builder() 58 | .setFetchSize(5000) 59 | .build() 60 | 61 | val tableSchema = TableSchema.builder() 62 | .field("id", new AtomicDataType(new IntType)) 63 | .field("name", new AtomicDataType(new VarCharType(2147483647))) //注意 String 就是 2147483647 64 | .field("time", new AtomicDataType(new DateType)) 65 | .build() 66 | 67 | val jdbcTableSource: JDBCTableSource = JDBCTableSource.builder() 68 | .setLookupOptions(lookOption) 69 | .setOptions(jdbcOpition) 70 | .setReadOptions(jdbcReadOption) 71 | .setSchema(tableSchema) 72 | .build() 73 | 74 | blinkStreamTable.registerTableSource("mysql_t_order", jdbcTableSource) 75 | 76 | blinkStreamTable.sqlQuery("select * from mysql_t_order") 77 | 78 | 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/JDBC/WriteDataByJDBCTableSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.JDBC 2 | 3 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo 4 | import org.apache.flink.api.java.io.jdbc.JDBCAppendTableSink 5 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _} 6 | import org.apache.flink.table.api.{EnvironmentSettings, Table} 7 | import org.apache.flink.table.api.scala.StreamTableEnvironment 8 | 9 | /** 10 | * @Author yyb 11 | * @Description 12 | * @Date Create in 2020-04-30 13 | * @Time 10:25 14 | */ 15 | object WriteDataByJDBCTableSink { 16 | def main(args: Array[String]): Unit = { 17 | val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build() 18 | val env = StreamExecutionEnvironment.getExecutionEnvironment 19 | val blinkStreamTable = StreamTableEnvironment.create(env, settings) 20 | 21 | val fileSourcePath = "./data/data.txt" 22 | 23 | val wcStream: DataStream[(String, 
Int)] = env.readTextFile(fileSourcePath) 24 | .flatMap(_.split("\\W+")) 25 | .filter(_.nonEmpty) 26 | .map((_, 1)) 27 | .keyBy(0) 28 | .sum(1) 29 | 30 | 31 | val table: Table = blinkStreamTable.fromDataStream(wcStream) 32 | 33 | blinkStreamTable.createTemporaryView("wd", table) 34 | 35 | var sql = 36 | """ 37 | |select * from wd 38 | """.stripMargin 39 | 40 | blinkStreamTable.sqlQuery(sql).printSchema() 41 | 42 | val jdbcAppendTableSink = JDBCAppendTableSink.builder() 43 | .setBatchSize(5000) 44 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 45 | .setDrivername("com.mysql.jdbc.Driver") 46 | .setUsername("root") 47 | .setPassword("111111") 48 | .setQuery("insert into wordcount (word, count) values(?, ?)") 49 | .setParameterTypes(java.sql.Types.VARCHAR, java.sql.Types.INTEGER) 50 | .build() 51 | 52 | blinkStreamTable.registerTableSink("word1", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcAppendTableSink) 53 | 54 | table.insertInto("word1") 55 | 56 | blinkStreamTable.execute("WriteDataByJDBCTableSink") 57 | 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/JDBC/WriteDataByJDBCTableUpsertSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.JDBC 2 | 3 | import org.apache.flink.api.java.io.jdbc.{JDBCAppendTableSink, JDBCOptions, JDBCUpsertTableSink} 4 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _} 5 | import org.apache.flink.table.api.scala.StreamTableEnvironment 6 | import org.apache.flink.table.api.{EnvironmentSettings, Table, TableSchema} 7 | import org.apache.flink.table.types.AtomicDataType 8 | import org.apache.flink.table.types.logical.{BigIntType, DateType, IntType, VarCharType} 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-04-30 14 | * @Time 10:25 15 | */ 16 | object WriteDataByJDBCTableUpsertSink { 17 | def main(args: Array[String]): Unit = { 18 | val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build() 19 | val env = StreamExecutionEnvironment.getExecutionEnvironment 20 | val blinkStreamTable = StreamTableEnvironment.create(env, settings) 21 | 22 | val fileSourcePath = "./data/data.txt" 23 | 24 | val wcStream: DataStream[WordCount] = env.readTextFile(fileSourcePath) 25 | .flatMap(_.split("\\W+")) 26 | .filter(_.nonEmpty) 27 | .map((_, 1)) 28 | .keyBy(0) 29 | .sum(1) 30 | .map(x => WordCount(x._1, x._2)) 31 | 32 | 33 | val table: Table = blinkStreamTable.fromDataStream(wcStream) 34 | 35 | 36 | blinkStreamTable.createTemporaryView("wd", table) 37 | 38 | var sql = 39 | """ 40 | |select * from wd 41 | """.stripMargin 42 | blinkStreamTable.sqlQuery(sql).printSchema() 43 | sql = 44 | s""" 45 | |select word , count(`count`) from wd group by word 46 | |""".stripMargin 47 | 48 | 49 | 50 | 51 | 52 | val jdbcOpition = JDBCOptions.builder() 53 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 54 | .setDriverName("com.mysql.jdbc.Driver") 55 | .setUsername("root") 56 | .setPassword("111111") 57 | .setTableName("wordcount") 58 | .build() 59 | 60 | val tableSchema = TableSchema.builder() 61 | .field("word", new AtomicDataType(new VarCharType(2147483647))) //注意 String 就是 2147483647 62 | .field("count", new AtomicDataType(new BigIntType())) 63 | .build() 64 | 65 | val jdbcUpsertTableSink = 
JDBCUpsertTableSink.builder() 66 | .setOptions(jdbcOpition) 67 | .setFlushIntervalMills(1000) 68 | .setFlushMaxSize(1024*1024*12) 69 | .setTableSchema(tableSchema) 70 | .build() 71 | jdbcUpsertTableSink.setKeyFields(Array("word")) 72 | jdbcUpsertTableSink.setIsAppendOnly(false) 73 | // blinkStreamTable.registerTableSink("word1", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcUpsertTableSink) 74 | blinkStreamTable.registerTableSink("word1", jdbcUpsertTableSink) 75 | 76 | 77 | // Note: the sql used here must contain an aggregation; without one the result table ends up with duplicate rows, because the isAppendOnly and keyFields of jdbcUpsertTableSink are inferred from the Flink execution plan 78 | blinkStreamTable.sqlQuery(sql).insertInto("word1") 79 | 80 | 81 | blinkStreamTable.execute("WriteDataByJDBCTableUpsertSink") 82 | 83 | } 84 | 85 | case class WordCount(word:String, count:Int) 86 | } 87 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/StreamQuery.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream 2 | 3 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 4 | import org.apache.flink.streaming.api.scala._ 5 | import org.apache.flink.table.api.{EnvironmentSettings, Table} 6 | import org.apache.flink.table.api.scala.StreamTableEnvironment 7 | 8 | /** 9 | * @Author yyb 10 | * @Description 11 | * @Date Create in 2020-04-18 12 | * @Time 21:05 13 | */ 14 | object StreamQuery { 15 | def main(args: Array[String]): Unit = { 16 | val blinkStreamSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build() 17 | val streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 18 | 19 | val streamTableEnv: StreamTableEnvironment = StreamTableEnvironment.create(streamExecutionEnvironment, blinkStreamSettings) 20 | 21 | val fileSourcePath = "/Users/yyb/Downloads/1.txt" 22 | 23 | val wcStream: DataStream[(String, Int)] = streamExecutionEnvironment.readTextFile(fileSourcePath) 24 | .flatMap(_.split("\\W+")) 25 | .filter(_.nonEmpty) 26 | .map((_, 1)) 27 | .keyBy(0) 28 | .sum(1) 29 | 30 | 31 | val table: Table = streamTableEnv.fromDataStream(wcStream) 32 | 33 | streamTableEnv.createTemporaryView("wd", table) 34 | 35 | var sql = 36 | """ 37 | |select * from wd 38 | """.stripMargin 39 | 40 | streamTableEnv.sqlQuery(sql).printSchema() 41 | 42 | val dataStream: DataStream[WD] = streamTableEnv.toAppendStream[WD](table) 43 | 44 | dataStream.print() 45 | 46 | streamTableEnv.execute("StreamQuery") 47 | } 48 | 49 | case class WD(word:String, count:Int) 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/hive/WriteData2HiveJavaReadFromkafkaTableSource.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.hive; 2 | 3 | import com.yyb.flink10.commonEntity.Pi; 4 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 5 | import org.apache.flink.api.common.typeinfo.TypeInformation; 6 | import org.apache.flink.api.java.typeutils.RowTypeInfo; 7 | import org.apache.flink.formats.json.JsonRowDeserializationSchema; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.streaming.connectors.kafka.Kafka010TableSource; 11 | import 
org.apache.flink.streaming.connectors.kafka.config.StartupMode; 12 | import org.apache.flink.table.api.DataTypes; 13 | import org.apache.flink.table.api.EnvironmentSettings; 14 | import org.apache.flink.table.api.Table; 15 | import org.apache.flink.table.api.TableSchema; 16 | import org.apache.flink.table.api.java.StreamTableEnvironment; 17 | import org.apache.flink.table.catalog.hive.HiveCatalog; 18 | import org.apache.flink.table.descriptors.Schema; 19 | 20 | import java.util.Collections; 21 | import java.util.Optional; 22 | import java.util.Properties; 23 | 24 | 25 | /** 26 | * @Author yyb 27 | * @Description 28 | * @Date Create in 2020-07-07 29 | * @Time 14:28 30 | */ 31 | public class WriteData2HiveJavaReadFromkafkaTableSource { 32 | public static void main(String[] args) throws Exception { 33 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 35 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings); 36 | Schema schema = new Schema(); 37 | TableSchema tableSchema = TableSchema.builder() 38 | .field("id", DataTypes.STRING()) 39 | .field("time", DataTypes.STRING()) 40 | .build(); 41 | schema.schema(tableSchema); 42 | Properties prop = new Properties(); 43 | prop.put("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181"); 44 | prop.put("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092"); 45 | prop.put("group.id", "yyb_dev1"); 46 | 47 | TypeInformation[] types = new TypeInformation[]{BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO}; 48 | String[] fields = new String[]{"id", "time"}; 49 | RowTypeInfo rowTypeINfo = new RowTypeInfo(types, fields); 50 | JsonRowDeserializationSchema jsonRowDeserializationSchema = new JsonRowDeserializationSchema.Builder(rowTypeINfo).build(); 51 | // Kafka010TableSource kafka = new Kafka010TableSource(tableSchema, "eventsource_yhj", prop, jsonRowDeserializationSchema); 52 | // Consume the Kafka topic from the earliest offset 53 | Kafka010TableSource kafka = new Kafka010TableSource(tableSchema, Optional.empty(), Collections.emptyList(), Optional.empty(),"eventsource_yhj", prop, jsonRowDeserializationSchema 54 | , StartupMode.EARLIEST, Collections.emptyMap()); 55 | 56 | Table kafkaSource = tableEnv.fromTableSource(kafka); 57 | 58 | tableEnv.createTemporaryView("default_catalog.kafkaSource", kafkaSource); 59 | 60 | String sql ="select * from default_catalog.kafkaSource"; 61 | tableEnv.sqlQuery(sql).printSchema(); 62 | 63 | String name = "myhive"; 64 | String defaultDatabase = "test"; 65 | String hiveConfDir = WriteData2HiveJavaReadFromkafkaTableSource.class.getResource("/").getFile(); // Setting hiveConfDir from the classpath like this keeps development, test and production environments consistent 66 | 67 | // String version = "2.3.6"; 68 | String version = "1.1.0"; 69 | HiveCatalog hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, version); 70 | 71 | tableEnv.registerCatalog("myhive", hive); 72 | tableEnv.useCatalog("myhive"); 73 | 74 | sql = "insert into myhive.test.a select * from default_catalog.kafkaSource"; 75 | tableEnv.sqlUpdate(sql); 76 | 77 | DataStream<Pi> kafkaSourceDataStream = tableEnv.toAppendStream(kafkaSource, Pi.class); 78 | kafkaSourceDataStream.print().setParallelism(1); 79 | tableEnv.execute("WriteData2Hive"); 80 | } 81 | } 82 | --------------------------------------------------------------------------------
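Note: several examples in this section convert the Kafka-backed table into a DataStream of com.yyb.flink10.commonEntity.Pi (via toAppendStream(..., Pi.class)) and write it with ParquetAvroWriters.forReflectRecord(Pi.class). Pi.java itself is not reproduced in this section, so the sketch below only illustrates the kind of POJO those calls assume, inferred from the (id STRING, time STRING) schema; the actual class in commonEntity may differ. Flink treats a class as a POJO only if it is public, has a public no-argument constructor, and exposes its fields publicly or through getters and setters.

package com.yyb.flink10.commonEntity;

// Minimal sketch of the assumed Pi POJO (id STRING, time STRING); not the repository's actual file.
public class Pi {
    private String id;
    private String time;

    // public no-arg constructor, required for Flink POJO handling and for reflection-based Avro writers
    public Pi() {
    }

    public Pi(String id, String time) {
        this.id = id;
        this.time = time;
    }

    public String getId() { return id; }
    public void setId(String id) { this.id = id; }

    public String getTime() { return time; }
    public void setTime(String time) { this.time = time; }

    @Override
    public String toString() {
        return "Pi{id='" + id + "', time='" + time + "'}";
    }
}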
/src/main/scala/com/yyb/flink10/table/blink/stream/hive/WriteData2HiveReadFromkafkaTableSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.hive 2 | 3 | import java.util.Properties 4 | 5 | import com.yyb.flink10.commonEntity.Pi 6 | import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeInformation} 7 | import org.apache.flink.api.java.typeutils.RowTypeInfo 8 | import org.apache.flink.formats.json.JsonRowDeserializationSchema 9 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 10 | import org.apache.flink.streaming.api.scala._ 11 | import org.apache.flink.streaming.connectors.kafka.Kafka010TableSource 12 | import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table, TableSchema} 13 | import org.apache.flink.table.api.scala.StreamTableEnvironment 14 | import org.apache.flink.table.catalog.hive.HiveCatalog 15 | import org.apache.flink.table.descriptors.Schema 16 | 17 | /** 18 | * @Author yyb 19 | * @Description Note: use the Java version of this example instead; 20 | * this Scala code fails with an error at new RowTypeInfo 21 | * @Date Create in 2020-07-07 22 | * @Time 10:26 23 | */ 24 | object WriteData2HiveReadFromkafkaTableSource { 25 | def main(args: Array[String]): Unit = { 26 | val settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build() 27 | val env = StreamExecutionEnvironment.getExecutionEnvironment 28 | val tableEnv = StreamTableEnvironment.create(env, settings) 29 | 30 | val schema = new Schema() 31 | val tableSchema: TableSchema = TableSchema.builder() 32 | .field("id", DataTypes.STRING()) 33 | .field("time", DataTypes.STRING()) 34 | .build() 35 | schema.schema(tableSchema) 36 | val prop = new Properties() 37 | prop.put("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 38 | prop.put("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 39 | prop.put("group.id", "yyb_dev") 40 | 41 | val types: Array[BasicTypeInfo[String]] = Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO) 42 | val fields: Array[String] = Array("id", "time") 43 | // val rowTypeINfo = new RowTypeInfo(types, fields) 44 | val jsonRowDeserializationSchema = new JsonRowDeserializationSchema.Builder(schema.toString).build() 45 | 46 | val kafka = new Kafka010TableSource(tableSchema, "eventsource_yhj", prop, jsonRowDeserializationSchema) 47 | 48 | val kafkaSource: Table = tableEnv.fromTableSource(kafka) 49 | tableEnv.createTemporaryView("kafkaSource", kafkaSource) 50 | 51 | 52 | val name = "myhive" 53 | val defaultDatabase = "flink" 54 | val hiveConfDir = this.getClass.getResource("/").getFile // Setting hiveConfDir from the classpath like this keeps development, test and production environments consistent 55 | 56 | // val version = "2.3.6" 57 | val version = "1.1.0" 58 | val hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, version) 59 | 60 | tableEnv.registerCatalog("myhive", hive) 61 | tableEnv.useCatalog("myhive") 62 | 63 | var sql = 64 | s""" 65 | |insert into table myhive.${defaultDatabase}.a select * from kafkaSource 66 | |""".stripMargin 67 | tableEnv.sqlUpdate(sql) 68 | 69 | val kafkaSourceDataStream: DataStream[Pi] = tableEnv.toAppendStream[Pi](kafkaSource) 70 | kafkaSourceDataStream.print().setParallelism(1) 71 | 72 | tableEnv.execute("WriteData2Hive") 73 | 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/join/temporaltable/TemporalTableDemo.java: 
-------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.join.temporaltable; 2 | 3 | import org.apache.flink.api.java.io.jdbc.JDBCLookupOptions; 4 | import org.apache.flink.api.java.io.jdbc.JDBCOptions; 5 | import org.apache.flink.api.java.io.jdbc.JDBCReadOptions; 6 | import org.apache.flink.api.java.io.jdbc.JDBCTableSource; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.table.api.EnvironmentSettings; 10 | import org.apache.flink.table.api.Table; 11 | import org.apache.flink.table.api.TableSchema; 12 | import org.apache.flink.table.api.java.StreamTableEnvironment; 13 | import org.apache.flink.table.types.AtomicDataType; 14 | import org.apache.flink.table.types.logical.DateType; 15 | import org.apache.flink.table.types.logical.IntType; 16 | import org.apache.flink.table.types.logical.VarCharType; 17 | import org.apache.flink.types.Row; 18 | 19 | /** 20 | * @Author yyb 21 | * @Description 22 | * @Date Create in 2020-07-27 23 | * @Time 16:14 24 | */ 25 | public class TemporalTableDemo { 26 | public static void main(String[] args) throws Exception { 27 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 30 | 31 | JDBCLookupOptions lookOption = JDBCLookupOptions.builder() 32 | .setCacheExpireMs(60 * 1000) 33 | .setCacheMaxSize(1024 * 1024) 34 | .setMaxRetryTimes(10) 35 | .build(); 36 | 37 | JDBCOptions jdbcOpition = JDBCOptions.builder() 38 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 39 | .setDriverName("com.mysql.jdbc.Driver") 40 | .setUsername("root") 41 | .setPassword("111111") 42 | .setTableName("RatesHistory") 43 | .build(); 44 | 45 | JDBCReadOptions jdbcReadOption = JDBCReadOptions.builder() 46 | .setFetchSize(5000) 47 | .build(); 48 | 49 | TableSchema tableSchema = TableSchema.builder() 50 | .field("rowtime", new AtomicDataType(new VarCharType(2147483647))) 51 | .field("currency", new AtomicDataType(new VarCharType(2147483647))) // Note: STRING corresponds to VarCharType(2147483647), i.e. the maximum length 52 | .field("rate", new AtomicDataType(new IntType())) 53 | .build(); 54 | 55 | JDBCTableSource jdbcTableSource = JDBCTableSource.builder() 56 | .setLookupOptions(lookOption) 57 | .setOptions(jdbcOpition) 58 | .setReadOptions(jdbcReadOption) 59 | .setSchema(tableSchema) 60 | .build(); 61 | 62 | blinkTableEnv.registerTableSource("LatestRates", jdbcTableSource); 63 | 64 | blinkTableEnv.registerFunction("jdbcLookup", jdbcTableSource.getLookupFunction(new String[]{"currency"})); 65 | 66 | // Note: the data of a temporal table cannot be queried directly, so the FOR SYSTEM_TIME query is replaced by a plain select below 67 | String sql = "SELECT * FROM LatestRates FOR SYSTEM_TIME AS OF Timestamp '2020-07-27 16:30:15'"; 68 | sql = "select * from LatestRates"; 69 | Table rs1 = blinkTableEnv.sqlQuery(sql); 70 | DataStream<Row> rs1DataStream = blinkTableEnv.toAppendStream(rs1, Row.class); 71 | rs1DataStream.print().setParallelism(1); 72 | 73 | blinkTableEnv.execute("TemporalTableDemo"); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/join/temporaltable/TemporalTableFunction.java: -------------------------------------------------------------------------------- 1 | package 
com.yyb.flink10.table.blink.stream.join.temporaltable; 2 | 3 | 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStream; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.table.api.EnvironmentSettings; 9 | import org.apache.flink.table.api.Table; 10 | import org.apache.flink.table.api.java.StreamTableEnvironment; 11 | import org.apache.flink.types.Row; 12 | 13 | import java.util.ArrayList; 14 | import java.util.List; 15 | 16 | /** 17 | * @Author yyb 18 | * @Description Temporal table function 19 | * @Date Create in 2020-07-27 20 | * @Time 15:44 21 | */ 22 | public class TemporalTableFunction { 23 | public static void main(String[] args) throws Exception { 24 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 26 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 27 | List<Tuple2<String, Long>> ratesHistoryData = new ArrayList<>(); 28 | ratesHistoryData.add(Tuple2.of("US Dollar", 102L)); 29 | ratesHistoryData.add(Tuple2.of("Euro", 114L)); 30 | ratesHistoryData.add(Tuple2.of("Yen", 1L)); 31 | ratesHistoryData.add(Tuple2.of("Euro", 116L)); 32 | ratesHistoryData.add(Tuple2.of("Euro", 119L)); 33 | 34 | DataStreamSource<Tuple2<String, Long>> ratesHistoryStream = env.fromCollection(ratesHistoryData); 35 | // Append a processing-time attribute 36 | Table ratesHistory = blinkTableEnv.fromDataStream(ratesHistoryStream, "r_currency, r_rate, r_proctime.proctime"); 37 | blinkTableEnv.createTemporaryView("RatesHistory", ratesHistory); 38 | 39 | // Create the Temporal Table Function 40 | org.apache.flink.table.functions.TemporalTableFunction rates = ratesHistory.createTemporalTableFunction("r_proctime", "r_currency"); 41 | blinkTableEnv.registerFunction("Rates", rates); 42 | 43 | // Note: the data behind a temporal table function cannot be queried directly like this 44 | String sql = "SELECT * FROM RatesHistory FOR SYSTEM_TIME AS OF TIME '16:01:15'"; 45 | 46 | Table rs1 = blinkTableEnv.sqlQuery(sql); 47 | DataStream<Row> rs1DataStream = blinkTableEnv.toAppendStream(rs1, Row.class); 48 | rs1DataStream.print().setParallelism(1); 49 | 50 | blinkTableEnv.execute("TemporalTableFunction"); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/EventTimeDemo.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.yyb.flink10.commonEntity.Current1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.streaming.api.CheckpointingMode; 7 | import org.apache.flink.streaming.api.TimeCharacteristic; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.streaming.api.functions.ProcessFunction; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 14 | import org.apache.flink.table.api.EnvironmentSettings; 15 | import org.apache.flink.table.api.Table; 16 | import 
org.apache.flink.table.api.java.StreamTableEnvironment; 17 | import org.apache.flink.types.Row; 18 | import org.apache.flink.util.Collector; 19 | 20 | import java.io.InputStream; 21 | import java.util.Properties; 22 | 23 | /** 24 | * @Author yyb 25 | * @Description 26 | * @Date Create in 2020-08-10 27 | * @Time 18:04 28 | */ 29 | public class EventTimeDemo { 30 | public static void main(String[] args) throws Exception { 31 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 34 | 35 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 36 | env.enableCheckpointing(3000); 37 | env.getCheckpointConfig().setTolerableCheckpointFailureNumber(3); 38 | env.getCheckpointConfig().setMaxConcurrentCheckpoints(1); 39 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 40 | 41 | env.getConfig().setAutoWatermarkInterval(1000); 42 | 43 | InputStream in_env = ClassLoader.getSystemResourceAsStream("env.properties"); 44 | Properties prop = new Properties(); 45 | prop.load(in_env); 46 | 47 | 48 | 49 | Properties properties = new Properties(); 50 | properties.setProperty("bootstrap.servers", prop.getProperty("bootstrap.servers")); 51 | properties.setProperty("zookeeper.connect", prop.getProperty("zookeeper.connect")); 52 | properties.setProperty("group.id", "test"); 53 | 54 | FlinkKafkaConsumer011<String> kafkaSource = new FlinkKafkaConsumer011<>("eventsource_yyb", new SimpleStringSchema(), properties); 55 | DataStream<String> stream = env.addSource(kafkaSource); 56 | 57 | 58 | DataStream<Current1> currentDS = stream.process(new ProcessFunction<String, Current1>() { 59 | @Override 60 | public void processElement(String value, Context ctx, Collector<Current1> out) throws Exception { 61 | Current1 current1 = JSON.parseObject(value, Current1.class); 62 | out.collect(current1); 63 | } 64 | }); 65 | 66 | currentDS = currentDS.assignTimestampsAndWatermarks(new TimestampExtractor(Time.seconds(0))); // reassign: assignTimestampsAndWatermarks returns a new stream rather than modifying currentDS in place 67 | 68 | currentDS.print().setParallelism(1); 69 | 70 | // sql rowtime 71 | // Note: the first rowtime is just the POJO's own field; user_action_time.rowtime is the actual event-time attribute 72 | Table t = blinkTableEnv.fromDataStream(currentDS, "rowtime,amount,currency,user_action_time.rowtime"); 73 | 74 | DataStream<Row> tRow = blinkTableEnv.toAppendStream(t, Row.class); 75 | tRow.print().setParallelism(1); 76 | env.execute("EventTimeDemo"); 77 | 78 | 79 | } 80 | 81 | static class TimestampExtractor extends BoundedOutOfOrdernessTimestampExtractor<Current1> { 82 | 83 | public TimestampExtractor(Time maxOutOfOrderness){ 84 | super(maxOutOfOrderness); 85 | } 86 | @Override 87 | public long extractTimestamp(Current1 element) { 88 | return Long.parseLong(element.getRowtime()); 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/ReadDataFromKafkaConnector.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka 2 | 3 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 4 | import org.apache.flink.streaming.api.scala._ 5 | import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table, TableSchema} 6 | import org.apache.flink.table.api.scala.StreamTableEnvironment 7 | import org.apache.flink.table.descriptors.{ConnectTableDescriptor, Json, Kafka, 
Schema} 8 | 9 | /** 10 | * @Author yyb 11 | * @Description 12 | * @Date Create in 2020-06-10 13 | * @Time 09:32 14 | */ 15 | object ReadDataFromKafkaConnector { 16 | def main(args: Array[String]): Unit = { 17 | val settings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build() 18 | val env = StreamExecutionEnvironment.getExecutionEnvironment 19 | val flinkTableEnv = StreamTableEnvironment.create(env, settings) 20 | 21 | val kafka = new Kafka() 22 | kafka.version("0.11") 23 | .topic("eventsource_yhj") 24 | .property("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 25 | .property("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 26 | .property("group.id", "yyb_dev") 27 | .startFromEarliest() 28 | 29 | val schema = new Schema() 30 | val tableSchema = TableSchema.builder() 31 | .field("id", DataTypes.STRING()) 32 | .field("time", DataTypes.STRING()) 33 | .build() 34 | schema.schema(tableSchema) 35 | val tableSource: ConnectTableDescriptor = flinkTableEnv.connect(kafka) 36 | .withFormat( new Json().failOnMissingField(true) ) 37 | .withSchema(schema) 38 | tableSource.createTemporaryTable("test") 39 | var sql = "select * from test" 40 | 41 | val test: Table = flinkTableEnv.from("test") 42 | test.printSchema() 43 | 44 | 45 | val testDataStream: DataStream[Pi] = flinkTableEnv.toAppendStream[Pi](test) 46 | 47 | testDataStream.print().setParallelism(1) 48 | 49 | flinkTableEnv.execute("ReadDataFromKafkaConnector") 50 | 51 | 52 | } 53 | 54 | case class Pi( 55 | id:String, 56 | time:String 57 | ) 58 | } 59 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/ReadDataFromKafkaConnectorJava.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka; 2 | 3 | 4 | import com.yyb.flink10.util1.GeneratorClassByASM; 5 | import net.sf.cglib.core.ReflectUtils; 6 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 7 | import org.apache.flink.api.java.typeutils.TupleTypeInfo; 8 | import org.apache.flink.core.fs.Path; 9 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink; 13 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy; 14 | import org.apache.flink.table.api.DataTypes; 15 | import org.apache.flink.table.api.EnvironmentSettings; 16 | import org.apache.flink.table.api.Table; 17 | import org.apache.flink.table.api.TableSchema; 18 | import org.apache.flink.table.api.java.StreamTableEnvironment; 19 | import org.apache.flink.table.descriptors.ConnectTableDescriptor; 20 | import org.apache.flink.table.descriptors.Json; 21 | import org.apache.flink.table.descriptors.Kafka; 22 | import org.apache.flink.table.descriptors.Schema; 23 | import org.apache.flink.types.Row; 24 | 25 | /** 26 | * 注意 这里 涉及到了 ASM 动态产生 class 并加载的 内容,可以参考 https://blog.csdn.net/u010374412/article/details/106714721 博文 27 | * @Author yyb 28 | * @Description 29 | * @Date Create in 2020-06-10 30 | * @Time 09:32 31 | */ 32 | public class ReadDataFromKafkaConnectorJava { 33 | public static void main(String[] args) throws Exception { 34 | 35 | /** 36 | * 这里是 ASM 生产 动态 class 类,不用理会。 37 | */ 38 | String 
packageName = "com.yyb.flink10.xxx."; 39 | String className = "Pi"; 40 | byte[] byteOfClass = GeneratorClassByASM.geneClassMain(packageName, className); 41 | Class piCLass = ReflectUtils.defineClass(packageName + className, byteOfClass, ReadDataFromKafkaConnectorJava.class.getClassLoader()); 42 | Class xx = Class.forName(packageName + className); 43 | System.out.println(xx.newInstance()); 44 | 45 | 46 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build(); 47 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 48 | env.registerType(piCLass); 49 | 50 | StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env, settings); 51 | 52 | 53 | Kafka kafka = new Kafka(); 54 | kafka.version("0.11") 55 | .topic("eventsource_yhj") 56 | .property("zookeeper.connect", "172.16.10.16:2181,172.16.10.17:2181,172.16.10.18:2181") 57 | .property("bootstrap.servers", "172.16.10.19:9092,172.16.10.26:9092,172.16.10.27:9092") 58 | .property("group.id", "yyb_dev") 59 | .startFromEarliest(); 60 | 61 | Schema schema = new Schema(); 62 | TableSchema tableSchema = TableSchema.builder() 63 | .field("id", DataTypes.STRING()) 64 | .field("time", DataTypes.STRING()) 65 | .build(); 66 | schema.schema(tableSchema); 67 | ConnectTableDescriptor tableSource = flinkTableEnv.connect(kafka) 68 | .withFormat(new Json().failOnMissingField(true)) 69 | .withSchema(schema); 70 | tableSource.createTemporaryTable("test"); 71 | String sql = "select * from test"; 72 | 73 | Table test = flinkTableEnv.from("test"); 74 | test.printSchema(); 75 | 76 | 77 | /** 78 | * 注意 TupleTypeInfoBase 这个 抽象类 有3个直接实现 79 | * BaseRowTypeInfo, RowTypeInfo, TupleTypeInfo 80 | * 目前这个程序 只是用了 TupleTypeInfo 这个类 81 | */ 82 | TupleTypeInfo tupleTypeInfo = new TupleTypeInfo(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO); 83 | // DataStream testDataStream = flinkTableEnv.toAppendStream(test, piCLass); //使用 Class 的方式 84 | DataStream testDataStream = flinkTableEnv.toAppendStream(test, tupleTypeInfo); //使用 TypeInformation 的方式 85 | testDataStream.print().setParallelism(1); 86 | 87 | // DataStream testDataStream1 = flinkTableEnv.toAppendStream(test, Row.class); 88 | // 89 | // String fileSinkPath = "./xxx.text/rs7/"; 90 | // StreamingFileSink sink = StreamingFileSink.forBulkFormat( 91 | // new Path(fileSinkPath), 92 | // ParquetAvroWriters.forReflectRecord(Row.class)) 93 | // .withRollingPolicy(OnCheckpointRollingPolicy.build()).build(); 94 | // testDataStream1.addSink(sink); 95 | flinkTableEnv.execute("ReadDataFromKafkaConnector"); 96 | } 97 | 98 | } 99 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/ReadDataFromKafkaSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka 2 | 3 | import java.util 4 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 5 | import org.apache.flink.streaming.connectors.kafka.Kafka010TableSourceSinkFactory 6 | import org.apache.flink.table.api.EnvironmentSettings 7 | import org.apache.flink.table.api.scala.StreamTableEnvironment 8 | 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-06-09 14 | * @Time 22:21 15 | */ 16 | object ReadDataFromKafkaSource { 17 | def main(args:Array[String])={ 18 | val settings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build() 19 | val env = 
StreamExecutionEnvironment.getExecutionEnvironment 20 | val flinkTableEnv = StreamTableEnvironment.create(env, settings) 21 | 22 | val kafkaSourceFactory = new Kafka010TableSourceSinkFactory() 23 | val proper = new util.HashMap[String, String]() 24 | val kafkaSource = kafkaSourceFactory.createStreamTableSource(proper) 25 | 26 | flinkTableEnv.registerTableSource( "kafka", kafkaSource) 27 | 28 | env.execute("") 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/ReadDataFromKafkaSourceJava.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka; 2 | 3 | import org.apache.flink.api.common.serialization.DeserializationSchema; 4 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 5 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.connectors.kafka.Kafka010TableSource; 9 | import org.apache.flink.table.api.EnvironmentSettings; 10 | import org.apache.flink.table.api.TableSchema; 11 | import org.apache.flink.table.api.java.StreamTableEnvironment; 12 | import java.util.Properties; 13 | 14 | /** 15 | * @Author yyb 16 | * @Description 17 | * @Date Create in 2020-06-09 18 | * @Time 22:46 19 | */ 20 | public class ReadDataFromKafkaSourceJava { 21 | public static void main(String[] args){ 22 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build(); 23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env, settings); 25 | BasicTypeInfo field1 = BasicTypeInfo.STRING_TYPE_INFO; 26 | BasicTypeInfo field2 = BasicTypeInfo.STRING_TYPE_INFO; 27 | TableSchema schema = new TableSchema(new String[]{"field1", "field2"}, new TypeInformation[]{BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO}); 28 | String topic = "topic"; 29 | Properties properties = new Properties(); 30 | DeserializationSchema deserializationSchema = new SimpleStringSchema(); 31 | Kafka010TableSource kafkaSource = new Kafka010TableSource(schema, topic, properties, deserializationSchema); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/WriteToKafkaByKafkaConnectorOfOrder.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka; 2 | 3 | import com.yyb.flink10.commonEntity.Current1; 4 | import com.yyb.flink10.commonEntity.Current2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.table.api.DataTypes; 8 | import org.apache.flink.table.api.EnvironmentSettings; 9 | import org.apache.flink.table.api.Table; 10 | import org.apache.flink.table.api.TableSchema; 11 | import org.apache.flink.table.api.java.StreamTableEnvironment; 12 | import org.apache.flink.table.descriptors.ConnectTableDescriptor; 13 | import org.apache.flink.table.descriptors.Json; 14 | import org.apache.flink.table.descriptors.Kafka; 15 | import org.apache.flink.table.descriptors.Schema; 16 | 17 | 
import java.io.IOException; 18 | import java.io.InputStream; 19 | import java.util.ArrayList; 20 | import java.util.Date; 21 | import java.util.Properties; 22 | 23 | /** 24 | * @Author yyb 25 | * @Description 注意 在 join的时候,是由 水印 触发的 (即 每当 新的水印大于 旧的水印 才会触发计算, join 的时候,由所有流中的 min 的水印决定 这个 join 的水印) 26 | * @Date Create in 2020-08-03 27 | * @Time 08:53 28 | */ 29 | public class WriteToKafkaByKafkaConnectorOfOrder { 30 | public static void main(String [] args) throws Exception { 31 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 34 | InputStream in_env = ClassLoader.getSystemResourceAsStream("env.properties"); 35 | Properties prop = new Properties(); 36 | prop.load(in_env); 37 | System.out.println(prop.getProperty("zookeeper.connect")); 38 | 39 | Kafka kafka = new Kafka(); 40 | kafka.version("0.11") 41 | .topic("eventsource_yyb_order") 42 | .property("zookeeper.connect", prop.getProperty("zookeeper.connect")) 43 | .property("bootstrap.servers", prop.getProperty("bootstrap.servers")). 44 | property("group.id", "yyb_dev") 45 | .startFromLatest(); 46 | Schema schema = new Schema(); 47 | TableSchema tableSchema1 = TableSchema.builder() 48 | .field("rowtime", DataTypes.STRING()) 49 | .field("amount", DataTypes.INT()) 50 | .field("currency", DataTypes.STRING()) 51 | .field("eventTime", DataTypes.BIGINT()) 52 | .build(); 53 | schema.schema(tableSchema1); 54 | ConnectTableDescriptor tableSource = blinkTableEnv.connect(kafka) 55 | .withFormat(new Json().failOnMissingField(true)) 56 | .withSchema(schema); 57 | tableSource.createTemporaryTable("Orders"); 58 | 59 | ArrayList data = new ArrayList(); 60 | data.add(new Current2( "2016-01-01 00:00:00",3, "Euro", 0L)); 61 | 62 | DataStreamSource dataDS = env.fromCollection(data); 63 | Table dataTable = blinkTableEnv.fromDataStream(dataDS); 64 | blinkTableEnv.registerTable("source", dataTable); 65 | 66 | String sql = "insert into Orders select rowtime, amount, currency, eventTime from source"; 67 | 68 | blinkTableEnv.sqlUpdate(sql); 69 | 70 | env.execute("WriteToKafkaByKafkaConnector"); 71 | } 72 | 73 | 74 | 75 | 76 | } 77 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/blink/stream/kafka/WriteToKafkaByKafkaConnectorOfRates.java: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.blink.stream.kafka; 2 | 3 | import com.yyb.flink10.commonEntity.Current1; 4 | import com.yyb.flink10.commonEntity.Rate; 5 | import com.yyb.flink10.commonEntity.Rate2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.table.api.DataTypes; 9 | import org.apache.flink.table.api.EnvironmentSettings; 10 | import org.apache.flink.table.api.Table; 11 | import org.apache.flink.table.api.TableSchema; 12 | import org.apache.flink.table.api.java.StreamTableEnvironment; 13 | import org.apache.flink.table.descriptors.ConnectTableDescriptor; 14 | import org.apache.flink.table.descriptors.Json; 15 | import org.apache.flink.table.descriptors.Kafka; 16 | import org.apache.flink.table.descriptors.Schema; 17 | 18 | import java.io.InputStream; 19 | import java.text.SimpleDateFormat; 20 | import 
java.util.ArrayList; 21 | import java.util.Date; 22 | import java.util.Properties; 23 | 24 | /** 25 | * @Author yyb 26 | * @Description 27 | * @Date Create in 2020-08-03 28 | * @Time 08:53 29 | */ 30 | public class WriteToKafkaByKafkaConnectorOfRates { 31 | public static void main(String [] args) throws Exception { 32 | EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 33 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 34 | StreamTableEnvironment blinkTableEnv = StreamTableEnvironment.create(env, settings); 35 | InputStream in_env = ClassLoader.getSystemResourceAsStream("env.properties"); 36 | Properties prop = new Properties(); 37 | prop.load(in_env); 38 | System.out.println(prop.getProperty("zookeeper.connect")); 39 | 40 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 41 | 42 | Kafka kafka = new Kafka(); 43 | kafka.version("0.11") 44 | .topic("eventsource_yyb_rate") 45 | .property("zookeeper.connect", prop.getProperty("zookeeper.connect")) 46 | .property("bootstrap.servers", prop.getProperty("bootstrap.servers")). 47 | property("group.id", "yyb_dev") 48 | .startFromLatest(); 49 | 50 | Schema schema = new Schema(); 51 | TableSchema tableSchema1 = TableSchema.builder() 52 | .field("rowtime", DataTypes.STRING()) 53 | .field("currency", DataTypes.STRING()) 54 | .field("rate", DataTypes.INT()) 55 | .field("eventTime", DataTypes.BIGINT()) 56 | .build(); 57 | schema.schema(tableSchema1); 58 | ConnectTableDescriptor tableSource = blinkTableEnv.connect(kafka) 59 | .withFormat(new Json().failOnMissingField(true)) 60 | .withSchema(schema); 61 | tableSource.createTemporaryTable("Rates"); 62 | 63 | ArrayList data = new ArrayList(); 64 | data.add(new Rate2("2016-01-01 00:00:02", "Euro", 120, 0L)); 65 | 66 | DataStreamSource dataDS = env.fromCollection(data); 67 | Table dataTable = blinkTableEnv.fromDataStream(dataDS); 68 | blinkTableEnv.registerTable("source", dataTable); 69 | 70 | String sql = "insert into Rates select rowtime,currency,rate,eventTime from source"; 71 | 72 | blinkTableEnv.sqlUpdate(sql); 73 | 74 | env.execute("WriteToKafkaByKafkaConnector"); 75 | } 76 | 77 | 78 | 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/BatchQuery.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch 2 | 3 | import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment} 4 | import org.apache.flink.api.scala._ 5 | import org.apache.flink.table.api.Table 6 | import org.apache.flink.table.api.scala.BatchTableEnvironment 7 | 8 | /** 9 | * @Author yyb 10 | * @Description 11 | * @Date Create in 2020-04-18 12 | * @Time 21:05 13 | */ 14 | object BatchQuery { 15 | def main(args: Array[String]): Unit = { 16 | val env = ExecutionEnvironment.getExecutionEnvironment 17 | val batchTableEnv: BatchTableEnvironment = BatchTableEnvironment.create(env) 18 | 19 | val words = "hello flink hello lagou" 20 | val WDS = words.split("\\W+").map(WD(_, 1)) 21 | 22 | val input: DataSet[WD] = env.fromCollection(WDS) 23 | 24 | val table: Table = batchTableEnv.fromDataSet(input) 25 | 26 | 27 | batchTableEnv.createTemporaryView("wordcount", table) 28 | 29 | batchTableEnv.sqlQuery("select * from wordcount").printSchema() 30 | 31 | val datasetOfTable: DataSet[WD] = batchTableEnv.toDataSet[WD](table) 32 | 33 | datasetOfTable.printToErr() 34 | 35 
| 36 | 37 | batchTableEnv.execute("BatchQuery") 38 | 39 | } 40 | 41 | case class WD(word:String, count:Int) 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/BatchReadFromParquetQuery.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch 2 | 3 | import org.apache.flink.api.scala.ExecutionEnvironment 4 | import org.apache.flink.api.scala._ 5 | import org.apache.flink.formats.parquet.ParquetTableSource 6 | import org.apache.flink.table.api.Table 7 | import org.apache.flink.table.api.scala.BatchTableEnvironment 8 | import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName 9 | import org.apache.parquet.schema.{MessageType, PrimitiveType} 10 | import org.apache.parquet.schema.Type.Repetition 11 | 12 | /** 13 | * @Author yyb 14 | * @Description 15 | * @Date Create in 2020-04-23 16 | * @Time 17:40 17 | */ 18 | object BatchReadFromParquetQuery { 19 | def main(args: Array[String]): Unit = { 20 | val env = ExecutionEnvironment.getExecutionEnvironment 21 | val batchTableEnv = BatchTableEnvironment.create(env) 22 | 23 | 24 | 25 | 26 | val word = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "word") 27 | val count = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.INT32, "count") 28 | val schema = new MessageType("word", word, count) 29 | 30 | val parquetFile = new ParquetTableSource.Builder() 31 | .path("./xxx.text/rs2/2020-04-23--21/.part-0-0.parquet.inprogress.a6a2a7cd-98bf-4397-8e7d-558b1bb932aa") 32 | .forParquetSchema(schema) 33 | .build() 34 | 35 | batchTableEnv.registerTableSource("xx", parquetFile) 36 | 37 | val sql = 38 | """ 39 | |select * from xx 40 | """.stripMargin 41 | val q1: Table = batchTableEnv.sqlQuery(sql) 42 | 43 | q1.printSchema() 44 | 45 | batchTableEnv.toDataSet[WC](q1).print() 46 | } 47 | 48 | case class WC(word:String, ct:Int) 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/BatchReadFromSequenceQuery.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch 2 | 3 | import org.apache.flink.api.scala.ExecutionEnvironment 4 | import org.apache.flink.api.scala._ 5 | import org.apache.flink.table.api.scala.BatchTableEnvironment 6 | import org.datanucleus.store.rdbms.valuegenerator.SequenceTable 7 | 8 | /** 9 | * @Author yyb 10 | * @Description 11 | * @Date Create in 2020-04-24 12 | * @Time 09:29 13 | */ 14 | object BatchReadFromSequenceQuery { 15 | def main(args: Array[String]): Unit = { 16 | val env = ExecutionEnvironment.getExecutionEnvironment 17 | val batchTableEnv = BatchTableEnvironment.create(env) 18 | 19 | // env.readFileOfPrimitives() 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/JDBC/BatchJDBCReadByInputformat2TableSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch.JDBC 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation 4 | import org.apache.flink.api.java.io.jdbc.JDBCInputFormat 5 | import org.apache.flink.api.java.io.jdbc.JDBCInputFormat.JDBCInputFormatBuilder 6 | import org.apache.flink.api.java.typeutils.RowTypeInfo 7 | import org.apache.flink.api.scala.typeutils.Types 8 | import 
org.apache.flink.api.scala.{DataSet, ExecutionEnvironment, _} 9 | import org.apache.flink.table.api.Table 10 | import org.apache.flink.table.api.scala.BatchTableEnvironment 11 | import org.apache.flink.table.descriptors.{BatchTableDescriptor, FileSystem} 12 | import org.apache.flink.table.sinks.CsvTableSink 13 | import org.apache.flink.types.Row 14 | 15 | /** 16 | * @Author yyb 17 | * @Description 18 | * @Date Create in 2020-04-21 19 | * @Time 14:15 20 | */ 21 | object BatchJDBCReadByInputformat2TableSource { 22 | def main(args: Array[String]): Unit = { 23 | val env = ExecutionEnvironment.getExecutionEnvironment 24 | val batchTableEnv = BatchTableEnvironment.create(env) 25 | 26 | val types = Array[TypeInformation[_]](Types.STRING, Types.LONG, Types.STRING) 27 | val fields = Array[String]("MT_KEY1", "MT_KEY2", "MT_COMMENT") 28 | val typeInfo = new RowTypeInfo(types, fields) 29 | 30 | 31 | val jdbc: JDBCInputFormat = new JDBCInputFormatBuilder() 32 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/hive?useSSL=false&serverTimezone=UTC") 33 | .setDrivername("com.mysql.jdbc.Driver") 34 | .setUsername("hive") 35 | .setPassword("hive") 36 | .setQuery("select * from AUX_TABLE") 37 | .setRowTypeInfo(typeInfo) 38 | .finish() 39 | val mysqlSource : DataSet[Row] = env.createInput(jdbc) 40 | 41 | mysqlSource.print() 42 | 43 | // val table: ParquetTableSource = new ParquetTableSource() 44 | // batchTableEnv.registerTableSource("table", table) 45 | 46 | val file = new FileSystem() //注意这里只是 实验性质 47 | val fs: BatchTableDescriptor = batchTableEnv.connect(file) 48 | 49 | // val jdbcTableSink = new JDBCAppendTableSink() 50 | // batchTableEnv.registerTableSink("jdbcTableSink", jdbcTableSink) 51 | 52 | 53 | val csvTableSink = new CsvTableSink("") 54 | // batchTableEnv.registerTableSink("csvTableSink", csvTableSink) 55 | 56 | val AUX_TABLE: Table = batchTableEnv.fromDataSet(mysqlSource) 57 | batchTableEnv.createTemporaryView("AUX_TABLE", AUX_TABLE) 58 | 59 | val sql = 60 | s""" 61 | |select * from AUX_TABLE 62 | """.stripMargin 63 | batchTableEnv.sqlQuery(sql).printSchema() 64 | 65 | 66 | 67 | //目前来看,只有在 有 sink的情况下,需要 加 execute 68 | // batchTableEnv.execute("ConnectJDBCBatch") 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/JDBC/BatchJobReadFromJDBCTableSource.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch.JDBC 2 | 3 | import org.apache.flink.api.java.io.jdbc.{JDBCLookupOptions, JDBCOptions, JDBCReadOptions, JDBCTableSource} 4 | import org.apache.flink.api.scala.ExecutionEnvironment 5 | import org.apache.flink.table.api.{Table, TableSchema} 6 | import org.apache.flink.table.api.scala.BatchTableEnvironment 7 | import org.apache.flink.table.types.AtomicDataType 8 | import org.apache.flink.table.types.logical.{DateType, IntType, VarCharType} 9 | 10 | /** 11 | * @Author yyb 12 | * @Description 13 | * @Date Create in 2020-04-26 14 | * @Time 17:33 15 | */ 16 | object BatchJobReadFromJDBCTableSource { 17 | def main(args: Array[String]): Unit = { 18 | val env = ExecutionEnvironment.getExecutionEnvironment 19 | val batchTableEnv = BatchTableEnvironment.create(env) 20 | 21 | val lookOption = JDBCLookupOptions.builder() 22 | .setCacheExpireMs(60*1000) 23 | .setCacheMaxSize(1024*1024) 24 | .setMaxRetryTimes(10) 25 | .build() 26 | 27 | val jdbcOpition = JDBCOptions.builder() 28 | 
.setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 29 | .setDriverName("com.mysql.jdbc.Driver") 30 | .setUsername("root") 31 | .setPassword("111111") 32 | .setTableName("t_order") 33 | .build() 34 | 35 | val jdbcReadOption = JDBCReadOptions.builder() 36 | .setFetchSize(5000) 37 | .build() 38 | 39 | val tableSchema = TableSchema.builder() 40 | .field("id", new AtomicDataType(new IntType)) 41 | .field("name", new AtomicDataType(new VarCharType(2147483647))) //注意 String 就是 2147483647 42 | .field("time", new AtomicDataType(new DateType)) 43 | .build() 44 | 45 | val jdbcTableSource: JDBCTableSource = JDBCTableSource.builder() 46 | .setLookupOptions(lookOption) 47 | .setOptions(jdbcOpition) 48 | .setReadOptions(jdbcReadOption) 49 | .setSchema(tableSchema) 50 | .build() 51 | 52 | val t_order: Table = batchTableEnv.fromTableSource(jdbcTableSource) 53 | 54 | batchTableEnv.createTemporaryView("t_order", t_order) 55 | 56 | val sql = 57 | s""" 58 | |select * from t_order 59 | """.stripMargin 60 | batchTableEnv.sqlQuery(sql).printSchema() 61 | 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/JDBC/WriteJDBCByTableSink.scala: -------------------------------------------------------------------------------- 1 | package com.yyb.flink10.table.flink.batch.JDBC 2 | 3 | import com.yyb.flink10.table.flink.batch.BatchQuery.WD 4 | import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeInformation} 5 | import org.apache.flink.api.java.io.jdbc.JDBCAppendTableSink 6 | import org.apache.flink.api.java.typeutils.RowTypeInfo 7 | import org.apache.flink.api.scala.typeutils.Types 8 | import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment, _} 9 | import org.apache.flink.table.api.Table 10 | import org.apache.flink.table.api.scala.BatchTableEnvironment 11 | 12 | /** 13 | * @Author yyb 14 | * @Description 15 | * @Date Create in 2020-04-29 16 | * @Time 17:25 17 | */ 18 | object WriteJDBCByTableSink { 19 | def main(args: Array[String]): Unit = { 20 | val env = ExecutionEnvironment.getExecutionEnvironment 21 | val batchTableEnv = BatchTableEnvironment.create(env) 22 | 23 | val words = "hello flink hello lagou" 24 | val WDS = words.split("\\W+").map(WD(_, 1)) 25 | 26 | val input: DataSet[WD] = env.fromCollection(WDS) 27 | 28 | val table: Table = batchTableEnv.fromDataSet(input) 29 | 30 | 31 | batchTableEnv.createTemporaryView("wordcount", table) 32 | 33 | 34 | val jdbcAppendTableSink: JDBCAppendTableSink = JDBCAppendTableSink.builder() 35 | .setBatchSize(2000) 36 | .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC") 37 | .setDrivername("com.mysql.jdbc.Driver") 38 | .setUsername("root") 39 | .setPassword("111111") 40 | .setQuery("insert into wordcount (word, count) values(?, ?)") 41 | .setParameterTypes(java.sql.Types.VARCHAR, java.sql.Types.INTEGER) 42 | .build() 43 | 44 | 45 | batchTableEnv.registerTableSink("wordcount_jdbc", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcAppendTableSink) 46 | 47 | table.insertInto("wordcount_jdbc") 48 | 49 | batchTableEnv.execute("WriteJDBCByTableSink") 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/yyb/flink10/table/flink/batch/kafka/SendData2KafkaByKafkaBatchSink.scala: -------------------------------------------------------------------------------- 1 | package 
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/batch/kafka/SendData2KafkaByKafkaBatchSink.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.batch.kafka

import java.io.InputStream
import java.util.Properties

import com.yyb.flink10.OutputFormat.KafkaOutputFormat
import com.yyb.flink10.sink.KafkaBatchTableSink
import org.apache.flink.api.scala._
import org.apache.flink.formats.json.JsonRowSerializationSchema
import org.apache.flink.kafka011.shaded.org.apache.kafka.clients.producer.ProducerRecord
import org.apache.flink.kafka011.shaded.org.apache.kafka.common.serialization.StringSerializer
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.internal.FlinkKafkaProducer
import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table, TableEnvironment, TableSchema}
import org.apache.flink.table.api.scala.{BatchTableEnvironment, StreamTableEnvironment}
import org.apache.flink.table.descriptors.{ConnectTableDescriptor, Json, Kafka, Schema}

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-07-28
 * @Time 16:12
 */
object SendData2KafkaByKafkaBatchSink {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    val blinkTableEnv = BatchTableEnvironment.create(env)
    val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties")
    val prop: Properties = new Properties()
    prop.load(in_env)
    println(prop.getProperty("zookeeper.connect"))

    val kafka = new Kafka
    kafka.version("0.11")
      .topic("eventsource_yhj")
      .property("zookeeper.connect", prop.getProperty("zookeeper.connect"))
      .property("bootstrap.servers", prop.getProperty("bootstrap.servers"))
      .property("group.id", "yyb_dev")
      .startFromLatest

    val schema = new Schema
    val tableSchema1 = TableSchema.builder
      .field("amount", DataTypes.INT)
      .field("currency", DataTypes.STRING).build
    schema.schema(tableSchema1)

    val tableSource = blinkTableEnv.connect(kafka)
      .withFormat(new Json().failOnMissingField(true))
      .withSchema(schema)
    tableSource.createTemporaryTable("Orders_tmp")

    val schemaString = new JsonRowSerializationSchema.Builder(tableSchema1.toRowType)
    val kafkaProp = new Properties()
    kafkaProp.put("key.serializer", classOf[StringSerializer])
    kafkaProp.put("value.serializer", classOf[StringSerializer])
    kafkaProp.put("zookeeper.connect", prop.getProperty("zookeeper.connect"))
    kafkaProp.put("bootstrap.servers", prop.getProperty("bootstrap.servers"))
    kafkaProp.put("topic", "eventsource_yhj")

    val kafkaProducer = new FlinkKafkaProducer[String, String](kafkaProp)
    val data = Array(Current(1, "Euro"))

    val dataDS = env.fromCollection(data)

    val datasOfRecord: DataSet[ProducerRecord[String, String]] = dataDS.map(x => {
      val record: ProducerRecord[String, String] = new ProducerRecord[String, String]("eventsource_yhj", x.toString)
      record
    })

    /**
     * Sending the data to Kafka this way first collects the records to the driver and only then sends them,
     * so it is not a distributed approach and needs tuning.
     */
    // datasOfRecord.collect().foreach(kafkaProducer.send(_))
    // kafkaProducer.flush()

    /**
     * This variant uses the DataSet KafkaOutputFormat instead.
     */
    val kafkaOutputFormat = new KafkaOutputFormat(kafkaProp)
    // dataDS.map(x => x.toString).output(kafkaOutputFormat)

    val dataTable: Table = blinkTableEnv.fromDataSet(dataDS.map(_.toString))

    blinkTableEnv.registerTable("dataSource", dataTable)

    val kafkaBatchTableSink = new KafkaBatchTableSink(kafkaOutputFormat)
    blinkTableEnv.registerTableSink("kafkaBatchTableSink", kafkaBatchTableSink)

    var sql =
      """
        |insert into kafkaBatchTableSink select * from dataSource
        |""".stripMargin
    // Since Kafka is unbounded, a batch-mode Kafka table sink cannot be used directly:
    // BatchTableSink or OutputFormatTableSink required to emit batch Table.
    blinkTableEnv.sqlUpdate(sql)

    // dataTable.insertInto("Orders_tmp")

    env.execute("SendData2KafkaByKafkaConnector")
  }

  case class Current(amount: Int, currency: String) {
    override def toString: String = {
      s"""{"amount":${amount},"currency":"${currency}"}""".stripMargin
    }

    def toBytes(): Array[Byte] = {
      toString.getBytes()
    }
  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/InsetMode/AppendOnly.java:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC.InsetMode;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-08-04
 * @Time 14:57
 */
public class AppendOnly {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env);

        DataStream<Tuple2<String, String>> data = env.fromElements(
                new Tuple2<>("Mary", "./home"),
                new Tuple2<>("Bob", "./cart"),
                new Tuple2<>("Mary", "./prod?id=1"),
                new Tuple2<>("Liz", "./home"),
                new Tuple2<>("Bob", "./prod?id=3")
        );

        Table clicksTable = flinkTableEnv.fromDataStream(data, "user,url");

        flinkTableEnv.registerTable("clicks", clicksTable);
        Table rs = flinkTableEnv.sqlQuery("select user , url from clicks where user='Mary'");
        DataStream<Row> rs_ds = flinkTableEnv.toAppendStream(rs, Row.class);
        rs_ds.print().setParallelism(1);

        env.execute("AppendOnly");

        /**
         * Result:
         * Mary,./prod?id=1
         * Mary,./home
         */
    }
}
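toAppendStream works here because the filtered query is insert-only; as soon as the query aggregates (see RetractStream right below), the conversion has to go through toRetractStream instead. A small Scala sketch of the same contrast, assuming a Scala StreamTableEnvironment named streamTableEnv with the same "clicks" table registered and the usual org.apache.flink.streaming.api.scala._ import:

// Insert-only projection: an append stream is fine.
val appendStream: DataStream[Row] =
  streamTableEnv.toAppendStream[Row](streamTableEnv.sqlQuery("select user, url from clicks"))

// Aggregating query: results get updated, so a retract stream is required;
// toAppendStream would fail at planning time for this query.
val retractStream: DataStream[(Boolean, Row)] =
  streamTableEnv.toRetractStream[Row](streamTableEnv.sqlQuery("select user, count(url) from clicks group by user"))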
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/InsetMode/RetractStream.java:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC.InsetMode;

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-08-04
 * @Time 14:57
 */
public class RetractStream {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment flinkTableEnv = StreamTableEnvironment.create(env);

        DataStream<Tuple2<String, String>> data = env.fromElements(
                new Tuple2<>("Mary", "./home"),
                new Tuple2<>("Bob", "./cart"),
                new Tuple2<>("Mary", "./prod?id=1"),
                new Tuple2<>("Liz", "./home"),
                new Tuple2<>("Bob", "./prod?id=3")
        );

        Table clicksTable = flinkTableEnv.fromDataStream(data, "user,url");

        flinkTableEnv.registerTable("clicks", clicksTable);
        Table rs = flinkTableEnv.sqlQuery("select user , count(url) url_ct from clicks group by user");
        // note: toRetractStream is used here
        DataStream<Tuple2<Boolean, Tuple2<String, Long>>> rs_ds = flinkTableEnv.toRetractStream(rs, TypeInformation.of(new TypeHint<Tuple2<String, Long>>() {
        }));
        rs_ds.print().setParallelism(1);

        env.execute("RetractStream");

        /**
         * Result:
         * (true,(Liz,1))
         * (true,(Bob,1))
         * (false,(Bob,1))
         * (true,(Bob,2))
         * (true,(Mary,1))
         * (false,(Mary,1))
         * (true,(Mary,2))
         *
         * A first element of true marks a new row to be inserted; false marks an old row to be deleted.
         * In other words, updating a row in the table is decomposed into deleting the old row and then inserting the new one.
         */
    }
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/StreamJDBCReadByInputformat2TableSource.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC

import org.apache.flink.api.common.typeinfo.TypeInformation
import org.apache.flink.api.java.io.jdbc.JDBCInputFormat.JDBCInputFormatBuilder
import org.apache.flink.api.java.typeutils.RowTypeInfo
import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _}
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.types.Row

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-04-21
 * @Time 14:15
 */
object StreamJDBCReadByInputformat2TableSource {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val streamTableEnv = StreamTableEnvironment.create(env)

    val types = Array[TypeInformation[_]](Types.STRING, Types.LONG, Types.STRING)
    val fields = Array[String]("MT_KEY1", "MT_KEY2", "MT_COMMENT")
    val typeInfo = new RowTypeInfo(types, fields)

    val jdbc = new JDBCInputFormatBuilder()
      .setDBUrl("jdbc:mysql://127.0.0.1:3306/hive?useSSL=false&serverTimezone=UTC")
      .setDrivername("com.mysql.jdbc.Driver")
      .setUsername("hive")
      .setPassword("hive")
      .setQuery("select * from AUX_TABLE")
      .setRowTypeInfo(typeInfo)
      .finish()
    val mysqlSource: DataStream[Row] = env.createInput(jdbc)

    mysqlSource.print()

    val table: Table = streamTableEnv.fromDataStream(mysqlSource)

    streamTableEnv.createTemporaryView("AUX_TABLE", table)

    val table_q: Table = streamTableEnv.sqlQuery("select * from AUX_TABLE")
    table_q.printSchema()

    // so far, execute only needs to be called when the job actually has a sink
    streamTableEnv.execute("ConnectJDBCBatch")
  }
}
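Since the comment notes that execute only matters once the job has a sink, the simplest way to give this pipeline one is to convert the query result back to a DataStream and print it, the same pattern StreamQuery.scala uses further below. A sketch that could drop into the file above:

// Sketch: materialize the SQL result so execute("ConnectJDBCBatch") has something to run.
val resultStream: DataStream[Row] = streamTableEnv.toAppendStream[Row](table_q)
resultStream.print()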
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/StreamJobReadFromJDBCTableSource.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC

import org.apache.flink.api.java.io.jdbc.{JDBCLookupOptions, JDBCOptions, JDBCReadOptions, JDBCTableSource}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{Table, TableSchema}
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.types.AtomicDataType
import org.apache.flink.table.types.logical.{DateType, IntType, VarCharType}

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-04-26
 * @Time 21:39
 */
object StreamJobReadFromJDBCTableSource {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val streamTableEnv = StreamTableEnvironment.create(env)

    val lookOption = JDBCLookupOptions.builder()
      .setCacheExpireMs(60 * 1000)
      .setCacheMaxSize(1024 * 1024)
      .setMaxRetryTimes(10)
      .build()

    val jdbcOpition = JDBCOptions.builder()
      .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC")
      .setDriverName("com.mysql.jdbc.Driver")
      .setUsername("root")
      .setPassword("111111")
      .setTableName("t_order")
      .build()

    val jdbcReadOption = JDBCReadOptions.builder()
      .setFetchSize(5000)
      .build()

    val tableSchema = TableSchema.builder()
      .field("id", new AtomicDataType(new IntType))
      .field("name", new AtomicDataType(new VarCharType(2147483647))) // note: STRING corresponds to VarCharType(2147483647)
      .field("time", new AtomicDataType(new DateType))
      .build()

    val jdbcTableSource = JDBCTableSource.builder()
      .setLookupOptions(lookOption)
      .setOptions(jdbcOpition)
      .setReadOptions(jdbcReadOption)
      .setSchema(tableSchema)
      .build()

    val t_order: Table = streamTableEnv.fromTableSource(jdbcTableSource)

    streamTableEnv.registerTableSource("t_order1", jdbcTableSource)

    streamTableEnv.createTemporaryView("t_order", t_order)

    val sql =
      """
        |select * from t_order
      """.stripMargin
    streamTableEnv.sqlQuery(sql).printSchema()

    // streamTableEnv.registerTableSink()
  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/WriteDataByJDBCTableUpsertSink.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC

import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.api.java.io.jdbc.{JDBCOptions, JDBCUpsertTableSink}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _}
import org.apache.flink.table.api.{Table, TableSchema}
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.types.AtomicDataType
import org.apache.flink.table.types.logical.{IntType, VarCharType}

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-05-05
 * @Time 13:04
 */
object WriteDataByJDBCTableUpsertSink {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val flinkSteramTableEnv = StreamTableEnvironment.create(env)

    val fileSourcePath = "./data/data.txt"

    val wcStream: DataStream[(String, Int)] = env.readTextFile(fileSourcePath)
      .flatMap(_.split("\\W+"))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0)
      .sum(1)

    val source: Table = flinkSteramTableEnv.fromDataStream(wcStream)

    flinkSteramTableEnv.registerTable("word_flink", source)

    val jdbcOpition = JDBCOptions.builder()
      .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC")
      .setDriverName("com.mysql.jdbc.Driver")
      .setUsername("root")
      .setPassword("111111")
      .setTableName("wordcount")
      .build()

    val tableSchema = TableSchema.builder()
      .field("word", new AtomicDataType(new VarCharType(2147483647))) // note: STRING corresponds to VarCharType(2147483647)
      .field("count", new AtomicDataType(new IntType))
      .build()

    val jdbcUpsertTableSink = JDBCUpsertTableSink.builder()
      .setOptions(jdbcOpition)
      .setFlushIntervalMills(1000)
      .setFlushMaxSize(1024 * 1024 * 12)
      .setTableSchema(tableSchema)
      .build()
    jdbcUpsertTableSink.setKeyFields(Array("word"))
    jdbcUpsertTableSink.setIsAppendOnly(false)
    flinkSteramTableEnv.registerTableSink("word_mysql", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcUpsertTableSink)
    // Note: the query written into this sink should contain an aggregation; otherwise the result table ends up
    // with multiple records, because the IsAppendOnly and KeyFields of the JDBCUpsertTableSink are inferred
    // from the Flink execution plan.
    source.insertInto("word_mysql")

    flinkSteramTableEnv.execute("WriteDataByJDBCTableUpsertSink")
  }
}
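The note above says the inserted query should carry an aggregation so the planner can infer the upsert key and the append-only flag on its own. A sketch of that variant, assuming the default tuple field names _1/_2 that fromDataStream assigns here (the backticks are needed because count is a reserved word; the query itself is not part of the original file):

// Sketch: an explicit aggregation lets the planner derive the unique key for the upsert sink.
val aggregated: Table = flinkSteramTableEnv.sqlQuery(
  "select _1 as word, sum(_2) as `count` from word_flink group by _1")
aggregated.insertInto("word_mysql")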
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/JDBC/WriteDataByTableSink.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.JDBC

import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.api.java.io.jdbc.JDBCAppendTableSink
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment, _}
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.scala.StreamTableEnvironment

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-04-30
 * @Time 09:16
 */
object WriteDataByTableSink {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val streamTableEnv = StreamTableEnvironment.create(env)

    val fileSourcePath = "./data/data.txt"

    val wcStream: DataStream[(String, Int)] = env.readTextFile(fileSourcePath)
      .flatMap(_.split("\\W+"))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0)
      .sum(1)

    val table: Table = streamTableEnv.fromDataStream(wcStream)

    streamTableEnv.createTemporaryView("wd", table)

    streamTableEnv.sqlQuery("select * from wd").printSchema()

    val jdbcAppendTableSink = JDBCAppendTableSink.builder()
      .setBatchSize(2000)
      .setDBUrl("jdbc:mysql://127.0.0.1:3306/test?useSSL=false&serverTimezone=UTC")
      .setDrivername("com.mysql.jdbc.Driver")
      .setUsername("root")
      .setPassword("111111")
      .setQuery("insert into wordcount (word, count) values(?, ?)")
      .setParameterTypes(java.sql.Types.VARCHAR, java.sql.Types.INTEGER)
      .build()

    streamTableEnv.registerTableSink("mysql_wordcount", Array("word", "count"), Array(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), jdbcAppendTableSink)
    table.insertInto("mysql_wordcount")

    streamTableEnv.execute("WriteDataByTableSink")
  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/StreamQuery.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{EnvironmentSettings, Table}
import org.apache.flink.table.api.scala.StreamTableEnvironment

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-04-19
 * @Time 12:44
 */
object StreamQuery {
  def main(args: Array[String]): Unit = {
    // note: an EnvironmentSettings instance is newly added here
    val flinkStreamSettings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build()
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val streamTableEnv = StreamTableEnvironment.create(env, flinkStreamSettings)

    val fileSourcePath = "/Users/yyb/Downloads/1.txt"

    val wcStream: DataStream[(String, Int)] = env.readTextFile(fileSourcePath)
      .flatMap(_.split("\\W+"))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0)
      .sum(1)

    val table: Table = streamTableEnv.fromDataStream(wcStream)

    streamTableEnv.createTemporaryView("wd", table)

    streamTableEnv.sqlQuery("select * from wd").printSchema()

    val appendDateStream: DataStream[WD] = streamTableEnv.toAppendStream[WD](table)

    appendDateStream.print()

    env.execute("StreamQuery")
  }

  case class WD(word: String, count: Int)
}
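StreamQuery pins the old planner explicitly through EnvironmentSettings. The Blink-planner variant of the same setup differs only in the builder call; a sketch for comparison:

// Old planner (as above) vs. Blink planner settings in Flink 1.10.
val oldPlannerSettings   = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build()
val blinkPlannerSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
val blinkStreamTableEnv  = StreamTableEnvironment.create(env, blinkPlannerSettings)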
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/table/flink/stream/kafka/SendData2KafkaByKafkaConnector.scala:
--------------------------------------------------------------------------------
package com.yyb.flink10.table.flink.stream.kafka

import java.io.InputStream
import java.util.Properties

import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.api.{DataTypes, EnvironmentSettings, Table, TableSchema}
import org.apache.flink.table.descriptors.{Json, Kafka, Schema}

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-07-28
 * @Time 16:12
 */
object SendData2KafkaByKafkaConnector {
  def main(args: Array[String]): Unit = {
    val settings = EnvironmentSettings.newInstance().useOldPlanner().inStreamingMode().build()
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val blinkTableEnv = StreamTableEnvironment.create(env, settings)

    val in_env: InputStream = ClassLoader.getSystemResourceAsStream("env.properties")
    val prop: Properties = new Properties()
    prop.load(in_env)
    println(prop.getProperty("zookeeper.connect"))

    val kafka = new Kafka
    kafka.version("0.11")
      .topic("eventsource_yhj")
      .property("zookeeper.connect", prop.getProperty("zookeeper.connect"))
      .property("bootstrap.servers", prop.getProperty("bootstrap.servers"))
      .property("group.id", "yyb_dev")
      .startFromLatest

    val schema = new Schema
    val tableSchema1 = TableSchema.builder
      .field("amount", DataTypes.INT)
      .field("currency", DataTypes.STRING).build
    schema.schema(tableSchema1)
    val tableSource = blinkTableEnv.connect(kafka)
      .withFormat(new Json().failOnMissingField(true))
      .withSchema(schema)
    tableSource.createTemporaryTable("Orders_tmp")

    val data = Array(Current(1, "Euro"))

    val dataDS = env.fromCollection(data)

    val dataTable: Table = blinkTableEnv.fromDataStream(dataDS)

    // blinkTableEnv.registerTable("dataSource", dataTable)

    var sql =
      """
        |insert into Orders_tmp select * from dataSource
        |""".stripMargin
    // Since Kafka is unbounded, a batch-mode Kafka table sink cannot be used.
    // blinkTableEnv.sqlUpdate(sql)

    dataTable.insertInto("Orders_tmp")

    env.execute("SendData2KafkaByKafkaConnector")
  }

  case class Current(amount: Int, currency: String)
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/util/ParquetAvroWritersSelf.java:
--------------------------------------------------------------------------------
package com.yyb.flink10.util;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.reflect.ReflectData;
import org.apache.flink.formats.parquet.ParquetBuilder;
import org.apache.flink.formats.parquet.ParquetWriterFactory;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.io.OutputFile;

import java.io.IOException;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-17
 * @Time 14:37
 */
public class ParquetAvroWritersSelf {
    public static ParquetWriterFactory<GenericData.Record> forGenericRecord(Schema schema) {
        final String schemaString = schema.toString();
        final ParquetBuilder<GenericData.Record> builder = (out) -> createAvroParquetWriter(schemaString, GenericData.get(), out);
        return new ParquetWriterFactory<>(builder);
    }

    private static ParquetWriter<GenericData.Record> createAvroParquetWriter(
            String schemaString,
            GenericData dataModel,
            OutputFile out) throws IOException {

        final Schema schema = new Schema.Parser().parse(schemaString);

        return AvroParquetWriter.<GenericData.Record>builder(out)
                .withSchema(schema)
                .withDataModel(dataModel)
                .build();
    }
}
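ParquetAvroWritersSelf produces a ParquetWriterFactory, which is the bulk format accepted by StreamingFileSink.forBulkFormat. A usage sketch in Scala (the record stream, the Avro schema and the output path are assumptions, not part of the original class):

import org.apache.flink.core.fs.Path
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink

// Sketch: write a DataStream of Avro GenericData.Record as Parquet files.
val parquetSink = StreamingFileSink
  .forBulkFormat(new Path("/tmp/parquet-out"), ParquetAvroWritersSelf.forGenericRecord(schema))
  .build()
records.addSink(parquetSink)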
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/util/RecordTypeInfo.java:
--------------------------------------------------------------------------------
package com.yyb.flink10.util;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.CompositeType;
import org.apache.flink.api.common.typeutils.TypeSerializer;

import java.util.List;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-17
 * @Time 15:15
 */
public class RecordTypeInfo extends CompositeType<GenericData.Record> implements GenericRecord, Comparable<GenericData.Record> {

    // private final Object[] fields;

    public RecordTypeInfo(Class<GenericData.Record> typeClass, TypeInformation<?>... types) {
        super(typeClass);
    }

    @Override
    public int compareTo(GenericData.Record o) {
        return 0;
    }

    @Override
    public void put(String key, Object v) {

    }

    @Override
    public Object get(String key) {
        return null;
    }

    @Override
    public void put(int i, Object v) {

    }

    @Override
    public Object get(int i) {
        return null;
    }

    @Override
    public Schema getSchema() {
        return null;
    }

    @Override
    public void getFlatFields(String fieldExpression, int offset, List<FlatFieldDescriptor> result) {

    }

    @Override
    public <X> TypeInformation<X> getTypeAt(String fieldExpression) {
        return null;
    }

    @Override
    public <X> TypeInformation<X> getTypeAt(int pos) {
        return null;
    }

    @Override
    protected TypeComparatorBuilder<GenericData.Record> createTypeComparatorBuilder() {
        return null;
    }

    @Override
    public String[] getFieldNames() {
        return new String[0];
    }

    @Override
    public int getFieldIndex(String fieldName) {
        return 0;
    }

    @Override
    public boolean isBasicType() {
        return false;
    }

    @Override
    public boolean isTupleType() {
        return false;
    }

    @Override
    public int getArity() {
        return 0;
    }

    @Override
    public int getTotalFields() {
        return 0;
    }

    @Override
    public TypeSerializer<GenericData.Record> createSerializer(ExecutionConfig config) {
        return null;
    }
}
--------------------------------------------------------------------------------
/src/main/scala/com/yyb/flink10/util1/Demo.java:
--------------------------------------------------------------------------------
package com.yyb.flink10.util1;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-08-20
 * @Time 16:09
 */
public class Demo {
    public static void main(String[] args) {
        System.out.println(("LSJA24W96KS001123".hashCode() & Integer.MAX_VALUE) % 1080);
    }
}
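Demo masks the sign bit with Integer.MAX_VALUE so the hash becomes non-negative before the modulo, i.e. it maps the key into one of 1080 buckets. The same idea as a small Scala helper (the function name is made up for illustration):

// Equivalent of ("LSJA24W96KS001123".hashCode() & Integer.MAX_VALUE) % 1080, generalised.
def bucketFor(key: String, buckets: Int = 1080): Int =
  (key.hashCode & Int.MaxValue) % buckets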
--------------------------------------------------------------------------------
/src/main/scala/flink/api/java/RecordAvroTypeInfo.java:
--------------------------------------------------------------------------------
package flink.api.java;

import org.apache.avro.generic.GenericData;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-29
 * @Time 11:21
 */
public class RecordAvroTypeInfo extends TypeInformation<GenericData.Record> {
    @Override
    public boolean isBasicType() {
        return false;
    }

    @Override
    public boolean isTupleType() {
        return false;
    }

    @Override
    public int getArity() {
        return 0;
    }

    @Override
    public int getTotalFields() {
        return 0;
    }

    @Override
    public Class<GenericData.Record> getTypeClass() {
        return null;
    }

    @Override
    public boolean isKeyType() {
        return false;
    }

    @Override
    public TypeSerializer<GenericData.Record> createSerializer(ExecutionConfig config) {
        return null;
    }

    @Override
    public String toString() {
        return null;
    }

    @Override
    public boolean equals(Object obj) {
        return false;
    }

    @Override
    public int hashCode() {
        return 0;
    }

    @Override
    public boolean canEqual(Object obj) {
        return false;
    }
}
--------------------------------------------------------------------------------
/src/main/scala/flink/api/java/Tuple0.java:
--------------------------------------------------------------------------------
package flink.api.java;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-18
 * @Time 14:07
 */
public class Tuple0 extends org.apache.flink.api.java.tuple.Tuple0 implements GenericRecord, Comparable<GenericData.Record> {
    @Override
    public int compareTo(GenericData.Record o) {
        return 0;
    }

    @Override
    public void put(String key, Object v) {

    }

    @Override
    public Object get(String key) {
        return null;
    }

    @Override
    public void put(int i, Object v) {

    }

    @Override
    public Object get(int i) {
        return null;
    }

    @Override
    public Schema getSchema() {
        return null;
    }
}
--------------------------------------------------------------------------------
/src/main/scala/flink/api/java/Tuple1.java:
--------------------------------------------------------------------------------
package flink.api.java;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-18
 * @Time 14:07
 */
public class Tuple1<T0> extends org.apache.flink.api.java.tuple.Tuple1<T0> implements GenericRecord, Comparable<GenericData.Record> {
    @Override
    public int compareTo(GenericData.Record o) {
        return 0;
    }

    @Override
    public void put(String key, Object v) {

    }

    @Override
    public Object get(String key) {
        return null;
    }

    @Override
    public void put(int i, Object v) {

    }

    @Override
    public Object get(int i) {
        return null;
    }

    @Override
    public Schema getSchema() {
        return null;
    }
}
--------------------------------------------------------------------------------
/src/main/scala/flink/api/java/Tuple2.java:
--------------------------------------------------------------------------------
package flink.api.java;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

/**
 * @Author yyb
 * @Description
 * @Date Create in 2020-06-18
 * @Time 14:07
 */
public class Tuple2<T0, T1> extends org.apache.flink.api.java.tuple.Tuple2<T0, T1> implements GenericRecord, Comparable<GenericData.Record> {
    private Object[] values = new Object[2];
    private Schema schema = null;

    public Tuple2() {
        super();
    }

    public Tuple2(Schema schema) {
        super();
        if (schema == null || !Schema.Type.RECORD.equals(schema.getType()))
            throw new AvroRuntimeException("Not a record schema: " + schema);
        this.schema = schema;
        this.values = new Object[schema.getFields().size()];
    }

    public Tuple2(Schema schema, T0 value0, T1 value1) {
        super(value0, value1);
        if (schema == null || !Schema.Type.RECORD.equals(schema.getType()))
            throw new AvroRuntimeException("Not a record schema: " + schema);
        this.schema = schema;
        this.values = new Object[2];
        this.values[0] = value0;
        this.values[1] = value1;
    }

    public Tuple2(T0 value0, T1 value1) {
        super(value0, value1);
        this.values = new Object[2];
        this.values[0] = value0;
        this.values[1] = value1;
    }

    @Override
    public int compareTo(GenericData.Record o) {
        return GenericData.get().compare(this, o, schema);
    }

    @Override
    public void put(String key, Object v) {
        Schema.Field field = schema.getField(key);
        if (field == null)
            throw new AvroRuntimeException("Not a valid schema field: " + key);

        values[field.pos()] = v;
    }

    @Override
    public Object get(String key) {
        Schema.Field field = schema.getField(key);
        if (field == null) return null;
        return values[field.pos()];
    }

    @Override
    public void put(int i, Object v) {
        values[i] = v;
    }

    @Override
    public Object get(int i) {
        return values[i];
    }

    @Override
    public Schema getSchema() {
        return schema;
    }

    @Override
    public <T> void setField(T value, int pos) {
        super.setField(value, pos);
        this.values[pos] = value;
    }

    @Override
    public void setFields(T0 value0, T1 value1) {
        super.setFields(value0, value1);
        this.values[0] = value0;
        this.values[1] = value1;
    }

    public static <T0, T1> org.apache.flink.api.java.tuple.Tuple2<T0, T1> of(T0 value0, T1 value1) {
        return new Tuple2<>(value0, value1);
    }
}
--------------------------------------------------------------------------------
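The schema-aware Tuple2 above can stand in for an Avro GenericRecord, for example as input to the Parquet writer factory earlier in this module. A construction sketch in Scala (the record name and fields are made up for illustration):

import org.apache.avro.SchemaBuilder

// Sketch: a two-field Avro schema matching the tuple's arity.
val wcSchema = SchemaBuilder.record("WordCount").fields()
  .requiredString("word")
  .requiredInt("count")
  .endRecord()

val rec = new flink.api.java.Tuple2[String, Int](wcSchema, "hello", 1)
rec.get("word") // resolved through the schema's field positions -> "hello"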