├── .gitignore ├── src └── main │ ├── resources │ ├── A级景区经纬度.xlsx │ ├── 去哪儿网景点数据.csv │ ├── phoenix.properties │ ├── clickhouse.properties │ ├── sqlcfg.properties │ ├── oracle.properties │ ├── hikari.properties │ ├── config.properties │ ├── log4j.properties │ ├── druid.properties │ └── kafka.properties │ ├── java │ └── cn │ │ └── northpark │ │ ├── spark │ │ └── scoreApp │ │ │ ├── Score.txt │ │ │ ├── sinkScore.java │ │ │ └── sinkScoreAppended.java │ │ ├── flink │ │ ├── bean │ │ │ ├── UserVO.java │ │ │ ├── Product.java │ │ │ ├── StatisticsVO.java │ │ │ ├── Message.java │ │ │ ├── EventCatagoryProductCount.java │ │ │ └── Access.java │ │ ├── weiboAPP │ │ │ └── hbase │ │ │ │ ├── enums │ │ │ │ └── RelType.java │ │ │ │ ├── bean │ │ │ │ └── WeiboRelations.java │ │ │ │ ├── CreateTable.java │ │ │ │ ├── DelMany.java │ │ │ │ ├── DelOne.java │ │ │ │ ├── AddMony.java │ │ │ │ └── AddOne.java │ │ ├── util │ │ │ ├── RocksSaveable.java │ │ │ ├── ObjectUtil.java │ │ │ ├── RabbitMQUtils.java │ │ │ ├── FlinkUtilsV1.java │ │ │ ├── DruidUtils.java │ │ │ ├── RabbitMQConFactory.java │ │ │ └── HikariUtils.java │ │ ├── WordCount.java │ │ ├── PrintSink.java │ │ ├── topN │ │ │ └── spark │ │ │ │ └── passwordStasApp │ │ │ │ └── Test1.java │ │ ├── table_sql_api │ │ │ ├── WordCountBean.java │ │ │ ├── stream │ │ │ │ └── sql │ │ │ │ │ ├── Split.java │ │ │ │ │ ├── UDTFSQL.java │ │ │ │ │ ├── UDFSQL.java │ │ │ │ │ ├── KafkaWordCountSQL.java │ │ │ │ │ ├── udf │ │ │ │ │ └── UserBrowseLog.java │ │ │ │ │ └── IpLocation.java │ │ │ ├── batch │ │ │ │ ├── SQLWordCount.java │ │ │ │ ├── TableWordCount.java │ │ │ │ └── BatchSQLWordCountQueryCommon.java │ │ │ ├── StreamSqlWordCount.java │ │ │ ├── StreamTableWordCount.java │ │ │ └── ConvertCSV2KafkaBean.java │ │ ├── window │ │ │ ├── udf │ │ │ │ ├── TopNAggregateFunction.java │ │ │ │ └── TopNWindowFunction.java │ │ │ ├── CountWindowAll.java │ │ │ ├── SlidingWindowAll.java │ │ │ ├── TumblingWindowAll.java │ │ │ ├── CountWindow.java │ │ │ ├── SlidingWindow.java │ │ │ ├── SessionWindow.java │ │ │ ├── TumblingWindow.java │ │ │ ├── EventTimeSessionWindow.java │ │ │ ├── EventTimeSlidingWindowWithWaterMark.java │ │ │ └── EventTimeTumblingWindow.java │ │ ├── MerchantDayStaApp │ │ │ └── MerchantDaySta.java │ │ ├── join │ │ │ ├── CountBean.java │ │ │ ├── StreamDataSourceB.java │ │ │ ├── StreamDataSourceC.java │ │ │ └── StreamDataSourceA.java │ │ ├── starrocks │ │ │ └── bean │ │ │ │ ├── Identities.java │ │ │ │ ├── Lib.java │ │ │ │ └── EventMsg.java │ │ ├── timeout │ │ │ └── TimeOutResult.java │ │ ├── KeyBy1.java │ │ ├── project │ │ │ ├── RestfulActivityLocationsApplication.java │ │ │ ├── syncIO │ │ │ │ ├── AsyncMysqlApplication.java │ │ │ │ ├── AsyncRestfulApplication.java │ │ │ │ ├── SinkToMysqlApplication.java │ │ │ │ └── function │ │ │ │ │ └── NP_MySqlSinkFunction.java │ │ │ ├── function │ │ │ │ └── MysqlToActivityBeanFunciton.java │ │ │ ├── InspectSitemap.java │ │ │ └── MysqlActivityNameApplication.java │ │ ├── project3 │ │ │ ├── MyParaFileSource.java │ │ │ ├── OperatorStateTest.java │ │ │ └── MyExactlyOnceParaFileSource.java │ │ ├── KeyBy2Bean.java │ │ ├── clickhouse │ │ │ └── ReadSQL.java │ │ ├── oracle │ │ │ ├── FlinkKafkaLinkOracleSource.java │ │ │ ├── OracleToTupleFunciton.java │ │ │ ├── SinkOracle.java │ │ │ └── FlinkKafkaSink.java │ │ ├── project2 │ │ │ ├── NP_ParallelismFileSource.java │ │ │ └── OperatorState1.java │ │ ├── BatchWC.java │ │ ├── exactly │ │ │ ├── overrideway │ │ │ │ ├── MyRedisSink.java │ │ │ │ └── FlinkKafkaToRedis.java │ │ │ └── transactionway │ │ │ │ ├── 
FlinkKafkaPrint.java │ │ │ │ └── FlinkKafkaToOracle.java │ │ ├── KafkaSource.java │ │ ├── TT.java │ │ ├── KeyBy3.java │ │ ├── RestartStrategy2.java │ │ ├── AddSink1.java │ │ ├── StreamingWordCountSocket.java │ │ ├── StreamingWordCount2.java │ │ ├── RestartStrategies1.java │ │ ├── StreamingWordCountParam.java │ │ ├── StreamingWordCount.java │ │ ├── KafkaSourceV2.java │ │ ├── StateBackend2.java │ │ ├── StateBackend1.java │ │ ├── StreamingWordCountChain.java │ │ ├── StreamingWordCountSharingGroup.java │ │ └── OperatorStateAndKeyedState.java │ │ └── hadoop │ │ └── MR │ │ ├── covid │ │ ├── CovidReducer.java │ │ ├── CovidMapper.java │ │ ├── Covid.java │ │ └── CovidApp.java │ │ └── CarBean.java │ ├── scala │ ├── yxlm │ │ └── LolApp.scala │ ├── transformApp │ │ ├── util │ │ │ ├── GuassTest.scala │ │ │ ├── DateUtils.scala │ │ │ ├── HBaseSink.scala │ │ │ ├── KafkaSink.scala │ │ │ ├── TrafficEntity.scala │ │ │ ├── JDBCSink.scala │ │ │ ├── GlobalEntity.scala │ │ │ ├── MakeData.scala │ │ │ └── MakeData1.scala │ │ └── monitorWarning │ │ │ └── RTCarAnaly.scala │ ├── cn │ │ └── northpark │ │ │ └── spark │ │ │ ├── StreamWordCountScala.scala │ │ │ └── scoreStt.scala │ └── suicideApp │ │ └── HdfsTest.java │ └── python │ ├── LogisticRegression.py │ ├── gendata.py │ ├── KmeansGroup2.py │ └── KmeansGroup.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | .idea 3 | /backEnd/ 4 | *.log 5 | redis.properties 6 | *.iml -------------------------------------------------------------------------------- /src/main/resources/A级景区经纬度.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liuhouer/np-flinks/HEAD/src/main/resources/A级景区经纬度.xlsx -------------------------------------------------------------------------------- /src/main/resources/去哪儿网景点数据.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liuhouer/np-flinks/HEAD/src/main/resources/去哪儿网景点数据.csv -------------------------------------------------------------------------------- /src/main/java/cn/northpark/spark/scoreApp/Score.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liuhouer/np-flinks/HEAD/src/main/java/cn/northpark/spark/scoreApp/Score.txt -------------------------------------------------------------------------------- /src/main/resources/phoenix.properties: -------------------------------------------------------------------------------- 1 | # 数据库驱动|不填写HikariCp会自动识别 2 | driverClassName=org.apache.phoenix.jdbc.PhoenixDriver 3 | # 访问数据库连接 4 | jdbcUrl=jdbc:phoenix:node1:2181:/hbase 5 | -------------------------------------------------------------------------------- /src/main/scala/yxlm/LolApp.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.yxlm 2 | 3 | /** 4 | * 5 | * @author bruce 6 | * @date 2022年04月18日 09:34:02 7 | */ 8 | class LolApp { 9 | 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/resources/clickhouse.properties: -------------------------------------------------------------------------------- 1 | # 数据库驱动|不填写HikariCp会自动识别 2 | driverClassName=ru.yandex.clickhouse.ClickHouseDriver 3 | # 访问数据库连接 4 | #jdbcUrl=jdbc:clickhouse://localhost:8123/ 5 | jdbcUrl=jdbc:clickhouse://node1:8123/ 6 | # 数据库用户名 7 | #username=test 8 | # 数据库密码 9 | 
#password=test 10 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/UserVO.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | import java.io.Serializable; 4 | 5 | public class UserVO implements Serializable { 6 | 7 | 8 | public Integer id; 9 | 10 | public String username; 11 | 12 | public String email; 13 | 14 | 15 | } -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/enums/RelType.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase.enums; 2 | 3 | /** 4 | * @author bruce 5 | * @date 2022年06月26日 10:58:15 6 | */ 7 | public interface RelType { 8 | 9 | String REPLY = "reply";// 回复 10 | String TRANS_LINK = "transLink";// 转发 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/RocksSaveable.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.util; 2 | 3 | import org.rocksdb.RocksDB; 4 | 5 | /** 6 | * @author zhangyang 7 | * @date 2020年07月22日 17:58:03 8 | */ 9 | public interface RocksSaveable { 10 | void save(RocksDB rocksDB) throws Exception; 11 | 12 | void deleteFromRocks(RocksDB rocksDB) throws Exception; 13 | } 14 | -------------------------------------------------------------------------------- /src/main/resources/sqlcfg.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | #sql configs here 3 | ################################################################################ 4 | 5 | #sql query configs 6 | sql=select word,sum(counts) as counts from word_count group by word having sum(counts) >=2 order by counts desc 7 | table=word_count 8 | columns=word,counts 9 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/Product.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | public class Product { 4 | 5 | public String category; 6 | public String name; 7 | 8 | @Override 9 | public String toString() { 10 | return "Product{" + 11 | "category='" + category + '\'' + 12 | ", name='" + name + '\'' + 13 | '}'; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/StatisticsVO.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | import java.util.Map; 4 | 5 | /** 6 | * @author bruce 7 | * @date 2021年11月17日 16:24:01 8 | */ 9 | public class StatisticsVO { 10 | public String url ; 11 | public String method ; 12 | public String ip ; 13 | public String class_method ; 14 | public String args ; 15 | 16 | public UserVO userVO; 17 | 18 | public Map cookieMap; 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/GuassTest.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import org.apache.commons.math3.random.{GaussianRandomGenerator, JDKRandomGenerator} 
4 | 5 | object GaussTest { 6 | def main(args: Array[String]): Unit = { 7 | val generator = new GaussianRandomGenerator(new JDKRandomGenerator()) 8 | for(i <- 1 to 100){ 9 | val result: Int = (generator.nextNormalizedDouble() * 100).abs.toInt 10 | println(result) 11 | } 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /src/main/resources/oracle.properties: -------------------------------------------------------------------------------- 1 | # 数据库驱动|不填写HikariCp会自动识别 2 | driverClassName=oracle.jdbc.driver.OracleDriver 3 | # 访问数据库连接 4 | jdbcUrl=jdbc:oracle:thin:@localhost:1521:test 5 | # 数据库用户名 6 | username=test 7 | # 数据库密码 8 | password=test 9 | # 最大连接数 10 | maximumPoolSize=30 11 | # 连接池空闲连接的最小数量 12 | minimumIdle=5 13 | # 开启事务自动提交 14 | autoCommit=false 15 | # 是否自定义配置,为true时下面两个参数才生效 16 | dataSource.cachePrepStmts=true 17 | # 连接池大小默认25,官方推荐250-500 18 | dataSource.prepStmtCacheSize=250 19 | # 单条语句最大长度默认256,官方推荐2048 20 | dataSource.prepStmtCacheSqlLimit=2048 21 | 22 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/Message.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | /** 4 | * @author bruce 5 | * @date 2024年01月25日 13:44:13 6 | */ 7 | public class Message { 8 | private String msgType; 9 | private String body; 10 | 11 | public String getMsgType() { 12 | return msgType; 13 | } 14 | 15 | public void setMsgType(String msgType) { 16 | this.msgType = msgType; 17 | } 18 | 19 | public String getBody() { 20 | return body; 21 | } 22 | 23 | public void setBody(String body) { 24 | this.body = body; 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /src/main/resources/hikari.properties: -------------------------------------------------------------------------------- 1 | # 数据库驱动|不填写HikariCp会自动识别 2 | driverClassName=com.mysql.jdbc.Driver 3 | # 访问数据库连接 4 | jdbcUrl=jdbc:mysql://localhost:3306/flink?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false&autoReconnect=true 5 | # 数据库用户名 6 | username=root 7 | # 数据库密码 8 | password=123456 9 | # 最大连接数 10 | maximumPoolSize=30 11 | # 连接池空闲连接的最小数量 12 | minimumIdle=5 13 | # 开启事务自动提交 14 | autoCommit=false 15 | 16 | # 是否自定义配置,为true时下面两个参数才生效 17 | dataSource.cachePrepStmts=true 18 | # 连接池大小默认25,官方推荐250-500 19 | dataSource.prepStmtCacheSize=250 20 | # 单条语句最大长度默认256,官方推荐2048 21 | dataSource.prepStmtCacheSqlLimit=2048 -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/WordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | public class WordCount { 4 | public String word; 5 | public Integer counts; 6 | 7 | public WordCount() { 8 | } 9 | 10 | public WordCount(String word, Integer counts) { 11 | this.word = word; 12 | this.counts = counts; 13 | } 14 | 15 | public static WordCount of(String word, Integer counts) { 16 | return new WordCount(word, counts); 17 | } 18 | 19 | @Override 20 | public String toString() { 21 | return "WordCount{" + 22 | "word='" + word + '\'' + 23 | ", counts=" + counts + 24 | '}'; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/PrintSink.java: -------------------------------------------------------------------------------- 1 | package 
cn.northpark.flink; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 6 | /** 7 | * @author bruce 8 | * 揭秘subTask的编号 9 | */ 10 | public class PrintSink { 11 | 12 | public static void main(String[] args) throws Exception { 13 | 14 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | 16 | DataStreamSource source = env.socketTextStream("localhost", 4000); 17 | 18 | source.print("the res is ").setParallelism(2); 19 | 20 | env.execute("PrintSink"); 21 | 22 | 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/topN/spark/passwordStasApp/Test1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.topN.spark.passwordStasApp; 2 | 3 | import java.util.StringTokenizer; 4 | 5 | /** 6 | * @author bruce 7 | * @date 2022年06月25日 12:21:23 8 | */ 9 | public class Test1 { 10 | 11 | public static void main(String[] args) { 12 | String exp = "shibazi_lin@126.com\t6584596"; 13 | // 去掉所有的键盘上的不可输入字符,不包括双字节的,32-126 14 | String pattern = "[^\040-\176]"; 15 | String line = exp.toString().replaceAll(pattern, " "); 16 | StringTokenizer itr = new StringTokenizer(line); 17 | 18 | while (itr.hasMoreTokens()) { 19 | System.err.println(itr.nextToken()); 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/WordCountBean.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api; 2 | 3 | public class WordCountBean { 4 | public String word; 5 | public int counts; 6 | 7 | public WordCountBean() { 8 | } 9 | 10 | public WordCountBean(String word, int counts) { 11 | this.word = word; 12 | this.counts = counts; 13 | } 14 | 15 | public static WordCountBean of(String word, int counts) { 16 | return new WordCountBean(word, counts); 17 | } 18 | 19 | @Override 20 | public String toString() { 21 | return "WordCount{" + 22 | "word='" + word + '\'' + 23 | ", counts=" + counts + 24 | '}'; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/udf/TopNAggregateFunction.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window.udf; 2 | 3 | import cn.northpark.flink.bean.Access; 4 | import org.apache.flink.api.common.functions.AggregateFunction; 5 | 6 | public class TopNAggregateFunction implements AggregateFunction { 7 | @Override 8 | public Long createAccumulator() { 9 | return 0L; 10 | } 11 | 12 | @Override 13 | public Long add(Access value, Long accumulator) { 14 | return accumulator + 1; 15 | } 16 | 17 | @Override 18 | public Long getResult(Long accumulator) { 19 | return accumulator; 20 | } 21 | 22 | @Override 23 | public Long merge(Long a, Long b) { 24 | return null; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/Split.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.stream.sql; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.api.common.typeinfo.Types; 5 | import 
org.apache.flink.table.functions.TableFunction; 6 | import org.apache.flink.types.Row; 7 | 8 | public class Split extends TableFunction { 9 | private String separator = ","; 10 | public Split(String separator) { 11 | this.separator = separator; 12 | } 13 | public void eval(String line){ 14 | for (String s: line.split(separator)){ 15 | collect(Row.of(s)); 16 | } 17 | } 18 | 19 | @Override 20 | public TypeInformation getResultType() { 21 | return Types.ROW(Types.STRING); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/MerchantDayStaApp/MerchantDaySta.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.MerchantDayStaApp; 2 | 3 | import lombok.Data; 4 | 5 | 6 | /** 7 | * @author bruce 8 | * @date 2022年05月09日 18:06:54 9 | */ 10 | 11 | @Data 12 | public class MerchantDaySta { 13 | 14 | private String merchantId; 15 | 16 | private Double totalDeductMoney; 17 | 18 | public MerchantDaySta() { 19 | } 20 | 21 | public MerchantDaySta(String merchantId, Double totalDeductMoney) { 22 | this.merchantId = merchantId; 23 | this.totalDeductMoney = totalDeductMoney; 24 | } 25 | 26 | @Override 27 | public String toString() { 28 | return "MerchantDaySta{" + 29 | "merchantId='" + merchantId + '\'' + 30 | ", totalDeductMoney=" + totalDeductMoney + 31 | '}'; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/bean/WeiboRelations.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase.bean; 2 | 3 | import lombok.Data; 4 | 5 | /** 6 | * @author bruce 7 | * @date 2022年06月26日 10:56:13 8 | */ 9 | @Data 10 | public class WeiboRelations { 11 | private String user_id; 12 | 13 | /** 14 | * reply:回复 15 | * transLink:转发 16 | */ 17 | private String rel_type; 18 | private String rel_user_id; 19 | 20 | /** 21 | * 1 : 被转发/被回复 22 | * 0 : 主动转发/主动回复 23 | */ 24 | private int by_type; 25 | private WeiboRelations(){}; 26 | 27 | public WeiboRelations(String user_id, String rel_type, String rel_user_id ,int by_type) { 28 | this.user_id = user_id; 29 | this.rel_type = rel_type; 30 | this.rel_user_id = rel_user_id; 31 | this.by_type = by_type; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/hadoop/MR/covid/CovidReducer.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.hadoop.MR.covid; 2 | 3 | import org.apache.hadoop.io.Text; 4 | import org.apache.hadoop.mapreduce.Reducer; 5 | 6 | import java.io.IOException; 7 | 8 | /** 9 | * @author bruce 10 | * @date 2023年03月20日 13:59:45 11 | */ 12 | public class CovidReducer extends Reducer { 13 | @Override 14 | public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { 15 | int totalCases = 0; 16 | int totalDeaths = 0; 17 | 18 | for (Covid value : values) { 19 | totalCases += value.getNewCases(); 20 | totalDeaths += value.getNewDeaths(); 21 | } 22 | 23 | String result = "[" + totalCases + ", " + totalDeaths + "]"; 24 | context.write(key, new Text(result)); 25 | } 26 | } -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/DateUtils.scala: -------------------------------------------------------------------------------- 1 | package 
cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.text.SimpleDateFormat 4 | import java.util.Date 5 | 6 | import scala.util.Random 7 | 8 | object DateUtils { 9 | //获取当前天的日期 10 | def getCurrentDate() = { 11 | val sdf = new SimpleDateFormat("yyyy-MM-dd") 12 | sdf.format(new Date()) 13 | } 14 | //随机获取一个小时 15 | def getRandomHour() = { 16 | val random = new Random() 17 | random.nextInt(24).formatted("%02d") 18 | } 19 | //随机获取分钟或者秒 20 | def getRandomMinutesOrSeconds() = { 21 | val random = new Random() 22 | random.nextInt(60).formatted("%02d") 23 | } 24 | 25 | //根据时间戳转换成 yyyy-mm-dd HH:mm:ss 数据格式 26 | def timestampToDataStr(timeStamp:Long): String = { 27 | val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") 28 | sdf.format(new Date(timeStamp)) 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/EventCatagoryProductCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | public class EventCatagoryProductCount { 4 | 5 | public String event; 6 | public String catagory; 7 | public String product; 8 | public long count; 9 | public long start; 10 | public long end; 11 | 12 | public EventCatagoryProductCount() { 13 | } 14 | 15 | public EventCatagoryProductCount(String event, String catagory, String product, long count, long start, long end) { 16 | this.event = event; 17 | this.catagory = catagory; 18 | this.product = product; 19 | this.count = count; 20 | this.start = start; 21 | this.end = end; 22 | } 23 | 24 | @Override 25 | public String toString() { 26 | return event + "\t" + catagory + "\t" + product + "\t" + count + "\t" + start + "\t" + end; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/join/CountBean.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.join; 2 | 3 | public class CountBean { 4 | 5 | /** 6 | * 设置为 public 7 | */ 8 | public String name; 9 | /** 10 | * 设置为 public 11 | */ 12 | public long number; 13 | 14 | public CountBean() { 15 | } 16 | 17 | public CountBean(String name, long number) { 18 | this.name = name; 19 | this.number = number; 20 | } 21 | 22 | public String getName() { 23 | return name; 24 | } 25 | 26 | public void setName(String name) { 27 | this.name = name; 28 | } 29 | 30 | public long getNumber() { 31 | return number; 32 | } 33 | 34 | public void setNumber(int number) { 35 | this.number = number; 36 | } 37 | 38 | @Override 39 | public String toString() { 40 | return this.name + ":" + this.number; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/python/LogisticRegression.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LogisticRegression 2 | import pandas as pd 3 | from sklearn.model_selection import train_test_split 4 | 5 | path='C:\\Users\\Bruce\\Downloads\\letter-recognition.data' #数据集路径 6 | Cname = ['字母','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16'] #设置列名称 7 | data=pd.read_csv(path,header=None,names=Cname) 8 | data.index.name='index' #datadrame结构的行索引与列索引名字 9 | data.columns.name='columns' 10 | print(data) 11 | 12 | X=data[data.columns[1:17]] #提取特征值,不需要第一列的字母值 13 | x_train,x_test,y_train,y_test=train_test_split(X,data["字母"],train_size=0.8,random_state=77) 14 | 
#设置最大迭代次数为4000,默认为1000.不更改会出现警告提示 15 | lr=LogisticRegression(max_iter=4000) 16 | clm=lr.fit(x_train,y_train) #输入训练集 17 | print('对测试集的预测结果:') 18 | #输出预测结果、预测结果的结构类型及尺寸 19 | print(clm.predict(x_test),type(clm.predict(x_test)),clm.predict(x_test).shape) 20 | # 21 | print('模型评分:'+ str(clm.score(x_test,y_test))) #用决定系数来打分 -------------------------------------------------------------------------------- /src/main/python/gendata.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | # 机构等级 4 | hos_levels = ['一级:1', '二级:2', '三级:3'] 5 | 6 | # 收入类别 7 | income_categories = ['药品收入:yp', '耗材收入:hc', '检查检验:jcjy', '化验:hy', '医疗服务:yl', '其他:qt'] 8 | 9 | # 机构编码和名称 10 | hos_codes = ['hos001', 'hos002', 'hos003', 'hos004'] 11 | hos_names = ['医院A', '医院B', '医院C', '医院D'] 12 | 13 | # 生成insert语句 14 | for i in range(36): 15 | year_mon = f'2019{i+1:02d}' 16 | for j in range(100): 17 | hos_level = random.choice(hos_levels) 18 | hos_code = random.choice(hos_codes) 19 | hos_name = random.choice(hos_names) 20 | income_category = random.choice(income_categories) 21 | income_amount = round(random.uniform(1000, 10000), 2) 22 | sql = f"INSERT INTO med_ins_income (year, year_mon, hos_level, hos_code, hos_name, income_category, income_amount) VALUES ('2019', '{year_mon}', '{hos_level}', '{hos_code}', '{hos_name}', '{income_category}', {income_amount});" 23 | print(sql) -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/CreateTable.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase; 2 | 3 | import cn.northpark.flink.util.PhoenixUtilV2; 4 | 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.Objects; 8 | import java.util.stream.Collectors; 9 | 10 | /** 11 | * @author bruce 12 | * @date 2022年06月26日 10:03:33 13 | */ 14 | public class CreateTable { 15 | 16 | public static void main(String[] args) { 17 | 18 | //1.建表 19 | String t_weibo_relations_sql = "CREATE TABLE \"stt\".T_WEIBO_RELATIONS_V2 (\n" + 20 | " ID VARCHAR NOT NULL,\n" + 21 | " USER_ID VARCHAR ,\n" + 22 | " REL_TYPE VARCHAR,\n" + 23 | " REL_USER_ID VARCHAR,\n" + 24 | " BY_TYPE INTEGER\n" + 25 | " CONSTRAINT PK PRIMARY KEY (ID)\n" + 26 | ")"; 27 | PhoenixUtilV2.createTable(t_weibo_relations_sql); 28 | 29 | 30 | 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/starrocks/bean/Identities.java: -------------------------------------------------------------------------------- 1 | 2 | package cn.northpark.flink.starrocks.bean; 3 | 4 | import com.google.gson.annotations.SerializedName; 5 | 6 | import javax.annotation.Generated; 7 | 8 | @Generated("net.hexar.json2pojo") 9 | @SuppressWarnings("unused") 10 | public class Identities { 11 | 12 | @SerializedName("$identity_anonymous_id") 13 | private String $identityAnonymousId; 14 | @SerializedName("$identity_mp_id") 15 | private String $identityMpId; 16 | 17 | public String get$identityAnonymousId() { 18 | return $identityAnonymousId; 19 | } 20 | 21 | public void set$identityAnonymousId(String $identityAnonymousId) { 22 | this.$identityAnonymousId = $identityAnonymousId; 23 | } 24 | 25 | public String get$identityMpId() { 26 | return $identityMpId; 27 | } 28 | 29 | public void set$identityMpId(String $identityMpId) { 30 | this.$identityMpId = $identityMpId; 31 | } 32 | 33 | } 34 | 
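The Identities bean above relies on Gson's @SerializedName mapping for the `$`-prefixed Sensors-style fields rather than on the Java field names. A minimal usage sketch follows (not part of the repository; the class name and sample JSON values are invented for illustration):

package cn.northpark.flink.starrocks.bean;

import com.google.gson.Gson;

// Demo only: parse a Sensors-style "identities" JSON fragment into the Gson-annotated bean above.
public class IdentitiesParseDemo {
    public static void main(String[] args) {
        String json = "{\"$identity_anonymous_id\":\"anon-001\",\"$identity_mp_id\":\"mp-001\"}";
        Identities identities = new Gson().fromJson(json, Identities.class);
        System.out.println(identities.get$identityAnonymousId()); // anon-001
        System.out.println(identities.get$identityMpId());        // mp-001
    }
}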
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/hadoop/MR/covid/CovidMapper.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.hadoop.MR.covid; 2 | 3 | import org.apache.hadoop.io.LongWritable; 4 | import org.apache.hadoop.io.Text; 5 | import org.apache.hadoop.mapreduce.Mapper; 6 | 7 | import java.io.IOException; 8 | import java.util.Objects; 9 | 10 | /** 11 | * @author bruce 12 | * @date 2023年03月20日 13:58:54 13 | */ 14 | public class CovidMapper extends Mapper { 15 | @Override 16 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 17 | String[] data = value.toString().split(","); 18 | String country = data[2]; 19 | int newCases = Integer.parseInt(Objects.nonNull(data[4])?data[4]:"0"); 20 | int newDeaths = Integer.parseInt(Objects.nonNull(data[6])?data[6]:"0"); 21 | 22 | if (country.equals("China") || country.equals("United States of America")) { 23 | context.write(new Text(country), new Covid(newCases, newDeaths)); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/resources/config.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | #configs here 3 | ################################################################################ 4 | 5 | #Kafka configs 6 | topics=flink005 7 | group.id=bruce 8 | bootstrap.servers=localhost:9092 9 | auto.offset.reset=earliest 10 | enable.auto.commit=true 11 | 12 | #kafka for browse demo 13 | kafkaBootstrapServers=localhost:9092 14 | browseTopic=flink4b 15 | browseTopicGroupID=bruce 16 | 17 | #kafka for northpark movie 18 | npKafkaBootstrapServers=np:9092 19 | npTopic=northpark 20 | npTopicGroupID=bruce 21 | 22 | 23 | 24 | #flink configs 25 | checkpoint.interval=10000 26 | 27 | 28 | #redis 29 | redis.host=localhost 30 | redis.pwd= 31 | redis.db=0 32 | 33 | #jdbc config 34 | # 数据库驱动|不填写HikariCp会自动识别 35 | driverClassName=com.mysql.jdbc.Driver 36 | # 访问数据库连接 37 | jdbcUrl=jdbc:mysql://localhost:3306/flink?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false&autoReconnect=true 38 | # 数据库用户名 39 | username=root 40 | # 数据库密码 41 | password=123456 -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/join/StreamDataSourceB.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.join; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | 7 | public class StreamDataSourceB extends RichParallelSourceFunction> { 8 | private volatile boolean running = true; 9 | 10 | @Override 11 | public void run(SourceContext> ctx) throws InterruptedException { 12 | 13 | Tuple3[] elements = new Tuple3[]{ 14 | Tuple3.of("a", "hangzhou", 1000000059000L), 15 | Tuple3.of("b", "beijing", 1000000105000L), 16 | }; 17 | 18 | int count = 0; 19 | while (running && count < elements.length) { 20 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (long) elements[count].f2)); 21 | count++; 22 | Thread.sleep(1000); 23 | } 24 | } 25 | 26 | @Override 27 | public void cancel() { 28 | running = false; 29 | } 30 | } 31 | 
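The join package in this section only defines the parallel test sources (StreamDataSourceA/B/C, each emitting Tuple3 records whose third field is an event timestamp) and the CountBean POJO; the driver that actually joins them lives elsewhere in the repository. As a hedged sketch, not the repository's own job, two of these sources could be joined on their first field with an event-time tumbling window roughly as below; the class name, the 5-second watermark delay, and the 10-second window size are arbitrary choices for illustration:

package cn.northpark.flink.join;

import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

// Sketch only: event-time window join of StreamDataSourceA and StreamDataSourceB on their first field.
public class WindowJoinDemo {

    // The third tuple field carries the event timestamp; allow 5 seconds of out-of-orderness.
    private static class TsExtractor extends BoundedOutOfOrdernessTimestampExtractor<Tuple3<String, String, Long>> {
        TsExtractor() { super(Time.seconds(5)); }
        @Override
        public long extractTimestamp(Tuple3<String, String, Long> element) { return element.f2; }
    }

    // Key both streams by the first tuple field.
    private static class FirstField implements KeySelector<Tuple3<String, String, Long>, String> {
        @Override
        public String getKey(Tuple3<String, String, Long> value) { return value.f0; }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        DataStream<Tuple3<String, String, Long>> a =
                env.addSource(new StreamDataSourceA()).assignTimestampsAndWatermarks(new TsExtractor());
        DataStream<Tuple3<String, String, Long>> b =
                env.addSource(new StreamDataSourceB()).assignTimestampsAndWatermarks(new TsExtractor());

        // Inner join within 10-second event-time tumbling windows.
        a.join(b)
                .where(new FirstField())
                .equalTo(new FirstField())
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .apply(new JoinFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, String>() {
                    @Override
                    public String join(Tuple3<String, String, Long> first, Tuple3<String, String, Long> second) {
                        return first.f0 + " -> " + first.f1 + " | " + second.f1;
                    }
                })
                .print();

        env.execute("WindowJoinDemo");
    }
}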
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/udf/TopNWindowFunction.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window.udf; 2 | 3 | import cn.northpark.flink.bean.EventCatagoryProductCount; 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.streaming.api.functions.windowing.WindowFunction; 6 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 7 | import org.apache.flink.util.Collector; 8 | 9 | public class TopNWindowFunction implements WindowFunction, TimeWindow> { 10 | @Override 11 | public void apply(Tuple3 value, TimeWindow window, Iterable input, Collector out) throws Exception { 12 | 13 | String event = value.f0; 14 | String catagory = value.f1; 15 | String product = value.f2; 16 | Long count = input.iterator().next(); 17 | long start = window.getStart(); 18 | long end = window.getEnd(); 19 | 20 | out.collect(new EventCatagoryProductCount(event, catagory, product,count,start, end)); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/ObjectUtil.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.util; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.ByteArrayOutputStream; 5 | import java.io.ObjectInputStream; 6 | import java.io.ObjectOutputStream; 7 | 8 | public class ObjectUtil { 9 | 10 | 11 | /** 12 | * 对象转Byte数组 13 | * 14 | * @param obj 15 | * @return 16 | * @throws Exception 17 | */ 18 | public static byte[] objectToBytes(Object obj) throws Exception { 19 | ByteArrayOutputStream out = new ByteArrayOutputStream(); 20 | ObjectOutputStream sOut = new ObjectOutputStream(out); 21 | sOut.writeObject(obj); 22 | sOut.flush(); 23 | byte[] bytes = out.toByteArray(); 24 | 25 | 26 | return bytes; 27 | } 28 | 29 | /** 30 | * 字节数组转对象 31 | * 32 | * @param bytes 33 | * @return 34 | * @throws Exception 35 | */ 36 | public static Object bytesToObject(byte[] bytes) throws Exception { 37 | 38 | //byte转object 39 | ByteArrayInputStream in = new ByteArrayInputStream(bytes); 40 | ObjectInputStream sIn = new ObjectInputStream(in); 41 | return sIn.readObject(); 42 | 43 | } 44 | } 45 | 46 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/timeout/TimeOutResult.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.timeout; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * @author zhangyang 7 | * @date 2020年12月15日 15:15:35 8 | */ 9 | public class TimeOutResult implements Serializable { 10 | 11 | public TimeOutResult() { 12 | } 13 | 14 | public String queueName; 15 | public String primaryKey; 16 | public String resultMsg; 17 | public String status; 18 | 19 | 20 | public TimeOutResult(String queueName, String primaryKey, String resultMsg,String status) { 21 | this.queueName = queueName; 22 | this.primaryKey = primaryKey; 23 | this.resultMsg = resultMsg; 24 | this.status = status; 25 | } 26 | 27 | @Override 28 | public String toString() { 29 | return "TimeOutResult{" + 30 | "queueName='" + queueName + '\'' + 31 | ", primaryKey='" + primaryKey + '\'' + 32 | ", resultMsg='" + resultMsg + '\'' + 33 | ", status='" + status + '\'' + 34 | '}'; 35 | } 36 | } 37 | 
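TimeOutResult above is Serializable, so it can be converted to and from a byte[] with the ObjectUtil helper shown earlier in this section, for example before handing it to a message queue as raw bytes. A minimal round-trip sketch (not part of the repository; the class name, queue name and key values are invented for illustration):

package cn.northpark.flink.timeout;

import cn.northpark.flink.util.ObjectUtil;

// Demo only: serialize a TimeOutResult to bytes and restore it with ObjectUtil.
public class TimeOutResultSerializeDemo {
    public static void main(String[] args) throws Exception {
        TimeOutResult result = new TimeOutResult("order_timeout_queue", "order-1001", "timeout", "0");

        byte[] bytes = ObjectUtil.objectToBytes(result);                       // object -> byte[]
        TimeOutResult copy = (TimeOutResult) ObjectUtil.bytesToObject(bytes);  // byte[] -> object

        System.out.println(copy); // prints the same field values as the original
    }
}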
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/starrocks/bean/Lib.java: -------------------------------------------------------------------------------- 1 | 2 | package cn.northpark.flink.starrocks.bean; 3 | 4 | import com.google.gson.annotations.Expose; 5 | import com.google.gson.annotations.SerializedName; 6 | 7 | import javax.annotation.Generated; 8 | 9 | @Generated("net.hexar.json2pojo") 10 | @SuppressWarnings("unused") 11 | public class Lib { 12 | 13 | @Expose 14 | private String $lib; 15 | @SerializedName("$lib_method") 16 | private String $libMethod; 17 | @SerializedName("$lib_version") 18 | private String $libVersion; 19 | 20 | public String get$lib() { 21 | return $lib; 22 | } 23 | 24 | public void set$lib(String $lib) { 25 | this.$lib = $lib; 26 | } 27 | 28 | public String get$libMethod() { 29 | return $libMethod; 30 | } 31 | 32 | public void set$libMethod(String $libMethod) { 33 | this.$libMethod = $libMethod; 34 | } 35 | 36 | public String get$libVersion() { 37 | return $libVersion; 38 | } 39 | 40 | public void set$libVersion(String $libVersion) { 41 | this.$libVersion = $libVersion; 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/main/scala/cn/northpark/spark/StreamWordCountScala.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.spark 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.streaming.{Seconds, StreamingContext} 5 | /** 6 | * 7 | * @author bruce 8 | * @date 2022年06月17日 09:18:38 9 | */ 10 | object StreamWordCountScala { 11 | 12 | def main(args: Array[String]): Unit = { 13 | //创建SparkConf配置对象 14 | val conf = new SparkConf() 15 | //注意:此处的local[2]表示启动2个进程,一个进程负责读取数据源的数据,一个进程负责处理数据 16 | .setMaster("local[2]") 17 | .setAppName("StreamWordCountScala") 18 | 19 | //创建StreamingContext,指定数据处理间隔为5秒 20 | val ssc = new StreamingContext(conf, Seconds(5)) 21 | 22 | //通过socket获取实时产生的数据 23 | val linesRDD = ssc.socketTextStream("node1", 8888) 24 | 25 | //对接收到的数据使用空格进行切割,转换成单个单词 26 | val wordsRDD = linesRDD.flatMap(_.split(" ")) 27 | 28 | //把每个单词转换成tuple2的形式 29 | val tupRDD = wordsRDD.map((_, 1)) 30 | 31 | //执行reduceByKey操作 32 | val wordcountRDD = tupRDD.reduceByKey(_ + _) 33 | 34 | //将结果数据打印到控制台 35 | wordcountRDD.print() 36 | 37 | //启动任务 38 | ssc.start() 39 | //等待任务停止 40 | ssc.awaitTermination() 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/hadoop/MR/covid/Covid.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.hadoop.MR.covid; 2 | 3 | import org.apache.hadoop.io.Writable; 4 | 5 | import java.io.DataInput; 6 | import java.io.DataOutput; 7 | import java.io.IOException; 8 | 9 | /** 10 | * @author bruce 11 | * @date 2023年03月20日 13:58:33 12 | */ 13 | public class Covid implements Writable { 14 | 15 | public Covid() { 16 | // 空构造函数 17 | } 18 | private int newCases; 19 | private int newDeaths; 20 | 21 | public Covid(int newCases, int newDeaths) { 22 | this.newCases = newCases; 23 | this.newDeaths = newDeaths; 24 | } 25 | 26 | public int getNewCases() { 27 | return newCases; 28 | } 29 | 30 | public int getNewDeaths() { 31 | return newDeaths; 32 | } 33 | 34 | @Override 35 | public void write(DataOutput dataOutput) throws IOException { 36 | dataOutput.writeInt(newCases); 37 | dataOutput.writeInt(newDeaths ); 38 | } 39 | 40 | @Override 41 | 
public void readFields(DataInput dataInput) throws IOException { 42 | this.newCases =dataInput.readInt(); 43 | this.newDeaths =dataInput.readInt(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/HBaseSink.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.util 4 | 5 | import org.apache.flink.configuration.Configuration 6 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 7 | import org.apache.hadoop.conf 8 | import org.apache.hadoop.hbase.client.{ConnectionFactory, Put, Table} 9 | import org.apache.hadoop.hbase.{HBaseConfiguration, TableName, client} 10 | /** 11 | * HBase sink ,批量数据插入到HBase中 12 | */ 13 | class HBaseSink extends RichSinkFunction[java.util.List[Put]] { 14 | var configuration: conf.Configuration = _ 15 | var conn: client.Connection = _ 16 | //初始化 RichSinkFunction 对象时 执行一次 17 | override def open(parameters: Configuration): Unit = { 18 | configuration = HBaseConfiguration.create() 19 | configuration.set("hbase.zookeeper.quorum","node3:2181,node4:2181,node5:2181") 20 | conn = ConnectionFactory.createConnection(configuration) 21 | } 22 | 23 | 24 | override def invoke(value: util.List[Put], context: SinkFunction.Context): Unit = { 25 | //连接HBase 表 26 | val table: Table = conn.getTable(TableName.valueOf("a1")) 27 | table.put(value) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/KeyBy1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.typeinfo.Types; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | 11 | /** 12 | * KeyBy实例一 13 | * @author bruce 14 | */ 15 | public class KeyBy1 { 16 | 17 | public static void main(String[] args) throws Exception { 18 | 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | //直接输入单词 22 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 23 | 24 | SingleOutputStreamOperator> map = lines.map(i -> Tuple2.of(i, 1)).returns(Types.TUPLE(Types.STRING,Types.INT)); 25 | 26 | 27 | KeyedStream, Tuple> keyed = map.keyBy(0); 28 | 29 | keyed.print(); 30 | 31 | env.execute("KeyBy1"); 32 | 33 | } 34 | 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/join/StreamDataSourceC.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.join; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 6 | 7 | 8 | public class StreamDataSourceC extends RichParallelSourceFunction> { 9 | private volatile boolean running = true; 10 | 11 | @Override 12 | public void run(SourceFunction.SourceContext> ctx) throws InterruptedException { 13 | 14 | 
Tuple3[] elements = new Tuple3[]{ 15 | Tuple3.of("a", "beijing", 1000000058000L), 16 | Tuple3.of("c", "beijing", 1000000055000L), 17 | Tuple3.of("d", "beijing", 1000000106000L), 18 | }; 19 | 20 | int count = 0; 21 | while (running && count < elements.length) { 22 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (long) elements[count].f2)); 23 | count++; 24 | Thread.sleep(1000); 25 | } 26 | } 27 | 28 | @Override 29 | public void cancel() { 30 | running = false; 31 | } 32 | } -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/RestfulActivityLocationsApplication.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project; 2 | 3 | import cn.northpark.flink.project.function.RestfulToActivityBeanFunciton; 4 | import cn.northpark.flink.util.FlinkUtilsV1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | 9 | /** 10 | * 11 | * 需求2:查询高德地图API,关联地理位置信息 12 | * 13 | * 给定的数据: 14 | * u001,A1,2019-09-02 10:10:11,1,115.908923,39.267291 15 | * u002,A1,2019-09-02 10:11:11,1,123.818517,41.312458 16 | * u003,A2,2019-09-02 10:13:11,1,121.26757,37.49794 17 | * 18 | * 希望的得到的数据 19 | * u001,A1,2019-09-02 10:10:11,1,北京市 20 | * u002,A1,2019-09-02 10:11:11,1,辽宁省 21 | * 22 | */ 23 | public class RestfulActivityLocationsApplication { 24 | 25 | public static void main(String[] args) throws Exception { 26 | 27 | DataStream lines = FlinkUtilsV1.createKafkaStream(args,new SimpleStringSchema()); 28 | 29 | SingleOutputStreamOperator beans = lines.map(new RestfulToActivityBeanFunciton()); 30 | 31 | beans.print(); 32 | 33 | FlinkUtilsV1.getEnv().execute("HandleActivityLocationsApplication"); 34 | 35 | } 36 | } 37 | -------------------------------------------------------------------------------- 
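RestfulActivityLocationsApplication above maps each Kafka line through RestfulToActivityBeanFunciton to attach a province name looked up from the AMap (高德地图) web API. That function is not shown in this section; the following is only a hedged sketch of one possible shape for such an enrichment step. The sketch class name, the reverse-geocoding URL, the "YOUR_AMAP_KEY" placeholder and returning the raw response body instead of a parsed ActivityBean are all assumptions for illustration:

package cn.northpark.flink.project.function;

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

// Sketch only: split the incoming CSV line, call a reverse-geocoding endpoint, append the response.
public class RestfulGeoEnrichSketch extends RichMapFunction<String, String> {

    private transient CloseableHttpClient httpClient;

    @Override
    public void open(Configuration parameters) {
        // One HTTP client per subtask, reused across records.
        httpClient = HttpClients.createDefault();
    }

    @Override
    public String map(String line) throws Exception {
        // Input shape from the comment above: u001,A1,2019-09-02 10:10:11,1,115.908923,39.267291
        String[] fields = line.split(",");
        String longitude = fields[4];
        String latitude = fields[5];

        String url = "https://restapi.amap.com/v3/geocode/regeo?key=YOUR_AMAP_KEY"
                + "&location=" + longitude + "," + latitude;
        try (CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
            // A real implementation would parse the province out of the JSON body here.
            String body = EntityUtils.toString(response.getEntity(), "UTF-8");
            return fields[0] + "," + fields[1] + "," + fields[2] + "," + fields[3] + "," + body;
        }
    }

    @Override
    public void close() throws Exception {
        if (httpClient != null) {
            httpClient.close();
        }
    }
}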
/src/main/java/cn/northpark/flink/project3/MyParaFileSource.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project3; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple2; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | import java.io.RandomAccessFile; 7 | 8 | public class MyParaFileSource extends RichParallelSourceFunction> { 9 | private String path; 10 | 11 | private boolean flag = true; 12 | 13 | public MyParaFileSource(String path) { 14 | this.path = path; 15 | } 16 | 17 | public MyParaFileSource() { 18 | } 19 | 20 | @Override 21 | public void run(SourceContext ctx) throws Exception { 22 | int index = getRuntimeContext().getIndexOfThisSubtask(); 23 | 24 | RandomAccessFile randomAccessFile = new RandomAccessFile(path +"/" +index +".txt","r"); 25 | 26 | while (flag){ 27 | String line = randomAccessFile.readLine(); 28 | if(line!=null){ 29 | line = new String(line.getBytes("ISO-8859-1"),"utf-8"); 30 | ctx.collect(Tuple2.of(index+"",line)); 31 | }else{ 32 | Thread.sleep(2000); 33 | } 34 | } 35 | } 36 | 37 | @Override 38 | public void cancel() { 39 | flag = true; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/join/StreamDataSourceA.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.join; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | 7 | public class StreamDataSourceA extends RichParallelSourceFunction> { 8 | private volatile boolean running = true; 9 | 10 | @Override 11 | public void run(SourceContext> ctx) throws InterruptedException { 12 | 13 | Tuple3[] elements = new Tuple3[]{ 14 | Tuple3.of("a", "1", 1000000050000L), 15 | Tuple3.of("a", "2", 1000000054000L), 16 | Tuple3.of("a", "3", 1000000079900L), 17 | Tuple3.of("a", "4", 1000000115000L), 18 | Tuple3.of("b", "5", 1000000100000L), 19 | Tuple3.of("b", "6", 1000000108000L) 20 | }; 21 | 22 | int count = 0; 23 | while (running && count < elements.length) { 24 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (Long) elements[count].f2)); 25 | count++; 26 | Thread.sleep(1000); 27 | } 28 | } 29 | 30 | @Override 31 | public void cancel() { 32 | running = false; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/syncIO/AsyncMysqlApplication.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project.syncIO; 2 | 3 | import cn.northpark.flink.project.syncIO.function.AsyncMysqlToActivityBeanFunciton; 4 | import cn.northpark.flink.util.FlinkUtilsV1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | 10 | import java.util.concurrent.TimeUnit; 11 | 12 | /** 13 | * 14 | * Kafka-console-producer --broker-list localhost:9092 --topic flink000 15 | * >A1 16 | * ---------------- 17 | * 1> 新人礼包 18 | * 19 | * 通过连接池异步IO调用数据库关联查询的DEMO 20 | */ 21 | public class AsyncMysqlApplication { 22 | public static void main(String[] args) throws Exception { 23 | 24 | DataStream 
lines = FlinkUtilsV1.createKafkaStream(args,new SimpleStringSchema()); 25 | 26 | //调用异步IO的transform 27 | SingleOutputStreamOperator strs = AsyncDataStream.unorderedWait(lines, new AsyncMysqlToActivityBeanFunciton(), 0, TimeUnit.SECONDS); 28 | 29 | strs.print(); 30 | 31 | FlinkUtilsV1.getEnv().execute("AsyncHandleActivityLocationsApplication"); 32 | 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/KeyBy2Bean.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.KeyedStream; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | 10 | /** 11 | * KeyBy实例2 12 | * @author bruce 13 | */ 14 | public class KeyBy2Bean { 15 | 16 | public static void main(String[] args) throws Exception { 17 | 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | 20 | //直接输入单词 21 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 22 | 23 | SingleOutputStreamOperator map = lines.map(new MapFunction() { 24 | 25 | @Override 26 | public WordCount map(String value) throws Exception { 27 | return WordCount.of(value, 1); 28 | } 29 | }); 30 | 31 | KeyedStream word = map.keyBy("word"); 32 | 33 | word.print(); 34 | 35 | env.execute("KeyBy2Bean"); 36 | 37 | } 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/clickhouse/ReadSQL.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.clickhouse; 2 | 3 | import cn.northpark.flink.util.KafkaString; 4 | import org.apache.kafka.clients.producer.KafkaProducer; 5 | import org.apache.kafka.clients.producer.ProducerRecord; 6 | 7 | import java.io.BufferedReader; 8 | import java.io.FileReader; 9 | 10 | /** 11 | * @author bruce 12 | * @date 2024年04月18日 17:31:29 13 | */ 14 | public class ReadSQL { 15 | public static void main(String[] args) throws Exception { 16 | String sqlFilePath = "C:\\Users\\Bruce\\Desktop\\drg_pay.sql"; // SQL 文件路径 17 | try (BufferedReader reader = new BufferedReader(new FileReader(sqlFilePath))) { 18 | 19 | String line; 20 | StringBuilder batch = new StringBuilder(); 21 | 22 | while ((line = reader.readLine()) != null) { 23 | try { 24 | batch.append(line.trim()); 25 | 26 | if (line.endsWith(";")) { 27 | // 发送到 Kafka 28 | KafkaString.sendKafkaString(KafkaString.buildBasicKafkaProperty(),"drg_pay",batch.toString()); 29 | batch.setLength(0); // 清空批次 30 | } 31 | }catch (Exception e){ 32 | 33 | } 34 | 35 | } 36 | 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/python/KmeansGroup2.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | 3 | import findspark 4 | findspark.init() 5 | 6 | from numpy import array 7 | from math import sqrt 8 | from pyspark import SparkContext 9 | from pyspark.mllib.clustering import KMeans, KMeansModel 10 | import pandas as pd 11 | 12 | if __name__ == "__main__": 13 | sc = SparkContext(appName="KMeansExample",master='local') # 
SparkContext 14 | 15 | 16 | path='C:\\Users\\Bruce\\Downloads\\letter-recognition.data' #数据集路径 17 | Cname = ['字母','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16'] #设置列名称 18 | data=pd.read_csv(path,header=None,names=Cname) 19 | data.index.name='index' #datadrame结构的行索引与列索引名字 20 | data.columns.name='columns' 21 | print(data) 22 | 23 | X=data[data.columns[1:17]] #提取特征值,不需要第一列的字母值 24 | 25 | 26 | # 读取并处理数据 27 | ScData = sc.parallelize(data) 28 | 29 | parsedData = ScData.map(lambda line: array([float(x) for x in line.split(' ')])) 30 | 31 | 32 | # 训练数据 33 | print(parsedData.collect()) 34 | 35 | clusters = KMeans.train(parsedData, k=2, maxIterations=10, 36 | runs=10, initializationMode="random") 37 | 38 | 39 | 40 | #聚类结果 41 | def sort(point): 42 | return clusters.predict(point) 43 | clusters_result = parsedData.map(sort) 44 | # Save and load model 45 | # $example off$ 46 | print('聚类结果:') 47 | print(clusters_result.collect()) 48 | 49 | sc.stop() -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/oracle/FlinkKafkaLinkOracleSource.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.oracle; 2 | 3 | import cn.northpark.flink.exactly.transactionway.FlinkKafkaToMysql; 4 | import cn.northpark.flink.util.FlinkUtils; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.java.tuple.Tuple3; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | 11 | import java.io.InputStream; 12 | 13 | /*** 14 | * flink 读取kafka数据结合Oracle查询 整合数据 15 | * @author bruce 16 | */ 17 | public class FlinkKafkaLinkOracleSource { 18 | 19 | public static void main(String[] args) throws Exception{ 20 | 21 | InputStream is = FlinkKafkaToMysql.class.getClassLoader().getResourceAsStream("config.properties"); 22 | 23 | ParameterTool parameters = ParameterTool.fromPropertiesFile(is); 24 | 25 | DataStream kafkaStream = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 26 | 27 | 28 | SingleOutputStreamOperator> tupleData = kafkaStream.map(new OracleToTupleFunciton()); 29 | 30 | tupleData.print(); 31 | 32 | FlinkUtils.getEnv().execute("FlinkKafkaLinkOracleSource"); 33 | 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/python/KmeansGroup.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | 3 | import findspark 4 | findspark.init() 5 | 6 | from numpy import array 7 | from math import sqrt 8 | from pyspark import SparkContext 9 | from pyspark.mllib.clustering import KMeans, KMeansModel 10 | import pandas as pd 11 | 12 | if __name__ == "__main__": 13 | sc = SparkContext(appName="KMeansExample",master='local') # SparkContext 14 | 15 | 16 | path='C:\\Users\\Bruce\\Downloads\\letter-recognition.data' #数据集路径 17 | Cname = ['字母','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16'] #设置列名称 18 | data=pd.read_csv(path,header=None,names=Cname) 19 | data.index.name='index' #datadrame结构的行索引与列索引名字 20 | data.columns.name='columns' 21 | print(data) 22 | 23 | X=data[data.columns[1:17]] #提取特征值,不需要第一列的字母值 24 | 25 | print(X) 26 | 27 | # 读取并处理数据 28 | ScData = sc.parallelize(X) 29 | 30 | parsedData = ScData.map(lambda line: array([float(x) for x in line.split(' ')])) 31 
| 32 | 33 | # 训练数据 34 | print(parsedData.collect()) 35 | 36 | clusters = KMeans.train(parsedData, k=2, maxIterations=10, 37 | runs=10, initializationMode="random") 38 | 39 | 40 | 41 | #聚类结果 42 | def sort(point): 43 | return clusters.predict(point) 44 | clusters_result = parsedData.map(sort) 45 | # Save and load model 46 | # $example off$ 47 | print('聚类结果:') 48 | print(clusters_result.collect()) 49 | 50 | sc.stop() -------------------------------------------------------------------------------- /src/main/resources/druid.properties: -------------------------------------------------------------------------------- 1 | # 数据库驱动|不填写HikariCp会自动识别 2 | driverClassName=com.mysql.jdbc.Driver 3 | # 访问数据库连接 4 | url=jdbc:mysql://localhost:3306/flink?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false&autoReconnect=true 5 | # 数据库用户名 6 | username=root 7 | # 数据库密码 8 | password=123456 9 | 10 | 11 | 12 | # 开启事务自动提交 13 | defaultAutoCommit=false 14 | 15 | # 配置参数,让ConfigFilter解密密码 16 | #connectionProperties=config.decrypt=true;config.decrypt.key=xxxx 17 | 18 | # 监控统计拦截的filters 19 | filters=stat 20 | 21 | 22 | # 初始化时建立物理连接的个数,初始化发生在显示调用init方法,或者第一次getConnection时 23 | initialSize=10 24 | # 最大连接池数量 25 | maxActive=50 26 | # 最小连接池数量 27 | minIdle:5 28 | # 获取连接等待超时的时间,单位毫秒 29 | maxWait=60000 30 | 31 | # 配置间隔多久才进行一次检测,检测需要关闭的空闲连接,单位是毫秒 32 | # 有两个含义:1) Destroy线程会检测连接的间隔时间 2) testWhileIdle的判断依据,详细看testWhileIdle属性的说明 33 | timeBetweenEvictionRunsMillis=60000 34 | # 一个连接在池中最小生存的时间,单位是毫秒 35 | minEvictableIdleTimeMillis=300000 36 | 37 | # 用来检测连接是否有效 38 | validationQuery=SELECT 1 39 | # 申请连接的时候检测,如果空闲时间大于timeBetweenEvictionRunsMillis,执行validationQuery检测连接是否有效 40 | testWhileIdle=true 41 | # 申请连接时执行validationQuery检测连接是否有效,做了这个配置会降低性能 42 | testOnBorrow=false 43 | # 归还连接时执行validationQuery检测连接是否有效,做了这个配置会降低性能 44 | testOnReturn=false 45 | 46 | # 是否缓存preparedStatement,也就是PSCache 47 | poolPreparedStatements=true 48 | 49 | maxPoolPreparedStatementPerConnectionSize=200 50 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project2/NP_ParallelismFileSource.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project2; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple2; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | import java.io.RandomAccessFile; 7 | 8 | public class NP_ParallelismFileSource extends RichParallelSourceFunction> { 9 | 10 | private String path; 11 | 12 | private boolean flag = true; 13 | 14 | public NP_ParallelismFileSource(String path) { 15 | this.path = path; 16 | } 17 | 18 | public NP_ParallelismFileSource() { 19 | } 20 | 21 | @Override 22 | public void run(SourceContext ctx) throws Exception { 23 | int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask(); 24 | 25 | RandomAccessFile file = new RandomAccessFile(path + "/" + subtaskIndex + ".txt", "r"); 26 | 27 | 28 | while (flag) { 29 | String readLine = file.readLine(); 30 | if (readLine != null) { 31 | readLine = new String(readLine.getBytes("ISO-8859-1"),"UTF-8" ); 32 | ctx.collect(Tuple2.of(subtaskIndex + "", readLine)); 33 | }else { 34 | Thread.sleep(1000); 35 | } 36 | } 37 | } 38 | 39 | @Override 40 | public void cancel() { 41 | flag = false; 42 | 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/Access.java: 
-------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | public class Access { 4 | 5 | public String device; 6 | public String deviceType; 7 | public String os; 8 | public String event; 9 | public String net; 10 | public String channel; 11 | public String uid; 12 | public int nu; // 1新 13 | public int nu2; 14 | public String ip; // ==> ip去解析 15 | public long time; 16 | public String version; 17 | public String province; 18 | public String city; 19 | 20 | public Product product; 21 | 22 | @Override 23 | public String toString() { 24 | return "Access{" + 25 | "device='" + device + '\'' + 26 | ", deviceType='" + deviceType + '\'' + 27 | ", os='" + os + '\'' + 28 | ", event='" + event + '\'' + 29 | ", net='" + net + '\'' + 30 | ", channel='" + channel + '\'' + 31 | ", uid='" + uid + '\'' + 32 | ", nu=" + nu + 33 | ", nu2=" + nu2 + 34 | ", ip='" + ip + '\'' + 35 | ", time=" + time + 36 | ", version='" + version + '\'' + 37 | ", province='" + province + '\'' + 38 | ", city='" + city + '\'' + 39 | ", product=" + product + 40 | '}'; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/BatchWC.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.operators.DataSource; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | import org.apache.flink.util.Collector; 9 | 10 | /** 11 | * @author zhangyang 12 | * 使用Java API来开发Flink的批处理应用程序. 13 | */ 14 | public class BatchWC { 15 | 16 | public static void main(String[] args) throws Exception { 17 | 18 | //1.环境 19 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | //2.read 22 | DataSource readTextFile = env.readTextFile("D:\\mac.txt"); 23 | 24 | readTextFile.print(); 25 | 26 | //3.transform 27 | readTextFile.flatMap(new FlatMapFunction>() { 28 | 29 | @Override 30 | public void flatMap(String value, Collector> collector) throws Exception { 31 | // TODO Auto-generated method stub 32 | String[] tokens = value.toLowerCase().split(","); 33 | for (String token : tokens) { 34 | if(token.length()>0) { 35 | collector.collect(new Tuple2(token,1));//每个单词数量设置为1,后面再统计/累加... 
36 | } 37 | } 38 | } 39 | }).groupBy(0).sum(1).print(); 40 | 41 | //4.execute 42 | // env.execute(); 43 | 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/resources/kafka.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | #configs here 3 | ################################################################################ 4 | 5 | #Kafka configs 6 | 7 | #for test common other functions 8 | topics=flink000 9 | rdf_topics=flink_rdf 10 | 11 | #for test window count and convert 12 | #topics=event008 13 | 14 | #for test sink statistics 15 | #topics=flink777 16 | group.id=bruce 17 | bootstrap.servers=PLAINTEXT://node1:9092,PLAINTEXT://node2:9092,PLAINTEXT://node3:9092 18 | #zookeeper.connect=node1:2181,node2:2181,node3:2181 19 | auto.offset.reset=latest 20 | #auto.offset.reset=earliest 21 | enable.auto.commit=true 22 | key.serializer=org.apache.kafka.common.serialization.StringSerializer 23 | value.serializer=org.apache.kafka.common.serialization.StringSerializer 24 | key.deserializer=org.apache.kafka.common.serialization.StringDeserializer 25 | value.deserializer=org.apache.kafka.common.serialization.StringDeserializer 26 | 27 | 28 | 29 | 30 | #redis 31 | redis.host=node1 32 | redis.pwd=123456 33 | redis.db=0 34 | 35 | 36 | #jdbc config 37 | # 数据库驱动|不填写HikariCp会自动识别 38 | driverClassName=com.mysql.jdbc.Driver 39 | # 访问数据库连接 40 | jdbcUrl=jdbc:mysql://localhost:3306/flink?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false&autoReconnect=true 41 | # 数据库用户名 42 | username=root 43 | # 数据库密码 44 | password=123456 45 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/exactly/overrideway/MyRedisSink.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.exactly.overrideway; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.api.java.utils.ParameterTool; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 7 | import redis.clients.jedis.Jedis; 8 | 9 | public class MyRedisSink extends RichSinkFunction> { 10 | private transient Jedis jedis; 11 | 12 | @Override 13 | public void open(Configuration parameters) throws Exception { 14 | super.open(parameters); 15 | 16 | ParameterTool params = (ParameterTool) getRuntimeContext().getExecutionConfig().getGlobalJobParameters(); 17 | String host = params.getRequired("redis.host"); 18 | String password = params.get("redis.pwd", null); 19 | int db = params.getInt( "redis.db", 0); 20 | jedis = new Jedis(host, 6379, 5000); 21 | // jedis.auth(password); 22 | jedis.select(db); 23 | } 24 | 25 | @Override 26 | public void close() throws Exception { 27 | super.close(); 28 | jedis.close(); 29 | } 30 | 31 | @Override 32 | public void invoke(Tuple3 value, Context context) throws Exception { 33 | if(!jedis.isConnected()){ 34 | jedis.connect(); 35 | } 36 | jedis.hset(value.f0,value.f1,value.f2); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/CountWindowAll.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import 
org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.datastream.AllWindowedStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow; 9 | 10 | /** 11 | * 不分组划分窗口,将整体当成一个组 12 | * @author bruce 13 | */ 14 | public class CountWindowAll { 15 | public static void main(String[] args) throws Exception { 16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | DataStreamSource source = env.socketTextStream("localhost", 4000); 19 | 20 | SingleOutputStreamOperator map = source.map(new MapFunction() { 21 | @Override 22 | public Integer map(String value) throws Exception { 23 | return Integer.parseInt(value); 24 | } 25 | }); 26 | //不分组,将整体当成一个组 27 | AllWindowedStream windowAll = map.countWindowAll(5); 28 | 29 | 30 | SingleOutputStreamOperator summed = windowAll.sum(0); 31 | 32 | summed.print(); 33 | 34 | env.execute("CountWindowAll"); 35 | 36 | 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/KafkaSink.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.util.Properties 4 | 5 | import org.apache.flink.configuration.Configuration 6 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 7 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 8 | import org.apache.kafka.common.serialization.StringSerializer 9 | 10 | class KafkaSink[T](cls:String)(topic: String) extends RichSinkFunction[T] { 11 | var props :Properties = _ 12 | //创建Kafka Producer 13 | var producer: KafkaProducer[String, String] = _ 14 | //当初始化 RichSinnkFunction时,只会调用一次 15 | override def open(parameters: Configuration): Unit = { 16 | //创建kafka配置 17 | props = new Properties() 18 | props.setProperty("bootstrap.servers","node1:9092,node2:9092,node3:9092") 19 | props.setProperty("key.serializer",classOf[StringSerializer].getName) 20 | props.setProperty("value.serializer",classOf[StringSerializer].getName) 21 | 22 | producer = new KafkaProducer[String,String](props) 23 | } 24 | 25 | //来一条数据,处理一次 26 | override def invoke(value: T, context: SinkFunction.Context): Unit = { 27 | //统计最通畅的top5通道 28 | val info: Top5MonitorBean = value.asInstanceOf[Top5MonitorBean] 29 | 30 | //向kafka中写入 31 | 32 | producer.send( new ProducerRecord[String,String](topic,info.toString)) 33 | 34 | } 35 | 36 | override def close(): Unit = { 37 | producer.close() 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/suicideApp/HdfsTest.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.suicideApp; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.LocatedFileStatus; 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.fs.RemoteIterator; 8 | 9 | import java.io.IOException; 10 | 11 | /** 12 | * !!!hadoop hdfs连接不上 直接把9000端口去掉就可以了!!!! 
13 | * @author bruce 14 | * @date 2022年04月21日 09:51:08 15 | */ 16 | public class HdfsTest { 17 | 18 | public static void main(String[] args) throws IOException { 19 | Configuration conf = new Configuration(); 20 | //这里设置namenode 21 | conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem"); 22 | conf.set("dfs.nameservices", "node1"); 23 | conf.set("fs.defaultFS", "hdfs://node1"); 24 | FileSystem fileSystem1 = FileSystem.get(conf); 25 | System.out.println("===contains1==="); 26 | //测试访问情况 27 | Path path=new Path("/scd"); 28 | System.out.println("===contains2==="); 29 | if(fileSystem1.exists(path)){ 30 | System.out.println("===contains3==="); 31 | } 32 | System.out.println("===contains4==="); 33 | RemoteIterator list=fileSystem1.listFiles(path,true); 34 | while (list.hasNext()){ 35 | LocatedFileStatus fileStatus=list.next(); 36 | System.out.println(fileStatus.getPath()); 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/UDTFSQL.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.stream.sql; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 7 | import org.apache.flink.types.Row; 8 | 9 | /** 10 | * 自定义UDTF 一行输入 多行输出 11 | */ 12 | public class UDTFSQL { 13 | public static void main(String[] args) throws Exception { 14 | 15 | //实时dataStream api 16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | //实时Table执行上下文 19 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 20 | 21 | 22 | //42.57.88.186 23 | //a.b.c.d 24 | //聪.明.的.六.猴.儿 25 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 26 | 27 | tableEnv.registerDataStream("t_lines",lines,"line"); 28 | 29 | //注册自定义函数是一个UDTF,输入一行字符串,返回多列 30 | tableEnv.registerFunction("split",new Split("\\.")); 31 | 32 | //lateral:表生成函数 33 | Table table = tableEnv.sqlQuery("select word from t_lines,lateral table(split(line)) as T(word) "); 34 | 35 | tableEnv.toAppendStream(table, Row.class).print(); 36 | // tableEnv.toRetractStream(table,Row.class).print(); 37 | env.execute("UDTFSQL"); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/DelMany.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase; 2 | 3 | import cn.northpark.flink.util.PhoenixUtilV2; 4 | 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.Objects; 8 | import java.util.stream.Collectors; 9 | 10 | /** 11 | * @author bruce 12 | * @date 2022年06月26日 12:12:06 13 | */ 14 | public class DelMany { 15 | public static void main(String[] args) { 16 | //4.删除一批关系 17 | //把 A的所有【被转发/被评论关系】删除 18 | String userid = "75e1227c896a4eaa9fe782556af0fc76"; 19 | delMany( userid); 20 | 21 | } 22 | 23 | private static void delMany(String userid) { 24 | //查询A的sub-list 25 | List> mapList = PhoenixUtilV2.queryList("select * from \"stt\".t_weibo_relations_v2 where rel_user_id = ? 
and by_type = 0",userid); 26 | List sel_list = mapList.stream().filter(t -> Objects.nonNull(t.get("USER_ID"))).map(t -> t.get("USER_ID").toString()).collect(Collectors.toList()); 27 | 28 | //删除A的数据 29 | String del_A = "delete from \"stt\".t_weibo_relations_v2 where user_id = ? and by_type = 1"; 30 | PhoenixUtilV2.delData(del_A,userid); 31 | 32 | //删除sel_list的数据 33 | String del_sel_list = "delete from \"stt\".t_weibo_relations_v2 where user_id = ? and rel_user_id = ? and by_type = 0"; 34 | for (String sel_id : sel_list) { 35 | PhoenixUtilV2.delData(del_sel_list,sel_id,userid); 36 | } 37 | 38 | 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/DelOne.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase; 2 | 3 | import cn.northpark.flink.util.PhoenixUtilV2; 4 | import cn.northpark.flink.weiboAPP.hbase.bean.WeiboRelations; 5 | import cn.northpark.flink.weiboAPP.hbase.enums.RelType; 6 | 7 | /** 8 | * @author bruce 9 | * @date 2022年06月26日 12:10:52 10 | */ 11 | public class DelOne { 12 | public static void main(String[] args) { 13 | //3.删除一条关系 14 | //构造A的b 15 | //8538d7f7b0724b518b33129672a38915 reply 4ef14c4e222d4a9c8110d1a435a24ea3 1 16 | WeiboRelations bean_by = new WeiboRelations("8538d7f7b0724b518b33129672a38915", RelType.REPLY,"4ef14c4e222d4a9c8110d1a435a24ea3",1); 17 | delOne(bean_by); 18 | } 19 | 20 | /** 21 | * 删除一条转发/评论关系 22 | */ 23 | private static void delOne(WeiboRelations bean_by) { 24 | 25 | String del_one_sql = "delete from \"stt\".t_weibo_relations_v2 where user_id = ? and rel_type = ? and rel_user_id = ? and by_type = ?"; 26 | //删除A的b 27 | PhoenixUtilV2.delData(del_one_sql, 28 | bean_by.getUser_id(), 29 | bean_by.getRel_type(), 30 | bean_by.getRel_user_id(), 31 | bean_by.getBy_type()); 32 | //删除b的A 33 | PhoenixUtilV2.delData(del_one_sql, 34 | bean_by.getRel_user_id(), 35 | bean_by.getRel_type(), 36 | bean_by.getUser_id(), 37 | bean_by.getBy_type()==1?0:1); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/KafkaSource.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | 4 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 5 | import org.apache.flink.streaming.api.datastream.DataStream; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 8 | 9 | import java.util.Properties; 10 | 11 | /** 12 | * @author zhangyang 13 | * 使用Kafka作为数据源读取数据 exactly once 14 | */ 15 | public class KafkaSource { 16 | 17 | public static void main(String[] args) throws Exception { 18 | 19 | //1.环境 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | 22 | Properties props = new Properties(); 23 | 24 | //指定Ka fka的Broker地址 25 | props.setProperty( "bootstrap.servers", "localhost:9092"); 26 | //指定组ID 27 | props.setProperty("group.id", "bruce"); 28 | //如果没有记录偏移量,第一次从最开始消费 29 | props.setProperty("auto.offset.reset", "earliest") ; 30 | //kafka的消费者不自动提交偏移量 31 | //props。setProperty("enable. auto. 
commit", "false"); 32 | 33 | //2.read 34 | FlinkKafkaConsumer kafkaSource = new FlinkKafkaConsumer<>("flink000", new SimpleStringSchema(), props); 35 | 36 | 37 | DataStream lines = env.addSource(kafkaSource); 38 | 39 | //3.sink/transform 40 | lines.print(); 41 | 42 | //4.execute 43 | env.execute(); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/TrafficEntity.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | //车辆监控实体 4 | case class MonitorCarBean(areaId: String, roadId: String, monitorId: String, cameraId: String, actionTime: Long, car: String, speed: Double) 5 | 6 | 7 | //最通畅的top5 通道实体 8 | case class Top5MonitorBean(windowStartTime: String, windowEndTime: String, monitorId: String, hightSpeedCarCount: Long, middleSpeedCount: Long, normalSpeedCarCount: Long, lowSpeedCarCount: Long) 9 | 10 | //通道通过车辆数的统计实体 11 | case class MonitorSpeedClsNumsBean(xhightSpeedCarCount: Long, xmiddleSpeedCount: Long, xnormalSpeedCarCount: Long, xlowSpeedCarCount: Long) extends Ordered[MonitorSpeedClsNumsBean] { 12 | var hightSpeedCarCount = xhightSpeedCarCount 13 | var middleSpeedCount = xmiddleSpeedCount 14 | var normalSpeedCarCount = xnormalSpeedCarCount 15 | var lowSpeedCarCount = xlowSpeedCarCount 16 | 17 | override def compare(that: MonitorSpeedClsNumsBean): Int = { 18 | if (this.hightSpeedCarCount != that.hightSpeedCarCount) { 19 | (this.hightSpeedCarCount - that.hightSpeedCarCount).toInt 20 | } else if (this.middleSpeedCount != that.middleSpeedCount) { 21 | (this.middleSpeedCount - that.middleSpeedCount).toInt 22 | } else if (this.normalSpeedCarCount != that.normalSpeedCarCount) { 23 | (this.normalSpeedCarCount - that.normalSpeedCarCount).toInt 24 | } else { 25 | (this.lowSpeedCarCount - that.lowSpeedCarCount).toInt 26 | } 27 | } 28 | } 29 | 30 | 31 | object TrafficEntity { 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/UDFSQL.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.stream.sql; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 7 | import org.apache.flink.types.Row; 8 | 9 | /** 10 | * 自定义UDF 一行输入 一行输出 11 | */ 12 | public class UDFSQL { 13 | public static void main(String[] args) throws Exception { 14 | 15 | //实时dataStream api 16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | //注册一个可以cache的文件,通过网络发送给taskManager 19 | env.registerCachedFile("/Users/bruce/Desktop/ip.txt","ip-rules"); 20 | 21 | //实时Table执行上下文 22 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 23 | 24 | 25 | //42.57.88.186 26 | //106.121.4.223 27 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 28 | 29 | tableEnv.registerDataStream("t_lines",lines,"ip"); 30 | 31 | //注册自定义函数是一个UDF,输入一个IP地址,返回ROW<省、市> 32 | tableEnv.registerFunction("ipLocation",new IpLocation()); 33 | 34 | Table table = tableEnv.sqlQuery("select ip,ipLocation(ip) from t_lines "); 35 | 36 | tableEnv.toAppendStream(table, Row.class).print(); 37 | // 
tableEnv.toRetractStream(table,Row.class).print(); 38 | env.execute("UDFSQL"); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/SlidingWindowAll.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.datastream.AllWindowedStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.time.Time; 9 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 10 | 11 | /** 12 | * 滑动窗口 13 | * @author bruce 14 | */ 15 | public class SlidingWindowAll { 16 | public static void main(String[] args) throws Exception { 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | 19 | DataStreamSource source = env.socketTextStream("localhost", 4000); 20 | 21 | SingleOutputStreamOperator map = source.map(new MapFunction() { 22 | @Override 23 | public Integer map(String value) throws Exception { 24 | return Integer.parseInt(value); 25 | } 26 | }); 27 | 28 | //不分组,将整体当成一个组 29 | AllWindowedStream windowAll = map.timeWindowAll(Time.seconds(10),Time.seconds(5)); 30 | 31 | 32 | SingleOutputStreamOperator summed = windowAll.sum(0); 33 | 34 | summed.print(); 35 | 36 | env.execute("SlidingWindowAll"); 37 | 38 | 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/TumblingWindowAll.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.datastream.AllWindowedStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.time.Time; 9 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 10 | 11 | /** 12 | * 滚动窗口,不分组划分窗口,将整体当成一个组,5秒一个窗口 13 | * @author bruce 14 | */ 15 | public class TumblingWindowAll { 16 | public static void main(String[] args) throws Exception { 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | 19 | DataStreamSource source = env.socketTextStream("localhost", 4000); 20 | 21 | SingleOutputStreamOperator map = source.map(new MapFunction() { 22 | @Override 23 | public Integer map(String value) throws Exception { 24 | return Integer.parseInt(value); 25 | } 26 | }); 27 | //不分组,将整体当成一个组 28 | AllWindowedStream windowAll = map.timeWindowAll(Time.seconds(5)); 29 | 30 | 31 | SingleOutputStreamOperator summed = windowAll.sum(0); 32 | 33 | summed.print(); 34 | 35 | env.execute("TumblingWindowAll"); 36 | 37 | 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/TT.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import cn.hutool.core.codec.Base64; 4 | import 
cn.hutool.http.HttpUtil; 5 | import cn.northpark.flink.util.JsonUtil; 6 | import com.alibaba.fastjson.JSON; 7 | import com.alibaba.fastjson.JSONArray; 8 | 9 | import java.util.Map; 10 | 11 | /** 12 | * @author bruce 13 | * @date 2023年03月21日 16:51:37 14 | */ 15 | public class TT { 16 | public static void main(String[] args) { 17 | String decode = Base64.decodeStr("NzRhODVkNjFmYTkyYWZlMzViYzY2YmUyZjk1ZjBjMTY="); 18 | 19 | // String baseUrl = "https://restapi.amap.com/v3/geocode/geo?address="+"西山温泉"+"&key="+decode; 20 | // String res = HttpUtil.get(baseUrl); 21 | // Map json2map = JsonUtil.json2map(res); 22 | // System.err.println(json2map); 23 | // if(json2map.get("info").toString().equals("OK")){ 24 | // Object geocodes = json2map.get("geocodes"); 25 | // JSONArray jsonArray = JSON.parseArray(geocodes.toString()); 26 | // Map geocodesMap = JsonUtil.json2map(jsonArray.get(0).toString()); 27 | // String location = geocodesMap.get("location").toString(); 28 | // String longitude = location.split(",")[0]; 29 | // String latitude = location.split(",")[1]; 30 | // System.err.println(longitude); 31 | // System.err.println(latitude); 32 | // 33 | // } 34 | 35 | System.err.println(Double.valueOf("114.123456")==0d); 36 | System.err.println(Double.valueOf("0.000000")==0d); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/syncIO/AsyncRestfulApplication.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project.syncIO; 2 | 3 | import cn.northpark.flink.project.ActivityBean; 4 | import cn.northpark.flink.project.syncIO.function.AsyncRestfulToActivityBeanFunciton; 5 | import cn.northpark.flink.util.FlinkUtilsV1; 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 7 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | 11 | import java.util.concurrent.TimeUnit; 12 | 13 | /** 14 | * 15 | * Kafka-console-producer --broker-list localhost:9092 --topic flink000 16 | * >u002,A1,2019-09-02 10:11:11,1,123.818517,41.312458 17 | * ---------------- 18 | * 1> ActivityBean{uid='u002', aid='A1', activityName='null', time='2019-09-02 10:11:11', eventType=1, province='辽宁省', longitude=null, latitude=null} 19 | * 20 | * 通过Http 异步IO 调用restful请求高德接口获取关联的地区的DEMO 21 | */ 22 | public class AsyncRestfulApplication { 23 | public static void main(String[] args) throws Exception { 24 | 25 | DataStream lines = FlinkUtilsV1.createKafkaStream(args,new SimpleStringSchema()); 26 | 27 | //调用异步IO的transform 28 | SingleOutputStreamOperator beans = AsyncDataStream.unorderedWait(lines, new AsyncRestfulToActivityBeanFunciton(), 0, TimeUnit.SECONDS, 10); 29 | 30 | beans.print(); 31 | 32 | FlinkUtilsV1.getEnv().execute("AsyncHandleActivityLocationsApplication"); 33 | 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/JDBCSink.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.sql.{Connection, DriverManager, PreparedStatement} 4 | 5 | import org.apache.flink.configuration.Configuration 6 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 7 | 8 | class JDBCSink[T](cls:String) 
extends RichSinkFunction[T] { 9 | var conn: Connection = _ 10 | var pst: PreparedStatement = _ 11 | var stop = false 12 | //当初始化 RichSinnkFunction时,只会调用一次 13 | override def open(parameters: Configuration): Unit = { 14 | conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/flink", "root", "123456") 15 | } 16 | 17 | //来一条数据,处理一次 18 | override def invoke(value: T, context: SinkFunction.Context): Unit = { 19 | if("Top5MonitorInfo".equals(cls)){ 20 | //统计最通畅的top5通道 21 | val info: Top5MonitorBean = value.asInstanceOf[Top5MonitorBean] 22 | pst = conn.prepareStatement("insert into t_top5_monitor_info (start_time,end_time,monitor_id,hight_speed_carcount,middle_speed_carcount,normal_speed_carcount,low_speed_carcount) values(?,?,?,?,?,?,?)") 23 | pst.setString(1,info.windowStartTime) 24 | pst.setString(2,info.windowEndTime) 25 | pst.setString(3,info.monitorId) 26 | pst.setDouble(4,info.hightSpeedCarCount) 27 | pst.setDouble(5,info.middleSpeedCount) 28 | pst.setDouble(6,info.normalSpeedCarCount) 29 | pst.setDouble(7,info.lowSpeedCarCount) 30 | pst.executeUpdate() 31 | } 32 | 33 | } 34 | 35 | override def close(): Unit = { 36 | pst.close() 37 | conn.close() 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project3/OperatorStateTest.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project3; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 6 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | public class OperatorStateTest { 10 | public static void main(String[] args) throws Exception { 11 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 12 | 13 | env.enableCheckpointing(5000); 14 | 15 | env.setStateBackend(new FsStateBackend("file:///Users/bruce/Documents/workspace/np-flink/backEnd")); 16 | 17 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 18 | 19 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3,2000)); 20 | 21 | env.addSource(new MyExactlyOnceParaFileSource("/Users/bruce/Desktop/data")).print(); 22 | 23 | //THROW exception 24 | env.socketTextStream("localhost",4000).map(new MapFunction() { 25 | @Override 26 | public Object map(String value) throws Exception { 27 | if(value.startsWith("jeyy")){ 28 | System.out.println(1/0); 29 | } 30 | return value; 31 | } 32 | }); 33 | 34 | env.execute("OperatorStateTest"); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/KeyBy3.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | /** 10 | * KeyBy实例3 keyBy多个字段进行分组 11 | * @author bruce 12 | */ 13 | public class KeyBy3 { 14 | 15 | public static void 
main(String[] args) throws Exception { 16 | 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | 19 | //直接输入单词 20 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 21 | 22 | //辽宁,沈阳,1000 23 | //山东,青岛,2000 24 | //山东,青岛,2000 25 | //山东,烟台,1000 26 | SingleOutputStreamOperator> proCityAndMoney = lines.map(new MapFunction>() { 27 | 28 | @Override 29 | public Tuple3 map(String value) throws Exception { 30 | String[] words = value.split(","); 31 | String province = words[0]; 32 | String city = words[1]; 33 | Double money = Double.parseDouble(words[2]); 34 | return Tuple3.of(province, city, money); 35 | } 36 | }); 37 | 38 | proCityAndMoney.keyBy(0,1).sum(2).print(); 39 | 40 | 41 | env.execute("KeyBy3"); 42 | 43 | } 44 | 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/RabbitMQUtils.java: -------------------------------------------------------------------------------- 1 | 2 | package cn.northpark.flink.util; 3 | 4 | import cn.northpark.flink.MerchantDayStaApp.MerchantDaySta; 5 | import com.alibaba.fastjson.JSON; 6 | import com.rabbitmq.client.*; 7 | import lombok.extern.slf4j.Slf4j; 8 | 9 | /** 10 | * 操作rabbitmq队列工具类,支持集群自动重连 11 | * 12 | * @author bruce 13 | * 14 | */ 15 | @Slf4j 16 | public class RabbitMQUtils { 17 | 18 | final static String CHARSET_UTF8 = "UTF-8"; 19 | static Channel channel = null; 20 | 21 | /** 22 | * 发送消息到rabbitmq 23 | * 24 | * @param queueName 25 | * 队列名 26 | * @param Message 27 | * 消息 28 | */ 29 | public static void Send(String queueName, String Message) { 30 | try { 31 | if(channel==null) 32 | { 33 | Connection connection = RabbitMQConFactory.getConnection("common"); 34 | channel = connection.createChannel(); 35 | } 36 | channel.queueDeclare(queueName, true, false, false, null); 37 | channel.basicPublish("", queueName, null, Message.getBytes(CHARSET_UTF8)); 38 | log.info(" Sent '" + Message + "' SUCCESS!"); 39 | } catch (Exception e) { 40 | e.printStackTrace(); 41 | } 42 | } 43 | public static void main(String[] args) throws InterruptedException { 44 | 45 | MerchantDaySta bean = new MerchantDaySta("1001",20.55); 46 | // MerchantDaySta bean2 = new MerchantDaySta("1002",80.55); 47 | // MerchantDaySta bean3 = new MerchantDaySta("1002",70.55); 48 | 49 | RabbitMQUtils.Send("flink_amount_queue", JSON.toJSONString(bean)); 50 | // RabbitMqUtils.Send("flink_amount_queue", JSON.toJSONString(bean2)); 51 | // RabbitMqUtils.Send("flink_amount_queue", JSON.toJSONString(bean3)); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/RestartStrategy2.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | 10 | public class RestartStrategy2 { 11 | public static void main(String[] args) throws Exception{ 12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 13 | 14 | //只有开启了checkpoint 才会有重启策略 15 | env.enableCheckpointing(8000); 16 | 17 | 
//设置重启策略为重启2次,间隔2秒 18 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(2,2)); 19 | 20 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 21 | 22 | SingleOutputStreamOperator> wordOne = lines.map(new MapFunction>() { 23 | 24 | @Override 25 | public Tuple2 map(String value) throws Exception { 26 | if (value.startsWith("jeyy")) { 27 | throw new RuntimeException("jeyy来了,程序出错了!!!"); 28 | } 29 | return Tuple2.of(value, 1); 30 | } 31 | }); 32 | 33 | 34 | SingleOutputStreamOperator> summed = wordOne.keyBy(0).sum(1); 35 | 36 | summed.print(); 37 | 38 | env.execute("RestartStrategy2"); 39 | 40 | 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/AddSink1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 9 | 10 | /** 11 | * @author bruce 12 | * 通过自定义sink来讲解subTask的编号问题 13 | * 14 | */ 15 | public class AddSink1 { 16 | public static void main(String[] args) throws Exception { 17 | 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | 20 | DataStreamSource socketTextStream = env.socketTextStream("localhost", 4000); 21 | 22 | 23 | SingleOutputStreamOperator> maped = socketTextStream.map(new MapFunction>() { 24 | @Override 25 | public Tuple2 map(String value) throws Exception { 26 | return Tuple2.of(value, 1L); 27 | } 28 | }); 29 | 30 | // maped.print("the result is ").setParallelism(2); 31 | 32 | maped.addSink(new RichSinkFunction>() { 33 | 34 | @Override 35 | public void invoke(Tuple2 value, Context context) throws Exception { 36 | 37 | System.out.println(getRuntimeContext().getIndexOfThisSubtask() +1 + "> "+value); 38 | 39 | } 40 | }); 41 | 42 | 43 | env.execute("AddSink1"); 44 | 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCountSocket.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.streaming.api.windowing.time.Time; 8 | import org.apache.flink.util.Collector; 9 | 10 | /** 11 | * @author zhangyang 12 | * 使用Java API来开发Flink的实时处理应用程序. 
13 | * 14 | * wc统计的数据我们源自于socket 15 | */ 16 | public class StreamingWordCountSocket { 17 | 18 | public static void main(String[] args) throws Exception { 19 | // step1 :获取执行环境 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | 22 | // step2:读取数据 23 | DataStreamSource text = env.socketTextStream("localhost", 4000); 24 | 25 | 26 | // step3: transform 27 | text.flatMap(new FlatMapFunction>() { 28 | @Override 29 | public void flatMap(String value, Collector> collector) throws Exception { 30 | String[] tokens = value.toLowerCase().split(","); 31 | for(String token : tokens) { 32 | if(token.length() > 0) { 33 | collector.collect(new Tuple2(token,1)); 34 | } 35 | } 36 | } 37 | }).keyBy(0).timeWindow(Time.seconds(5)).sum(1).print();//.setParallelism(1); 38 | 39 | //step 4:execute 40 | env.execute("StreamingWordCountSocket"); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/hadoop/MR/covid/CovidApp.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.hadoop.MR.covid; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.Text; 6 | import org.apache.hadoop.mapreduce.Job; 7 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 8 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 9 | 10 | /** 11 | * 12 | * 13 | * //hadoop提交作业 14 | * 1.hadoop jar np_hadoop-1.0-SNAPSHOT-jar-with-dependencies.jar hadoop.CovidApp 15 | * 16 | * //列出输出文件列表 17 | * 2.hdfs dfs -ls /BigDataProject/A 18 | * 19 | * //查看输出结果 20 | * 3.hadoop fs -cat /BigDataProject/A/part-r-00000 21 | * @author bruce 22 | * @date 2023年03月20日 14:00:35 23 | */ 24 | public class CovidApp { 25 | public static void main(String[] args) throws Exception { 26 | 27 | Configuration conf = new Configuration(); 28 | //本地执行 29 | conf.set("fs.defaultFS", "file:///"); 30 | conf.set("mapreduce.framework.name", "local"); 31 | 32 | Job job = Job.getInstance(conf, "Covid"); 33 | job.setJarByClass(CovidApp.class); 34 | job.setMapperClass(CovidMapper.class); 35 | job.setReducerClass(CovidReducer.class); 36 | 37 | //map 38 | job.setMapOutputKeyClass(Text.class); 39 | job.setMapOutputValueClass(Covid.class); 40 | 41 | //out 42 | job.setOutputKeyClass(Text.class); 43 | job.setOutputValueClass(Text.class); 44 | 45 | FileInputFormat.addInputPath(job, new Path("C:\\Users\\Bruce\\Downloads\\COVID-19.dat")); 46 | FileOutputFormat.setOutputPath(job, new Path("C:\\Users\\Bruce\\Downloads\\output")); 47 | System.exit(job.waitForCompletion(true) ? 
0 : 1); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/batch/SQLWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.batch; 2 | 3 | import cn.northpark.flink.WordCount; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.operators.DataSource; 7 | import org.apache.flink.table.api.Table; 8 | import org.apache.flink.table.api.bridge.java.BatchTableEnvironment; 9 | 10 | /** 11 | * @author bruce 12 | * 利用sql api进行离线计算 13 | */ 14 | public class SQLWordCount { 15 | public static void main(String[] args) throws Exception { 16 | 17 | //dataSet api 18 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 19 | 20 | //实时Table执行上下文 21 | BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env); 22 | 23 | //模拟数据 24 | DataSource wordCountDataSource = env.fromElements( 25 | new WordCount("java", 1), 26 | new WordCount("scala", 1), 27 | new WordCount("java", 1), 28 | new WordCount("java", 1), 29 | new WordCount("flink", 1), 30 | new WordCount("flink", 1), 31 | new WordCount("vue", 1) 32 | 33 | 34 | ); 35 | 36 | 37 | //将dataSet注册成表,指定字段名称 38 | tableEnv.registerDataSet("word_count",wordCountDataSource,"word,counts"); 39 | 40 | String sql = "select word,sum(counts) as counts from word_count group by word having sum(counts) >=2 order by counts desc "; 41 | 42 | Table table = tableEnv.sqlQuery(sql); 43 | 44 | //把表转换成dataSet 45 | DataSet rowDataSet = tableEnv.toDataSet(table, WordCount.class); 46 | 47 | rowDataSet.print(); 48 | 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCount2.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.util.Collector; 9 | 10 | /** 11 | * @author zhangyang 12 | * 按照步骤来一步步拆分Task是如何划分的 13 | * wc统计的数据我们源自于socket 14 | */ 15 | public class StreamingWordCount2 { 16 | 17 | public static void main(String[] args) throws Exception { 18 | // step1 :获取执行环境 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | // step2:读取数据 22 | DataStreamSource text = env.socketTextStream("localhost", 4000); 23 | 24 | env.setParallelism(2); 25 | 26 | // 拆词 + 拼数 27 | SingleOutputStreamOperator> wordAndOne = text.flatMap(new FlatMapFunction>() { 28 | @Override 29 | public void flatMap(String value, Collector> out) throws Exception { 30 | String[] words = value.split(" "); 31 | for (String word : words) { 32 | out.collect(Tuple2.of(word, 1)); 33 | } 34 | } 35 | }); 36 | 37 | 38 | //分组、累加 39 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1); 40 | 41 | 42 | //sink 43 | sumed.print().setParallelism(2); 44 | 45 | //execute 46 | env.execute("StreamingWordCount"); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- 
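Note on the task split that StreamingWordCount2 above is meant to illustrate: operators connected one-to-one with the same parallelism are fused (chained) into a single task, while keyBy introduces a network shuffle and therefore a task boundary. A minimal sketch, assuming the same env and text names as in StreamingWordCount2 (compare StreamingWordCountChain.java elsewhere in this repo), of how that chaining can be controlled explicitly:

    // Sketch only: controlling operator chaining; `env` and `text` are assumed to be
    // the StreamExecutionEnvironment and socket source defined in StreamingWordCount2.
    SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = text
            .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                @Override
                public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
                    for (String word : value.split(" ")) {
                        out.collect(Tuple2.of(word, 1));
                    }
                }
            })
            .startNewChain();                    // flatMap starts its own chain instead of fusing with the source
    // env.disableOperatorChaining();           // alternatively, disable chaining for the whole job
    // wordAndOne.slotSharingGroup("flatmap");  // or place it in a dedicated slot sharing group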
/src/main/java/cn/northpark/flink/oracle/OracleToTupleFunciton.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.oracle; 2 | 3 | 4 | import org.apache.flink.api.common.functions.RichMapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple3; 6 | import org.apache.flink.configuration.Configuration; 7 | 8 | import java.sql.Connection; 9 | import java.sql.DriverManager; 10 | import java.sql.PreparedStatement; 11 | import java.sql.ResultSet; 12 | 13 | /** 14 | * 15 | */ 16 | public class OracleToTupleFunciton extends RichMapFunction> { 17 | 18 | 19 | private transient Connection connection = null; 20 | 21 | @Override 22 | public void open(Configuration parameters) throws Exception { 23 | super.open(parameters); 24 | Class.forName("oracle.jdbc.OracleDriver"); 25 | connection = DriverManager.getConnection("jdbc:oracle:thin:@localhost:1521:test", "root", "123456"); 26 | } 27 | 28 | 29 | @Override 30 | public Tuple3 map(String word) throws Exception { 31 | 32 | PreparedStatement preparedStatement = connection.prepareStatement("select \"id\",\"word\",\"uptime\" from FLINK.\"t_word_counts\" WHERE \"word\" = ?"); 33 | 34 | preparedStatement.setString(1, word); 35 | ResultSet resultSet = preparedStatement.executeQuery(); 36 | String id = ""; 37 | String uptime = ""; 38 | while (resultSet.next()) { 39 | id = resultSet.getString(1); 40 | uptime = resultSet.getString(3); 41 | } 42 | 43 | preparedStatement.close(); 44 | 45 | 46 | return Tuple3.of(id, word, uptime); 47 | } 48 | 49 | @Override 50 | public void close() throws Exception { 51 | super.close(); 52 | connection.close(); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/syncIO/SinkToMysqlApplication.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project.syncIO; 2 | 3 | import cn.northpark.flink.project.ActivityBean; 4 | import cn.northpark.flink.project.syncIO.function.AsyncRestfulToActivityBeanFunciton; 5 | import cn.northpark.flink.project.syncIO.function.NP_MySqlSinkFunction; 6 | import cn.northpark.flink.util.FlinkUtilsV1; 7 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 8 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.DataStreamSink; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | 13 | import java.util.concurrent.TimeUnit; 14 | 15 | /*** 16 | * 统计event,省份等纬度的数目,把结果写到t_activity_counts表 17 | * 写入mysql数据库DEMO 18 | */ 19 | public class SinkToMysqlApplication { 20 | public static void main(String[] args) throws Exception { 21 | 22 | DataStream lines = FlinkUtilsV1.createKafkaStream(args,new SimpleStringSchema()); 23 | 24 | //调用异步IO的transform 25 | SingleOutputStreamOperator beans = AsyncDataStream.unorderedWait(lines, new AsyncRestfulToActivityBeanFunciton(), 0, TimeUnit.SECONDS, 10); 26 | 27 | SingleOutputStreamOperator summed = beans.keyBy("eventType").sum("counts"); 28 | 29 | SingleOutputStreamOperator summed2 = beans.keyBy("eventType","province").sum("counts"); 30 | 31 | DataStreamSink addSink = summed.addSink(new NP_MySqlSinkFunction()); 32 | 33 | // DataStreamSink addSink2 = summed2.addSink(new NP_MySqlSink()); 34 | 35 | FlinkUtilsV1.getEnv().execute("SinkToMysqlApplication"); 36 | 37 | } 38 | } 39 | 
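SinkToMysqlApplication above funnels the per-event counts into MySQL through NP_MySqlSinkFunction (listed further down), whose INSERT ... ON DUPLICATE KEY UPDATE only behaves as an upsert if t_activity_counts has a suitable unique key; the table definition is not part of this repo dump, so the schema below is only a hypothetical sketch:

    // Hypothetical DDL assumed by the upsert in NP_MySqlSinkFunction; adjust to the real schema.
    //   CREATE TABLE t_activity_counts (
    //     id     VARCHAR(64) NOT NULL,
    //     event  INT         NOT NULL,
    //     counts INT         NOT NULL,
    //     PRIMARY KEY (id, event)
    //   );
    // Without a PRIMARY KEY / UNIQUE index, ON DUPLICATE KEY UPDATE degrades to plain inserts.
    summed.addSink(new NP_MySqlSinkFunction());  // as wired above
    // summed2 (event + province) is not covered by this sink: it never writes a province column,
    // so persisting that aggregate would need a table and sink variant keyed on (id, event, province).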
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/oracle/SinkOracle.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.oracle; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 7 | 8 | import java.sql.Connection; 9 | import java.sql.DriverManager; 10 | import java.sql.PreparedStatement; 11 | 12 | @Slf4j 13 | public class SinkOracle extends RichSinkFunction<Tuple3<String, String, String>> { 14 | 15 | private Connection connection; 16 | private PreparedStatement statement; 17 | 18 | // 1,初始化 19 | @Override 20 | public void open(Configuration parameters) throws Exception { 21 | super.open(parameters); 22 | Class.forName("oracle.jdbc.OracleDriver"); 23 | connection = DriverManager.getConnection("jdbc:oracle:thin:@localhost:1521:test", "root", "123456"); 24 | } 25 | 26 | // 2,执行 27 | @Override 28 | public void invoke(Tuple3<String, String, String> objectNode, Context context) throws Exception { 29 | log.info("----------------------> {}", connection); 30 | String sql = "INSERT INTO \"FLINK\".\"t_word_counts\"(\"id\", \"word\", \"uptime\") values (?,?,?) " ; 31 | PreparedStatement ps = connection.prepareStatement(sql); 32 | log.info("sql---------------------> {}", sql); 33 | ps.setString(1, objectNode.f0); 34 | ps.setString(2, objectNode.f1); 35 | ps.setString(3, objectNode.f2); 36 | ps.executeUpdate(); ps.close(); 37 | } 38 | 39 | // 3,关闭 40 | @Override 41 | public void close() throws Exception { 42 | super.close(); 43 | if (statement != null) 44 | statement.close(); 45 | if (connection != null) 46 | connection.close(); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/batch/TableWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.batch; 2 | 3 | import cn.northpark.flink.WordCount; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.operators.DataSource; 7 | import org.apache.flink.table.api.Table; 8 | import org.apache.flink.table.api.bridge.java.BatchTableEnvironment; 9 | 10 | /** 11 | * @author bruce 12 | * 利用table api进行离线计算 13 | */ 14 | public class TableWordCount { 15 | public static void main(String[] args) throws Exception { 16 | 17 | //dataSet api 18 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 19 | 20 | //批处理Table执行上下文 21 | BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env); 22 | 23 | //模拟数据 24 | DataSource<WordCount> wordCountDataSource = env.fromElements(new WordCount("java", 1), 25 | new WordCount("scala", 1), 26 | new WordCount("java", 1), 27 | new WordCount("java", 1), 28 | new WordCount("flink", 1), 29 | new WordCount("flink", 1), 30 | new WordCount("vue", 1) 31 | 32 | 33 | ); 34 | 35 | //将dataSet注册成表 36 | Table table = tableEnv.fromDataSet(wordCountDataSource); 37 | 38 | System.out.println("schema---" + table.getSchema()); 39 | 40 | Table table2 = table 41 | .groupBy("word") 42 | .select("word, counts.sum as counts") 43 | .filter("counts >=2 ") 44 | .orderBy("counts.desc"); 45 | 46 | //把表转换成dataSet 47 | DataSet<WordCount> rowDataSet = tableEnv.toDataSet(table2, WordCount.class); 48 | 49 | rowDataSet.print(); 50 | 51 | } 52 | } 53 |
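Both SQLWordCount and TableWordCount above convert between DataSet<WordCount> and Table, which requires cn.northpark.flink.WordCount to be a Flink-compatible POJO (public class, public no-argument constructor, public fields or getters/setters). The actual class is not included in this part of the listing; a minimal shape it presumably has, shown only as an assumption:

    // Hypothetical sketch of the WordCount bean used by the two batch jobs above;
    // the real cn.northpark.flink.WordCount lives elsewhere in this repo.
    public class WordCount {
        public String word;
        public long counts;

        public WordCount() {}                        // no-arg constructor required for Flink POJO mapping
        public WordCount(String word, long counts) {
            this.word = word;
            this.counts = counts;
        }

        @Override
        public String toString() {
            return word + "," + counts;
        }
    }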
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/FlinkUtilsV1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.util; 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 4 | import org.apache.flink.streaming.api.datastream.DataStream; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 7 | 8 | import java.util.Properties; 9 | 10 | /** 11 | * 执行程序的参数 12 | * --topics flink000 --group.id bruce --bootstrap.servers node1:9092 13 | */ 14 | public class FlinkUtilsV1 { 15 | 16 | public static final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | /** 19 | * 从kafka读取数据 20 | * 21 | * @param args 22 | * @param simpleStringSchema 23 | * @return 24 | */ 25 | public static DataStream createKafkaStream(String[] args, SimpleStringSchema simpleStringSchema) { 26 | 27 | String topic = args[0]; 28 | String groupId = args[1]; 29 | String brokerList = args[2]; 30 | Properties props = new Properties(); 31 | //指定Ka fka的Broker地址 32 | props.setProperty("bootstrap.servers", brokerList); 33 | //指定组ID 34 | props.setProperty("group.id", groupId); 35 | //如果没有记录偏移量,第一次从最开始消费 36 | props.setProperty("auto.offset.reset", "earliest"); 37 | //kafka的消费者不自动提交偏移量 38 | //props . setProperty("enable.auto. commit", "false"); 39 | //KafkaSource 40 | FlinkKafkaConsumer kafkaSource = new FlinkKafkaConsumer<>( 41 | topic, 42 | new SimpleStringSchema(), 43 | props); 44 | 45 | 46 | return env.addSource(kafkaSource); 47 | } 48 | 49 | public static StreamExecutionEnvironment getEnv() { 50 | return env; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project2/OperatorState1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project2; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | 11 | public class OperatorState1 { 12 | public static void main(String[] args) throws Exception { 13 | 14 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | 16 | env.enableCheckpointing(5000); 17 | 18 | env.setStateBackend(new FsStateBackend("file:////Users/bruce/Documents/workspace/np-flink/backEnd")); 19 | 20 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000)); 21 | 22 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 23 | 24 | 25 | DataStreamSource> lines = env.addSource(new NP_ExactlyOnceParallelismFileSource("/Users/bruce/Desktop/data")); 26 | 27 | lines.print(); 28 | 29 | 30 | DataStreamSource socketTextStream = env.socketTextStream("localhost", 4000); 31 | 32 | socketTextStream.map(new MapFunction() { 33 | @Override 34 | public Object map(String value) throws Exception { 35 | if(value.startsWith("jeyy")){ 36 | 
System.out.println(1/0); 37 | } 38 | return value; 39 | } 40 | }); 41 | 42 | env.execute("OperatorState1"); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/exactly/transactionway/FlinkKafkaPrint.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.exactly.transactionway; 2 | 3 | import cn.northpark.flink.util.FlinkUtils; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.java.tuple.Tuple3; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.util.Collector; 11 | import org.apache.flink.util.StringUtils; 12 | 13 | import java.io.InputStream; 14 | import java.time.LocalDateTime; 15 | import java.util.UUID; 16 | 17 | /*** 18 | * Flink从kafka读取数据写入Oracle 并且实现exactly once 19 | * @author bruce 20 | */ 21 | public class FlinkKafkaPrint { 22 | 23 | public static void main(String[] args) throws Exception{ 24 | 25 | InputStream is = FlinkKafkaToMysql.class.getClassLoader().getResourceAsStream("config.properties"); 26 | 27 | ParameterTool parameters = ParameterTool.fromPropertiesFile(is); 28 | 29 | DataStream kafkaStream = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 30 | 31 | 32 | SingleOutputStreamOperator> words = kafkaStream.flatMap(new FlatMapFunction>() { 33 | @Override 34 | public void flatMap(String value, Collector> out) throws Exception { 35 | if (!StringUtils.isNullOrWhitespaceOnly(value)) { 36 | 37 | out.collect(Tuple3.of(UUID.randomUUID().toString(), value, LocalDateTime.now().toString())); 38 | } 39 | } 40 | }); 41 | 42 | words.print(); 43 | 44 | FlinkUtils.getEnv().execute("FlinkKafkaPrint"); 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/syncIO/function/NP_MySqlSinkFunction.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project.syncIO.function; 2 | 3 | import cn.northpark.flink.project.ActivityBean; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 6 | 7 | import java.sql.Connection; 8 | import java.sql.DriverManager; 9 | import java.sql.PreparedStatement; 10 | 11 | public class NP_MySqlSinkFunction extends RichSinkFunction { 12 | 13 | private Connection connection = null; 14 | 15 | @Override 16 | public void open(Configuration parameters) throws Exception { 17 | super.open(parameters); 18 | connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/flink?characterEncoding=UTF-8","root","123456"); 19 | 20 | } 21 | 22 | @Override 23 | public void invoke(ActivityBean bean, Context context) throws Exception { 24 | 25 | //插入或者更新 26 | //INSERT INTO t_activity_counts (id, event, counts) VALUES (?, ?, ?) 27 | // ON DUPLICATE KEY UPDATE counts = ? 28 | 29 | PreparedStatement preparedStatement = null; 30 | try{ 31 | 32 | preparedStatement = connection.prepareStatement(" INSERT INTO t_activity_counts (id, event, counts) VALUES (?, ?, ?) ON DUPLICATE KEY UPDATE counts = ? 
"); 33 | preparedStatement.setString(1,bean.aid); 34 | preparedStatement.setInt(2,bean.eventType); 35 | preparedStatement.setInt(3,bean.counts); 36 | preparedStatement.setInt(4,bean.counts); 37 | 38 | preparedStatement.executeUpdate(); 39 | }finally { 40 | if(preparedStatement!=null){ 41 | preparedStatement.close(); 42 | } 43 | } 44 | 45 | 46 | 47 | } 48 | 49 | @Override 50 | public void close() throws Exception { 51 | super.close(); 52 | connection.close(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/CountWindow.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow; 12 | 13 | /** 14 | * 先分组,每个组达到一定数目才会被触发窗口 15 | * @author bruce 16 | */ 17 | public class CountWindow { 18 | public static void main(String[] args) throws Exception { 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | DataStreamSource source = env.socketTextStream("localhost", 4000); 22 | 23 | //spark 1 24 | //spark 2 25 | //java 3 26 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 27 | @Override 28 | public Tuple2 map(String value) throws Exception { 29 | String[] lines = value.split(" "); 30 | return Tuple2.of(lines[0],Integer.parseInt(lines[1])); 31 | } 32 | }); 33 | 34 | //先分组 35 | KeyedStream, Tuple> keyed = map.keyBy(0); 36 | 37 | //按照分组后分窗口 38 | WindowedStream, Tuple, GlobalWindow> window = keyed.countWindow(5); 39 | 40 | 41 | SingleOutputStreamOperator> summed = window.sum(1); 42 | 43 | summed.print(); 44 | 45 | env.execute("CountWindow"); 46 | 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/RestartStrategies1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | 10 | /** 11 | * @author zhangyang 12 | * 重启策略 13 | */ 14 | public class RestartStrategies1 { 15 | 16 | public static void main(String[] args) throws Exception { 17 | // step1 :获取执行环境 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | 20 | //只有开启了checkpoint 才会有重启策略 默认是不重启 21 | env.enableCheckpointing(5000);//每隔5s进行一次checkpoint 22 | //默认的重启策略是无限重启 Integer.MAX_VALUE 次 23 | 24 | //重启重试次数 25 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3,2000)); 26 | 27 | 28 | // step2:读取数据 29 | DataStreamSource 
text = env.socketTextStream("localhost", 4000); 30 | 31 | 32 | //把单词和1拼一块 33 | SingleOutputStreamOperator> wordAndOne = text.map(new MapFunction>() { 34 | @Override 35 | public Tuple2 map(String value) throws Exception { 36 | if(value.startsWith("jeyy")){ 37 | throw new RuntimeException("jeyy来了,发生异常!!"); 38 | } 39 | return Tuple2.of(value, 1); 40 | } 41 | }); 42 | 43 | //分组、累加 44 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 45 | 46 | 47 | //sink 48 | sumed.print(); 49 | 50 | //execute 51 | env.execute("RestartStrategies1"); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/KafkaWordCountSQL.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.stream.sql; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 7 | import org.apache.flink.table.descriptors.Json; 8 | import org.apache.flink.table.descriptors.Kafka; 9 | import org.apache.flink.table.descriptors.Schema; 10 | import org.apache.flink.types.Row; 11 | 12 | /** 13 | * 读取kafka数据 利用sql api来查询 14 | */ 15 | public class KafkaWordCountSQL { 16 | public static void main(String[] args) throws Exception { 17 | 18 | //实时dataStream api 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | 22 | //实时Table执行上下文 23 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 24 | 25 | 26 | tableEnv.connect(new Kafka() 27 | .version("universal") 28 | .topic("bruce") 29 | .startFromEarliest() 30 | .property("bootstrap.servers","localhost:9092") 31 | 32 | ).withFormat(new Json().deriveSchema()) 33 | .withSchema(new Schema() 34 | .field("name", TypeInformation.of(String.class)) 35 | .field("gender",TypeInformation.of(String.class)) 36 | ).inAppendMode().createTemporaryTable("kafkaSource"); 37 | 38 | 39 | 40 | //这里是table api的实现写法 41 | Table table = tableEnv.scan("kafkaSource") 42 | .groupBy("gender") 43 | .select("gender ,count(1) as counts"); 44 | 45 | tableEnv.toAppendStream(table, Row.class).print(); 46 | // tableEnv.toRetractStream(table,Row.class).print(); 47 | env.execute("KafkaWordCountSQL"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/oracle/FlinkKafkaSink.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.oracle; 2 | 3 | import cn.northpark.flink.exactly.transactionway.FlinkKafkaToMysql; 4 | import cn.northpark.flink.util.FlinkUtils; 5 | import org.apache.flink.api.common.functions.FlatMapFunction; 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 7 | import org.apache.flink.api.java.tuple.Tuple3; 8 | import org.apache.flink.api.java.utils.ParameterTool; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.util.Collector; 12 | import org.apache.flink.util.StringUtils; 13 | 14 | import java.io.InputStream; 15 | import java.time.LocalDateTime; 16 | import java.util.UUID; 17 | 18 | /*** 19 | * @author bruce 20 | */ 21 | public class FlinkKafkaSink { 22 | 
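//流程:读取 config.properties 构造 Kafka 源 -> 过滤空消息并包装成 (uuid, 原始消息, 当前时间) 的 Tuple3 -> 打印并通过 SinkOracle 写入 Oracle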
23 | public static void main(String[] args) throws Exception{ 24 | 25 | InputStream is = FlinkKafkaToMysql.class.getClassLoader().getResourceAsStream("config.properties"); 26 | 27 | ParameterTool parameters = ParameterTool.fromPropertiesFile(is); 28 | 29 | DataStream kafkaStream = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 30 | 31 | 32 | SingleOutputStreamOperator> words = kafkaStream.flatMap(new FlatMapFunction>() { 33 | @Override 34 | public void flatMap(String value, Collector> out) throws Exception { 35 | if (!StringUtils.isNullOrWhitespaceOnly(value)) { 36 | 37 | out.collect(Tuple3.of(UUID.randomUUID().toString(), value, LocalDateTime.now().toString())); 38 | } 39 | } 40 | }); 41 | 42 | words.print(); 43 | 44 | words.addSink(new SinkOracle()); 45 | 46 | FlinkUtils.getEnv().execute("FlinkKafkaPrint"); 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/function/MysqlToActivityBeanFunciton.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project.function; 2 | 3 | 4 | import cn.northpark.flink.project.ActivityBean; 5 | import org.apache.flink.api.common.functions.RichMapFunction; 6 | import org.apache.flink.configuration.Configuration; 7 | 8 | import java.sql.Connection; 9 | import java.sql.DriverManager; 10 | import java.sql.PreparedStatement; 11 | import java.sql.ResultSet; 12 | 13 | /** 14 | */ 15 | public class MysqlToActivityBeanFunciton extends RichMapFunction { 16 | 17 | 18 | private transient Connection connection = null; 19 | 20 | @Override 21 | public void open(Configuration parameters) throws Exception { 22 | super.open(parameters); 23 | connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/flink?characterEncoding=UTF-8","root","123456"); 24 | } 25 | 26 | 27 | @Override 28 | public ActivityBean map(String line) throws Exception { 29 | String[] fields = line.split( ","); 30 | //u001,A1,2019-09-02 10:10:11,1 ,北京市 31 | 32 | PreparedStatement preparedStatement = connection.prepareStatement("SELECT NAME FROM T_ACTIVITY WHERE ID = ?"); 33 | 34 | String uid = fields[0] ; 35 | String aid = fields[1] ; 36 | String time = fields[2] ; 37 | int eventType = Integer.parseInt(fields[3]) ; 38 | String province = fields[4] ; 39 | String activityName = null; 40 | preparedStatement.setString(1,aid); 41 | ResultSet resultSet = preparedStatement.executeQuery(); 42 | while (resultSet.next()){ 43 | activityName = resultSet.getString(1); 44 | } 45 | 46 | preparedStatement.close(); 47 | 48 | 49 | return ActivityBean.of(uid,aid,activityName,time,eventType,province); 50 | } 51 | 52 | @Override 53 | public void close() throws Exception { 54 | super.close(); 55 | connection.close(); 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/InspectSitemap.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project; 2 | 3 | import cn.northpark.flink.util.HttpGetUtils; 4 | import org.apache.commons.lang.StringUtils; 5 | import org.apache.flink.api.common.functions.RichMapFunction; 6 | import org.apache.flink.api.java.ExecutionEnvironment; 7 | import org.apache.flink.api.java.operators.DataSource; 8 | import org.apache.flink.api.java.operators.MapOperator; 9 | import org.apache.flink.configuration.Configuration; 10 | import org.apache.http.HttpResponse; 11 
| import org.apache.http.client.config.RequestConfig; 12 | import org.apache.http.client.methods.HttpGet; 13 | import org.apache.http.impl.nio.client.CloseableHttpAsyncClient; 14 | import org.apache.http.impl.nio.client.HttpAsyncClients; 15 | import org.apache.http.util.EntityUtils; 16 | 17 | import java.util.concurrent.CompletableFuture; 18 | import java.util.concurrent.Future; 19 | import java.util.function.Supplier; 20 | 21 | /** 22 | * @author bruce 23 | * @date 2022年10月08日 13:48:34 24 | */ 25 | public class InspectSitemap { 26 | public static void main(String[] args) throws Exception{ 27 | 28 | //dataSet api 29 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 30 | 31 | //读取数据 32 | DataSource sitemap_lines = env.readTextFile("C:\\Users\\Bruce\\Downloads\\soft.txt"); 33 | 34 | MapOperator bad_url = sitemap_lines.map(new RichMapFunction() { 35 | 36 | @Override 37 | public Object map(String value) throws Exception { 38 | 39 | 40 | String dataResult = HttpGetUtils.getDataResult(value); 41 | 42 | if (StringUtils.isBlank(dataResult)) { 43 | return value; 44 | } 45 | 46 | return null; 47 | } 48 | 49 | }); 50 | 51 | bad_url.writeAsText("C:\\Users\\Bruce\\Downloads\\bad_req.txt"); 52 | 53 | env.execute("aa"); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/batch/BatchSQLWordCountQueryCommon.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.batch; 2 | 3 | import cn.northpark.flink.WordCount; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.operators.DataSource; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.table.api.Table; 9 | import org.apache.flink.table.api.bridge.java.BatchTableEnvironment; 10 | import org.apache.flink.types.Row; 11 | 12 | /** 13 | * @author bruce 14 | * 利用sql api进行离线查询 15 | * 从配置文件读取参数匹配 16 | * 适合不会编程的人员调用 17 | */ 18 | public class BatchSQLWordCountQueryCommon { 19 | public static void main(String[] args) throws Exception { 20 | 21 | ParameterTool parameters = ParameterTool.fromPropertiesFile("/Users/bruce/Documents/workspace/np-flink/src/main/resources/sqlcfg.properties"); 22 | 23 | //dataSet api 24 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | //实时Table执行上下文 27 | BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env); 28 | 29 | //模拟数据 30 | DataSource wordCountDataSource = env.fromElements( 31 | new WordCount("java", 1), 32 | new WordCount("scala", 1), 33 | new WordCount("java", 1), 34 | new WordCount("java", 1), 35 | new WordCount("flink", 1), 36 | new WordCount("flink", 1), 37 | new WordCount("vue", 1) 38 | 39 | 40 | ); 41 | 42 | 43 | //将dataSet注册成表,指定字段名称 44 | tableEnv.registerDataSet(parameters.getRequired("table"),wordCountDataSource,parameters.getRequired("columns")); 45 | 46 | Table table = tableEnv.sqlQuery(parameters.getRequired("sql")); 47 | 48 | //把表转换成dataSet 49 | DataSet rowDataSet = tableEnv.toDataSet(table, Row.class); 50 | 51 | rowDataSet.print(); 52 | 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/MysqlActivityNameApplication.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project; 2 | 3 | import 
cn.northpark.flink.project.function.MysqlToActivityBeanFunciton; 4 | import cn.northpark.flink.util.FlinkUtilsV1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | 9 | /** 10 | * 11 | * 从kafka或者其他源读数据 关联mysql查询信息,返回Bean 12 | * 13 | * u001,A1,2019-09-0210:10:11,1,北京市 14 | * u002,A1,2019-09-0210:11:11,1,辽宁省 15 | * u001,A1,2019-09-0210:11:11,2,北京市 16 | * u001,A1,2019-09-0210:11:30,3,北京市 17 | * u002,A1,2019-09-0210:12:11,2,辽宁省 18 | * u003,A2,2019-09-0210:13:11,1,山东省 19 | * u003,A22019-09-0210:13:20,2,山东省 20 | * u003,A2,2019-09-0210:14:20,3,山东省 21 | * u004,A1,2019-09-0210:15:20,1,北京市 22 | * u004,A1,2019-09-0210:15:20,2,北京市 23 | * u005,A1,2019-09-0210:15:20,1,河北省 24 | * u001,A22019-09-0210:16:11,1,北京市 25 | * u001,A2,2019-09-0210:16:11,2,北京市 26 | * u002,A1,2019-09-0210:18:11,2,辽宁省 27 | * u002,A1,2019-09-0210:19:11,3,辽宁省 28 | * * * 29 | * * * 30 | * * * id name last_update 31 | * * * A1 新人礼包 2019-10-15 11:36:36 32 | * * * A2 月末活动 2019-10-15 16:37:42 33 | * * * A3 周末活动 2019-10-15 11:44:23 34 | * * * A4 年度促销 2019-10-15 11:44:23 35 | * 36 | * 37 | * 38 | * 希望的得到的数据: 39 | * u001新人礼包,2019-09-0210:10:11, 1,北京市 40 | * u002 ,新人礼包,2019-09-0210:11:11, 1 ,辽宁省 41 | * 42 | */ 43 | public class MysqlActivityNameApplication { 44 | 45 | public static void main(String[] args) throws Exception { 46 | 47 | DataStream lines = FlinkUtilsV1.createKafkaStream(args,new SimpleStringSchema()); 48 | 49 | SingleOutputStreamOperator beans = lines.map(new MysqlToActivityBeanFunciton()); 50 | 51 | beans.print(); 52 | 53 | FlinkUtilsV1.getEnv().execute("HandleActivityNameApplication"); 54 | 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/AddMony.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase; 2 | 3 | import cn.northpark.flink.util.PhoenixUtilV2; 4 | import cn.northpark.flink.weiboAPP.hbase.bean.WeiboRelations; 5 | import cn.northpark.flink.weiboAPP.hbase.enums.RelType; 6 | 7 | import java.util.UUID; 8 | 9 | /** 10 | * @author bruce 11 | * @date 2022年06月26日 10:03:33 12 | */ 13 | public class AddMony { 14 | public static void main(String[] args) { 15 | 16 | String uid = UUID.randomUUID().toString().replace("-",""); 17 | //添加1对多的转发关系 18 | // A -B1 19 | // -B2 20 | // -B3 21 | for (int i = 0; i < 20; i++) { 22 | 23 | String rel_uid_ = UUID.randomUUID().toString().replace("-",""); 24 | WeiboRelations bean_by_ = new WeiboRelations(uid, RelType.TRANS_LINK,rel_uid_,1); 25 | addOne(bean_by_); 26 | } 27 | } 28 | 29 | /** 30 | * 添加一条转发/评论关系 31 | * @param bean_by 32 | */ 33 | private static void addOne(WeiboRelations bean_by) { 34 | //根据被转发关系构造一条主动转发关系 35 | WeiboRelations bean_self = new WeiboRelations(bean_by.getRel_user_id(), bean_by.getRel_type(), bean_by.getUser_id(),bean_by.getBy_type()==1?0:1); 36 | 37 | //分别插入2条数据 38 | 39 | String insert_rel_sql = "UPSERT INTO \"stt\".t_weibo_relations_v2 (ID,USER_ID,REL_TYPE,REL_USER_ID,BY_TYPE) " + 40 | " VALUES ( '" + UUID.randomUUID().toString()+"' ,'"+ bean_by.getUser_id()+"' ,'"+ bean_by.getRel_type()+"','"+ bean_by.getRel_user_id()+"',"+ bean_by.getBy_type()+" )"; 41 | 42 | 43 | String insert_rel_sql2 = "UPSERT INTO \"stt\".t_weibo_relations_v2 (ID,USER_ID,REL_TYPE,REL_USER_ID,BY_TYPE) " + 44 | " VALUES ( '" + 
UUID.randomUUID().toString()+"' ,'"+ bean_self.getUser_id()+"' ,'"+bean_self.getRel_type()+"','"+bean_self.getRel_user_id()+"',"+bean_self.getBy_type()+" )"; 45 | 46 | 47 | PhoenixUtilV2.insertData(insert_rel_sql); 48 | PhoenixUtilV2.insertData(insert_rel_sql2); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/udf/UserBrowseLog.java: -------------------------------------------------------------------------------- 1 | 2 | package cn.northpark.flink.table_sql_api.stream.sql.udf; 3 | 4 | import com.google.gson.annotations.Expose; 5 | 6 | import javax.annotation.Generated; 7 | 8 | @Generated("net.hexar.json2pojo") 9 | @SuppressWarnings("unused") 10 | public class UserBrowseLog { 11 | 12 | @Expose 13 | private String eventTime; 14 | @Expose 15 | private String eventType; 16 | @Expose 17 | private String productID; 18 | @Expose 19 | private int productPrice; 20 | @Expose 21 | private String userID; 22 | @Expose 23 | private Long eventTimeTimestamp; 24 | 25 | 26 | private String orderID; 27 | 28 | public String getEventTime() { 29 | return eventTime; 30 | } 31 | 32 | public void setEventTime(String eventTime) { 33 | this.eventTime = eventTime; 34 | } 35 | 36 | public String getEventType() { 37 | return eventType; 38 | } 39 | 40 | public void setEventType(String eventType) { 41 | this.eventType = eventType; 42 | } 43 | 44 | public String getProductID() { 45 | return productID; 46 | } 47 | 48 | public void setProductID(String productID) { 49 | this.productID = productID; 50 | } 51 | 52 | public int getProductPrice() { 53 | return productPrice; 54 | } 55 | 56 | public void setProductPrice(int productPrice) { 57 | this.productPrice = productPrice; 58 | } 59 | 60 | public String getUserID() { 61 | return userID; 62 | } 63 | 64 | public void setUserID(String userID) { 65 | this.userID = userID; 66 | } 67 | 68 | public Long getEventTimeTimestamp() { 69 | return eventTimeTimestamp; 70 | } 71 | 72 | public void setEventTimeTimestamp(Long eventTimeTimestamp) { 73 | this.eventTimeTimestamp = eventTimeTimestamp; 74 | } 75 | 76 | public String getOrderID() { 77 | return orderID; 78 | } 79 | 80 | public void setOrderID(String orderID) { 81 | this.orderID = orderID; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCountParam.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.time.Time; 9 | import org.apache.flink.util.Collector; 10 | 11 | /** 12 | * @author zhangyang 13 | * 使用Java API来开发Flink的实时处理应用程序. 
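 * 启动参数示例:--port 9999(未传参数或解析失败时回退到默认端口 9999)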
14 | * wc统计的数据我们源自于socket 15 | * 端口从参数传入 16 | */ 17 | public class StreamingWordCountParam { 18 | 19 | 20 | public static void main(String[] args) throws Exception { 21 | 22 | 23 | // 获取参数 24 | int port = 0; 25 | 26 | try { 27 | ParameterTool tool = ParameterTool.fromArgs(args); 28 | port = tool.getInt("port"); 29 | } catch (Exception e) { 30 | System.err.println("端口未设置,使用默认端口9999"); 31 | port = 9999; 32 | } 33 | 34 | 35 | // step1 :获取执行环境 36 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 37 | 38 | 39 | // step2:读取数据 40 | DataStreamSource text = env.socketTextStream("localhost", port); 41 | 42 | 43 | // step3: transform 44 | text.flatMap(new FlatMapFunction>() { 45 | @Override 46 | public void flatMap(String value, Collector> collector) throws Exception { 47 | String[] tokens = value.toLowerCase().split(","); 48 | for(String token : tokens) { 49 | if(token.length() > 0) { 50 | collector.collect(new Tuple2(token,1)); 51 | } 52 | } 53 | } 54 | }).keyBy(0).timeWindow(Time.seconds(5)).sum(1).print().setParallelism(1); 55 | 56 | 57 | //step4 执行函数 58 | env.execute("StreamingWCJavaApp"); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.util.Collector; 10 | 11 | /** 12 | * @author zhangyang 13 | * 按照步骤来一步步拆分Task是如何划分的 14 | * wc统计的数据我们源自于socket 15 | */ 16 | public class StreamingWordCount { 17 | 18 | public static void main(String[] args) throws Exception { 19 | // step1 :获取执行环境 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | 22 | // step2:读取数据 23 | DataStreamSource text = env.socketTextStream("localhost", 4000); 24 | 25 | env.setParallelism(2); 26 | 27 | // 拆词 28 | SingleOutputStreamOperator words = text.flatMap(new FlatMapFunction() { 29 | @Override 30 | public void flatMap(String value, Collector out) throws Exception { 31 | String[] words = value.split(" "); 32 | for (String word : words) { 33 | out.collect(word); 34 | } 35 | } 36 | }); 37 | 38 | //把单词和1拼一块 39 | SingleOutputStreamOperator> wordAndOne = words.map(new MapFunction>() { 40 | @Override 41 | public Tuple2 map(String value) throws Exception { 42 | return Tuple2.of(value, 1); 43 | } 44 | }); 45 | 46 | //分组、累加 47 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 48 | 49 | 50 | //sink 51 | sumed.print().setParallelism(2); 52 | 53 | //execute 54 | env.execute("StreamingWordCount"); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/DruidUtils.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.util; 2 | 3 | import com.alibaba.druid.pool.DruidDataSourceFactory; 4 | import lombok.extern.slf4j.Slf4j; 5 | 6 | import javax.sql.DataSource; 7 | import java.io.IOException; 8 | import java.io.InputStream; 9 | 
import java.sql.Connection; 10 | import java.sql.SQLException; 11 | import java.util.Properties; 12 | 13 | 14 | /** 15 | * @author zhangyang 16 | * Druid数据库连接池工具类的设计 17 | */ 18 | @Slf4j 19 | public class DruidUtils { 20 | /** 21 | * 默认配置文件名 22 | */ 23 | private transient static String confile = "druid.properties"; 24 | /** 25 | * 配置文件 26 | */ 27 | private transient static Properties p = null; 28 | /** 29 | * 唯一dateSource,保证全局只有一个数据库连接池 30 | */ 31 | private transient static DataSource dataSource = null; 32 | 33 | 34 | static { 35 | p = new Properties(); 36 | InputStream inputStream = null; 37 | try { 38 | // java应用 读取配置文件 39 | inputStream = DruidUtils.class.getClassLoader().getResourceAsStream(confile); 40 | p.load(inputStream); 41 | } catch (Exception e) { 42 | e.printStackTrace(); 43 | } finally { 44 | try { 45 | if (inputStream != null) { 46 | inputStream.close(); 47 | } 48 | } catch (IOException e) { 49 | // ignore 50 | } 51 | } // end finally 52 | 53 | try { 54 | //通过工厂类获取DataSource对象 55 | dataSource = DruidDataSourceFactory.createDataSource(p); 56 | } catch (Exception e) { 57 | log.error("获取连接异常 ", e); 58 | } 59 | 60 | } // end static 61 | 62 | private DruidUtils() { 63 | } 64 | 65 | /** 66 | * 获取连接 67 | * 68 | * @return 69 | */ 70 | public static Connection getConnection() throws SQLException { 71 | return dataSource.getConnection(); 72 | 73 | } 74 | 75 | 76 | /** 77 | * 关闭连接 78 | * 79 | * @param con 80 | * @date : 2017-10-16 10:08:10 81 | */ 82 | 83 | } 84 | 85 | 86 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/exactly/transactionway/FlinkKafkaToOracle.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.exactly.transactionway; 2 | 3 | import cn.northpark.flink.util.FlinkUtils; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.java.tuple.Tuple3; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.util.Collector; 11 | import org.apache.flink.util.StringUtils; 12 | 13 | import java.io.InputStream; 14 | import java.time.LocalDateTime; 15 | import java.util.UUID; 16 | 17 | /*** 18 | * Flink从kafka读取数据写入Oracle 并且实现exactly once 19 | * @author bruce 20 | */ 21 | public class FlinkKafkaToOracle { 22 | 23 | public static void main(String[] args) throws Exception{ 24 | 25 | InputStream is = FlinkKafkaToMysql.class.getClassLoader().getResourceAsStream("config.properties"); 26 | 27 | ParameterTool parameters = ParameterTool.fromPropertiesFile(is); 28 | 29 | DataStream kafkaStream = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 30 | 31 | 32 | SingleOutputStreamOperator> words = kafkaStream.flatMap(new FlatMapFunction>() { 33 | @Override 34 | public void flatMap(String value, Collector> out) throws Exception { 35 | if (!StringUtils.isNullOrWhitespaceOnly(value)) { 36 | 37 | // if ("AAA".equalsIgnoreCase(value)) { 38 | // System.out.println(1 / 0); 39 | // } 40 | 41 | out.collect(Tuple3.of(UUID.randomUUID().toString(), value, LocalDateTime.now().toString())); 42 | } 43 | } 44 | }); 45 | 46 | words.print(); 47 | 48 | 49 | words.addSink(new OracleTwoPhaseCommitSink()); 50 | 51 | 52 | FlinkUtils.getEnv().execute("FlinkKafkaToOracle"); 53 | 54 | } 55 
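//说明(示意性理解,假设 OracleTwoPhaseCommitSink 基于 Flink 的 TwoPhaseCommitSinkFunction 实现):
//checkpoint 触发时先在事务中预提交本批数据,checkpoint 完成通知到达后再真正 commit,
//配合 Kafka 源的位点快照,从而实现端到端的 exactly once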
| } 56 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/RabbitMQConFactory.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.util; 2 | 3 | import com.rabbitmq.client.Address; 4 | import com.rabbitmq.client.Connection; 5 | import com.rabbitmq.client.ConnectionFactory; 6 | 7 | import java.util.*; 8 | 9 | /** 10 | * 消息连接工厂,支持集群自动重连 11 | * 12 | * @author bruce 13 | * 14 | */ 15 | public class RabbitMQConFactory { 16 | 17 | /** 18 | * 缓存连接工厂,将建立的链接放入map缓存,为每个Storm的spout都建立独立的连接,其他用通用的。 19 | */ 20 | private static Map connectionMap = new HashMap(); 21 | 22 | private static ConnectionFactory factory=new ConnectionFactory(); 23 | private static List
<Address> addrs=new ArrayList<Address>
(); 24 | static { 25 | 26 | ResourceBundle bundle = ResourceBundle.getBundle("config"); 27 | if(bundle==null){ 28 | throw new IllegalArgumentException("找不到config.properties!"); 29 | } 30 | factory.setAutomaticRecoveryEnabled(true); 31 | factory.setUsername(bundle.getString("mq.user")); 32 | factory.setPassword(bundle.getString("mq.pass")); 33 | String address=bundle.getString("mq.host"); 34 | int port=Integer.parseInt(bundle.getString("mq.port")); 35 | 36 | Address address1= new Address(address,port); 37 | addrs.add(address1); 38 | 39 | // String []addressArray=addresses.split(","); 40 | // for(int i=0;i source = env.socketTextStream("localhost", 4000); 24 | 25 | //spark 1 26 | //spark 2 27 | //java 3 28 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 29 | @Override 30 | public Tuple2 map(String value) throws Exception { 31 | String[] lines = value.split(" "); 32 | return Tuple2.of(lines[0],Integer.parseInt(lines[1])); 33 | } 34 | }); 35 | 36 | //先分组 37 | KeyedStream, Tuple> keyed = map.keyBy(0); 38 | 39 | //按照分组后分窗口 40 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 41 | WindowedStream, Tuple, TimeWindow> window = keyed.window(SlidingProcessingTimeWindows.of(Time.seconds(10),Time.seconds(5))); 42 | 43 | 44 | SingleOutputStreamOperator> summed = window.sum(1); 45 | 46 | summed.print(); 47 | 48 | env.execute("SlidingWindow"); 49 | 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/SessionWindow.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.assigners.ProcessingTimeSessionWindows; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 14 | 15 | /** 16 | * Session窗口 :以2条数据的时间差来划分窗口,时间差>n,则触发窗口 17 | * @author bruce 18 | */ 19 | public class SessionWindow { 20 | public static void main(String[] args) throws Exception { 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | DataStreamSource source = env.socketTextStream("localhost", 4000); 24 | 25 | //spark 1 26 | //spark 2 27 | //java 3 28 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 29 | @Override 30 | public Tuple2 map(String value) throws Exception { 31 | String[] lines = value.split(" "); 32 | return Tuple2.of(lines[0],Integer.parseInt(lines[1])); 33 | } 34 | }); 35 | 36 | //先分组 37 | KeyedStream, Tuple> keyed = map.keyBy(0); 38 | 39 | //按照分组后分窗口 40 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 41 | WindowedStream, Tuple, TimeWindow> window = keyed.window(ProcessingTimeSessionWindows.withGap( Time.seconds(5))); 42 | 43 | 44 | SingleOutputStreamOperator> summed = window.sum(1); 45 | 46 | summed.print(); 47 | 48 | env.execute("SessionWindow"); 49 | 50 | 
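//本地测试示例:先执行 nc -lk 4000,输入形如 "spark 1" 的数据;同一 key 超过 5 秒没有新数据到达时,会话窗口触发并输出累加结果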
51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/TumblingWindow.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 14 | 15 | /** 16 | * 滚动窗口--先分组,达到N秒 划分窗口,窗口内的所有key组都会被执行 17 | * @author bruce 18 | */ 19 | public class TumblingWindow { 20 | public static void main(String[] args) throws Exception { 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | DataStreamSource source = env.socketTextStream("localhost", 4000); 24 | 25 | //spark 1 26 | //spark 2 27 | //java 3 28 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 29 | @Override 30 | public Tuple2 map(String value) throws Exception { 31 | String[] lines = value.split(" "); 32 | return Tuple2.of(lines[0],Integer.parseInt(lines[1])); 33 | } 34 | }); 35 | 36 | //先分组 37 | KeyedStream, Tuple> keyed = map.keyBy(0); 38 | 39 | //按照分组后分窗口 40 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.of(5, TimeUnit.SECONDS)); 41 | WindowedStream, Tuple, TimeWindow> window = keyed.window(TumblingProcessingTimeWindows.of(Time.seconds(5))); 42 | 43 | 44 | SingleOutputStreamOperator> summed = window.sum(1); 45 | 46 | summed.print(); 47 | 48 | env.execute("TumblingWindow"); 49 | 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/GlobalEntity.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | //定义车辆监控基本信息 4 | case class MonitorCarInfo(areaId:String,roadId:String,monitorId:String,cameraId:String,actionTime:Long,car:String,speed:Double) 5 | //定义车辆监控基本信息 + 限速信息 6 | case class NewMonitorCarInfo(areaId:String,roadId:String,monitorId:String,cameraId:String,actionTime:Long,car:String,speed:Double,speedLimit:Double) 7 | 8 | //定义车辆限速的信息 9 | case class MonitorLimitSpeedInfo(areaId:String,roadId:String,monitorId:String,limitSpeed:Double) 10 | 11 | //定义超速车辆的信息 12 | case class OverSpeedCarInfo(car:String,monitorId:String,roadId:String,realSpeed:Double,limitSpeed:Double,actionTime:Long) 13 | 14 | //定义卡扣平均速度信息 15 | case class MonitorAvgSpeedInfo(windowStartTime:String,windowEndTime:String,monitorId:String,avgSpeed:Double,carCount:Long) 16 | 17 | //定义最通畅的top5 卡扣信息 18 | case class Top5MonitorInfo(windowStartTime:String,windowEndTime:String,monitorId:String,hightSpeedCarCount:Long,middleSpeedCount:Long,normalSpeedCarCount:Long,lowSpeedCarCount:Long) 19 | 20 | //定义卡扣通过车辆数的统计对象 21 | case class 
MonitorSpeedClsCount(xhightSpeedCarCount:Long,xmiddleSpeedCount:Long,xnormalSpeedCarCount:Long,xlowSpeedCarCount:Long) extends Ordered[MonitorSpeedClsCount]{ 22 | var hightSpeedCarCount = xhightSpeedCarCount 23 | var middleSpeedCount = xmiddleSpeedCount 24 | var normalSpeedCarCount = xnormalSpeedCarCount 25 | var lowSpeedCarCount = xlowSpeedCarCount 26 | 27 | override def compare(that: MonitorSpeedClsCount): Int = { 28 | //先比较 高速 29 | if(this.hightSpeedCarCount != that.hightSpeedCarCount){ 30 | (this.hightSpeedCarCount - that.hightSpeedCarCount).toInt 31 | }else if(this.middleSpeedCount != that.middleSpeedCount){ 32 | (this.middleSpeedCount - that.middleSpeedCount).toInt 33 | }else if(this.normalSpeedCarCount != that.normalSpeedCarCount){ 34 | (this.normalSpeedCarCount - that.normalSpeedCarCount).toInt 35 | }else{ 36 | (this.lowSpeedCarCount - that.lowSpeedCarCount).toInt 37 | } 38 | } 39 | } 40 | 41 | //定义违法车辆信息 42 | case class ViolationCarInfo(car:String,violation:String,createTime:String,detail:String) 43 | 44 | //定义出警信息 45 | case class PoliceInfo(policeId:String,car:String,policeTime:Long,policeState:String) 46 | 47 | 48 | 49 | object GlobalEntity { 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/KafkaSourceV2.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 7 | import org.apache.flink.streaming.api.CheckpointingMode; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 12 | 13 | import java.util.Properties; 14 | 15 | /** 16 | * @author zhangyang 17 | * 使用Kafka作为数据源读取数据 exactly once 18 | */ 19 | public class KafkaSourceV2 { 20 | 21 | public static void main(String[] args) throws Exception { 22 | 23 | //1.环境 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | 27 | env.enableCheckpointing(5000); 28 | 29 | env.setStateBackend(new FsStateBackend("file:///Users/bruce/Documents/workspace/np-flink/backEnd")); 30 | 31 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 32 | 33 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3,2000)); 34 | 35 | //精准一次 36 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 37 | 38 | Properties props = new Properties(); 39 | 40 | //指定Kafka的Broker地址 41 | props.setProperty( "bootstrap.servers", "localhost:9092"); 42 | //指定组ID 43 | props.setProperty("group.id", "bruce"); 44 | //如果没有记录偏移量,第一次从最开始消费 45 | props.setProperty("auto.offset.reset", "earliest") ; 46 | //kafka的消费者不自动提交偏移量 47 | props.setProperty("enable.auto.commit", "false"); 48 | 49 | //2.read 50 | FlinkKafkaConsumer kafkaSource = new FlinkKafkaConsumer<>("flink000", new SimpleStringSchema(), props); 51 | 52 | 53 | DataStream lines = env.addSource(kafkaSource); 54 | 55 | //3.sink/transform 56 | lines.print(); 57 | 58 | //4.execute 59 | env.execute("KafkaSourceV2"); 60 | } 61 | 62 | } 63 | 
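//补充说明(示意性写法,基于 flink-connector-kafka 的 FlinkKafkaConsumerBase API):
//开启 checkpoint 后,消费位点随快照一并保存,作业从 checkpoint/savepoint 恢复时接着上次位点继续消费;
//若还希望把位点回写到 Kafka,便于用消费组工具观察消费进度,可在 addSource 之前调用:
//kafkaSource.setCommitOffsetsOnCheckpoints(true);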
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StateBackend2.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | 12 | public class StateBackend2 { 13 | public static void main(String[] args) throws Exception{ 14 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | 16 | //只有开启了checkpoint 才会有重启策略 17 | env.enableCheckpointing(8000); 18 | 19 | //hdfs://localhost:9000/np-backend 20 | 21 | //设置重启策略为重启2次,间隔2秒 22 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(2,2)); 23 | 24 | //设置StateBackend策略为本地文件系统 25 | // env.setStateBackend(new FsStateBackend("file:///Users/bruce/Documents/workspace/np-flink/np-stateBackend")); 26 | 27 | env.setStateBackend(new FsStateBackend("hdfs://localhost:9000/np-backend1")); 28 | 29 | //设置cancelJob或者异常退出Job以后不删除checkpoint数据 30 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 31 | 32 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 33 | 34 | SingleOutputStreamOperator> wordOne = lines.map(new MapFunction>() { 35 | 36 | @Override 37 | public Tuple2 map(String value) throws Exception { 38 | if (value.startsWith("jeyy")) { 39 | throw new RuntimeException("jeyy来了,程序出错了!!!"); 40 | } 41 | return Tuple2.of(value, 1); 42 | } 43 | }); 44 | 45 | 46 | SingleOutputStreamOperator> summed = wordOne.keyBy(0).sum(1); 47 | 48 | summed.print(); 49 | 50 | env.execute("StateBackend2"); 51 | 52 | 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/AddOne.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase; 2 | 3 | import cn.northpark.flink.util.PhoenixUtilV2; 4 | import cn.northpark.flink.weiboAPP.hbase.bean.WeiboRelations; 5 | import cn.northpark.flink.weiboAPP.hbase.enums.RelType; 6 | 7 | import java.util.UUID; 8 | 9 | /** 10 | * @author bruce 11 | * @date 2022年06月26日 10:03:33 12 | */ 13 | public class AddOne { 14 | public static void main(String[] args) { 15 | 16 | //2.增加1条关系 17 | // String uid = UUID.randomUUID().toString().replace("-",""); 18 | // String rel_uid = UUID.randomUUID().toString().replace("-",""); 19 | // WeiboRelations bean_by = new WeiboRelations(uid,RelType.REPLY,rel_uid,1); 20 | // addOne(bean_by); 21 | 22 | String uid = UUID.randomUUID().toString().replace("-",""); 23 | //添加1对多的转发关系 24 | // A -B1 25 | // -B2 26 | // -B3 27 | for (int i = 0; i < 20; i++) { 28 | 29 | String rel_uid_ = UUID.randomUUID().toString().replace("-",""); 30 | WeiboRelations bean_by_ = new WeiboRelations(uid, RelType.TRANS_LINK,rel_uid_,1); 31 | addOne(bean_by_); 32 | } 33 | } 34 | 35 | /** 36 | * 添加一条转发/评论关系 37 | * @param bean_by 38 | */ 39 | private static void 
addOne(WeiboRelations bean_by) { 40 | //根据被转发关系构造一条主动转发关系 41 | WeiboRelations bean_self = new WeiboRelations(bean_by.getRel_user_id(), bean_by.getRel_type(), bean_by.getUser_id(),bean_by.getBy_type()==1?0:1); 42 | 43 | //分别插入2条数据 44 | 45 | String insert_rel_sql = "UPSERT INTO \"stt\".t_weibo_relations_v2 (ID,USER_ID,REL_TYPE,REL_USER_ID,BY_TYPE) " + 46 | " VALUES ( '" + UUID.randomUUID().toString()+"' ,'"+ bean_by.getUser_id()+"' ,'"+ bean_by.getRel_type()+"','"+ bean_by.getRel_user_id()+"',"+ bean_by.getBy_type()+" )"; 47 | 48 | 49 | String insert_rel_sql2 = "UPSERT INTO \"stt\".t_weibo_relations_v2 (ID,USER_ID,REL_TYPE,REL_USER_ID,BY_TYPE) " + 50 | " VALUES ( '" + UUID.randomUUID().toString()+"' ,'"+ bean_self.getUser_id()+"' ,'"+bean_self.getRel_type()+"','"+bean_self.getRel_user_id()+"',"+bean_self.getBy_type()+" )"; 51 | 52 | 53 | PhoenixUtilV2.insertData(insert_rel_sql); 54 | PhoenixUtilV2.insertData(insert_rel_sql2); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/StreamSqlWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.table.api.Table; 11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 12 | import org.apache.flink.types.Row; 13 | import org.apache.flink.util.Collector; 14 | 15 | import java.util.Arrays; 16 | 17 | public class StreamSqlWordCount { 18 | public static void main(String[] args) throws Exception { 19 | 20 | //实时dataStream api 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | //实时Table执行上下文 24 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 25 | 26 | //word count java scala 27 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 28 | 29 | SingleOutputStreamOperator words = lines.flatMap(new FlatMapFunction() { 30 | @Override 31 | public void flatMap(String value, Collector out) throws Exception { 32 | Arrays.stream(value.split(" ")).forEach(out::collect); 33 | } 34 | }); 35 | 36 | //注册程表 37 | tableEnv.registerDataStream("t_word_count",words,"word"); 38 | 39 | //写sql 40 | Table table = tableEnv.sqlQuery("select word,count(1) counts from t_word_count group by word"); 41 | 42 | DataStream> tuple2DataStream = tableEnv.toRetractStream(table, Row.class); 43 | 44 | // tuple2DataStream.print(); 45 | 46 | 47 | tuple2DataStream.filter(new FilterFunction>() { 48 | @Override 49 | public boolean filter(Tuple2 value) throws Exception { 50 | 51 | return value.f0; 52 | } 53 | }).print(); 54 | env.execute("StreamSqlWordCount"); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # np-flink 2 | # flink详细学习实践 3 | 4 | 5 | ## 在 MacOS 上安装 Kafka 6 | 记录一下在 Mac 上安装和测试 kafka 的步骤。 7 | 8 | MacOS 上可以方便的使用 brew 进行安装。 9 | 10 | 安装 11 | 12 | 如果还没有安装Java, 
可以先安装Java: 13 | `brew cask install java` 14 | 15 | 16 | 然后安装zookeeper和kafka。 17 | 18 | `brew install kafka` 19 | 20 | `brew install zookeeper` 21 | 22 | 修改 **/usr/local/etc/kafka/server.properties**, 找到 **listeners=PLAINTEXT://:9092** 那一行,把注释取消掉。 23 | 然后修改为: 24 | 25 | `############################# Socket Server Settings ############################# 26 | `# The address the socket server listens on. It will get the value returned from 27 | `# java.net.InetAddress.getCanonicalHostName() if not configured. 28 | `# FORMAT: 29 | `# listeners = listener_name://host_name:port 30 | `# EXAMPLE: 31 | `# listeners = PLAINTEXT://your.host.name:9092 32 | `listeners=PLAINTEXT://localhost:9092` 33 | 34 | 启动 35 | 36 | 如果想以服务的方式启动,那么可以: 37 | 38 | `$ brew services start zookeeper` 39 | 40 | `$ brew services start kafka` 41 | 42 | 如果只是临时启动,可以: 43 | `$ zkServer start` 44 | 45 | `$ kafka-server-start /usr/local/etc/kafka/server.properties` 46 | 47 | 创建Topic 48 | 49 | `$ kafka-topics --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic flink000` 50 | 51 | 查看所有topic 52 | 53 | ` 54 | kafka-topics --list --zookeeper localhost:2181 55 | ` 56 | 57 | 产生消息 58 | 59 | `$ kafka-console-producer --broker-list localhost:9092 --topic flink000` 60 | 61 | `>HELLO Kafka` 62 | 63 | 消费 64 | 65 | 简单方式: 66 | 67 | `$ kafka-console-consumer --bootstrap-server localhost:9092 --topic flink000 --from-beginning 68 | ` 69 | 70 | 如果使用消费组: 71 | 72 | `kafka-console-consumer --bootstrap-server localhost:9092 --topic flink000 --group test-consumer1 --from-beginning 73 | ` 74 | 75 | **_Producer_**:消息生产者。 76 | 77 | **_Broker_**:kafka集群中的服务器。 78 | 79 | **_Topic_**:消息的主题,可以理解为消息的分类,kafka的数据就保存在topic。在每个broker上都可以创建多个topic。 80 | 81 | **_Partition_**:Topic的分区,每个topic可以有多个分区,分区的作用是做负载,提高kafka的吞吐量。 82 | 83 | **_Replication_**:每一个分区都有多个副本,副本的作用是做备胎。当主分区(Leader)故障的时候会选择一个备胎(Follower)上位,成为Leader。在kafka中默认副本的最大数量是10个,且副本的数量不能大于Broker的数量,follower和leader绝对是在不同的机器,同一机器对同一个分区也只可能存放一个副本(包括自己)。 84 | 85 | **_Consumer_**:消息消费者。 86 | 87 | **_Consumer Group_**:我们可以将多个消费组组成一个消费者组,在kafka的设计中同一个分区的数据只能被消费者组中的某一个消费者消费。同一个消费者组的消费者可以消费同一个topic的不同分区的数据,这也是为了提高kafka的吞吐量! 
88 | 89 | **_Zookeeper_**:kafka集群依赖zookeeper来保存集群的的元信息,来保证系统的可用性。 90 | 91 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/StreamTableWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.table.api.Table; 11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 12 | import org.apache.flink.types.Row; 13 | import org.apache.flink.util.Collector; 14 | 15 | import java.util.Arrays; 16 | 17 | public class StreamTableWordCount { 18 | public static void main(String[] args) throws Exception { 19 | 20 | //实时dataStream api 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | //实时Table执行上下文 24 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 25 | 26 | //word count java scala 27 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 28 | 29 | SingleOutputStreamOperator words = lines.flatMap(new FlatMapFunction() { 30 | @Override 31 | public void flatMap(String value, Collector out) throws Exception { 32 | Arrays.stream(value.split(" ")).forEach(out::collect); 33 | } 34 | }); 35 | 36 | //将dataStream注册成表 37 | Table table = tableEnv.fromDataStream(words, "word"); 38 | 39 | System.out.printf("schema---", table.getSchema()); 40 | 41 | Table table2 = table.groupBy("word").select("word, count(1) as counts"); 42 | 43 | DataStream> tuple2DataStream = tableEnv.toRetractStream(table2, Row.class); 44 | 45 | // tuple2DataStream.print(); 46 | 47 | 48 | tuple2DataStream.filter(new FilterFunction>() { 49 | @Override 50 | public boolean filter(Tuple2 value) throws Exception { 51 | 52 | return value.f0; 53 | } 54 | }).print(); 55 | 56 | env.execute("StreamTableWordCount"); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/spark/scoreApp/sinkScore.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.spark.scoreApp; 2 | 3 | 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | import java.util.Arrays; 8 | import java.util.List; 9 | import java.util.Objects; 10 | 11 | /** 12 | * @author bruce 13 | * @date 2022年06月15日 15:19:41 14 | * 编写Java程序,利用IO流向d:\\hadoop\score.txt写入5个同位3科成绩 15 | */ 16 | public class sinkScore { 17 | private static final String sinkDir = "c:///Users/Bruce/Desktop/5/score.txt"; 18 | 19 | 20 | public static void main(String[] args) { 21 | List list = Arrays.asList( 22 | String.join(",", "1", "马克", "3403", "家园的治理:环境科学概论", "92", "2022年6月15日"), 23 | String.join(",", "1", "马克", "B0021001", "军事理论", "88", "2022年6月15日"), 24 | String.join(",", "1", "马克", "3509", "创业创新领导力", "76", "2022年6月14日"), 25 | String.join(",", "2", "刘晓莉", "3403", "家园的治理:环境科学概论", "89", "2022年6月15日"), 26 | String.join(",", "2", "刘晓莉", "B0021001", "军事理论", "82", "2022年6月15日"), 27 | String.join(",", 
"2", "刘晓莉", "3509", "创业创新领导力", "93", "2022年6月14日"), 28 | String.join(",", "3", "王博罗", "3403", "家园的治理:环境科学概论", "66", "2022年6月15日"), 29 | String.join(",", "3", "王博罗", "B0021001", "军事理论", "99", "2022年6月15日"), 30 | String.join(",", "3", "王博罗", "3509", "创业创新领导力", "95", "2022年6月14日") 31 | 32 | ); 33 | FileWriter writer = null; 34 | try { 35 | 36 | File file = new File(sinkDir); 37 | if(!file.getParentFile().exists()){ 38 | boolean result = file.getParentFile().mkdirs(); 39 | if(!result){ 40 | throw new RuntimeException("创建文件路径失败"); 41 | } 42 | } 43 | writer = new FileWriter(file); 44 | 45 | for (String str : list) { 46 | writer.write(str); 47 | writer.write("\n"); 48 | } 49 | }catch (Exception e) { 50 | e.printStackTrace(); 51 | }finally { 52 | if(Objects.nonNull(writer)){ 53 | try { 54 | writer.flush(); 55 | writer.close(); 56 | } catch (IOException e) { 57 | e.printStackTrace(); 58 | } 59 | 60 | } 61 | } 62 | 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/spark/scoreApp/sinkScoreAppended.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.spark.scoreApp; 2 | 3 | 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | import java.util.Arrays; 8 | import java.util.List; 9 | import java.util.Objects; 10 | 11 | /** 12 | * @author bruce 13 | * @date 2022年06月15日 15:19:41 14 | * 编写Java程序,利用IO流向d:\\hadoop\score.txt写入5个同位3科成绩 15 | * 追加模式写入 16 | */ 17 | public class sinkScoreAppended { 18 | private static final String sinkDir = "c:///Users/Bruce/Desktop/5/score.txt"; 19 | 20 | 21 | public static void main(String[] args) { 22 | List list = Arrays.asList( 23 | String.join(",", "4", "马云", "3403", "家园的治理:环境科学概论", "92", "2022年6月15日"), 24 | String.join(",", "4", "马云", "B0021001", "军事理论", "88", "2022年6月15日"), 25 | String.join(",", "4", "马云", "3509", "创业创新领导力", "76", "2022年6月14日"), 26 | String.join(",", "5", "赵散散", "3403", "家园的治理:环境科学概论", "89", "2022年6月15日"), 27 | String.join(",", "5", "赵散散", "B0021001", "军事理论", "82", "2022年6月15日"), 28 | String.join(",", "5", "赵散散", "3509", "创业创新领导力", "93", "2022年6月14日"), 29 | String.join(",", "6", "李科及", "3403", "家园的治理:环境科学概论", "66", "2022年6月15日"), 30 | String.join(",", "6", "李科及", "B0021001", "军事理论", "97", "2022年6月15日"), 31 | String.join(",", "6", "李科及", "3509", "创业创新领导力", "92", "2022年6月14日") 32 | 33 | ); 34 | FileWriter writer = null; 35 | try { 36 | 37 | File file = new File(sinkDir); 38 | if(!file.getParentFile().exists()){ 39 | boolean result = file.getParentFile().mkdirs(); 40 | if(!result){ 41 | throw new RuntimeException("创建文件路径失败"); 42 | } 43 | } 44 | //第二个参数为true则追加 45 | writer = new FileWriter(file,true); 46 | 47 | for (String str : list) { 48 | writer.write(str); 49 | writer.write("\n"); 50 | } 51 | }catch (Exception e) { 52 | e.printStackTrace(); 53 | }finally { 54 | if(Objects.nonNull(writer)){ 55 | try { 56 | writer.flush(); 57 | writer.close(); 58 | } catch (IOException e) { 59 | e.printStackTrace(); 60 | } 61 | 62 | } 63 | } 64 | 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StateBackend1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 6 | import 
org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | 11 | /** 12 | * @author zhangyang 13 | * 重启策略 14 | */ 15 | public class StateBackend1 { 16 | 17 | public static void main(String[] args) throws Exception { 18 | // step1 :获取执行环境 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | //只有开启了checkpoint 才会有重启策略 默认是不重启 22 | env.enableCheckpointing(5000);//每隔5s进行一次checkpoint 23 | //默认的重启策略是无限重启 Integer.MAX_VALUE 次 24 | 25 | //重启重试次数 26 | env.setRestartStrategy(org.apache.flink.api.common.restartstrategy.RestartStrategies.fixedDelayRestart(3,2000)); 27 | 28 | //设置状态存储的后端,一般写在flink的配置文件中 29 | // env.setStateBackend(new FsStateBackend("file:///Users/bruce/Documents/workspace/np-flink/np-backend")); 30 | env.setStateBackend(new FsStateBackend("hdfs://localhost:9000/np-backend")); 31 | 32 | 33 | //程序异常退出或者人为cancel以后,不删除checkpoint数据 34 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 35 | 36 | // step2:读取数据 37 | DataStreamSource text = env.socketTextStream("localhost", 4000); 38 | 39 | 40 | //把单词和1拼一块 41 | SingleOutputStreamOperator> wordAndOne = text.map(new MapFunction>() { 42 | @Override 43 | public Tuple2 map(String value) throws Exception { 44 | if(value.startsWith("jeyy")){ 45 | throw new RuntimeException("jeyy来了,发生异常!!"); 46 | } 47 | return Tuple2.of(value, 1); 48 | } 49 | }); 50 | 51 | //分组、累加 52 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 53 | 54 | 55 | //sink 56 | sumed.print(); 57 | 58 | //execute 59 | env.execute("StateBackend1"); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/monitorWarning/RTCarAnaly.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.monitorWarning 2 | 3 | import java.util.Properties 4 | 5 | import cn.northpark.flink.scala.transformApp.util._ 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema 7 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 8 | import org.apache.flink.streaming.api.scala.function.WindowFunction 9 | import org.apache.flink.streaming.api.windowing.time.Time 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 11 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer 12 | import org.apache.flink.util.Collector 13 | import org.apache.kafka.common.serialization.StringDeserializer 14 | 15 | /** 16 | * 实时车辆区域分布统计 17 | */ 18 | object RTCarAnaly { 19 | def main(args: Array[String]): Unit = { 20 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 21 | import org.apache.flink.streaming.api.scala._ 22 | 23 | //kafka 配置 24 | 25 | val props = new Properties() 26 | props.setProperty("bootstrap.servers","node1:9092,node2:9092,node3:9092") 27 | props.setProperty("key.deserializer",classOf[StringDeserializer].getName) 28 | props.setProperty("value.deserializer",classOf[StringDeserializer].getName) 29 | props.setProperty("group.id","group112601xx") 30 | 31 | val ds: DataStream[String] = env.addSource(new FlinkKafkaConsumer[String]("monitortopic1125",new 
SimpleStringSchema(),props).setStartFromEarliest()) 32 | 33 | val carDS: DataStream[MonitorCarInfo] = ds.map(line => { 34 | val arr: Array[String] = line.split("\t") 35 | MonitorCarInfo(arr(0), arr(1), arr(2), arr(3), arr(4).toLong, arr(5), arr(6).toDouble) 36 | }) 37 | 38 | //每个一分钟统计每个区域中的车辆总数 39 | carDS.keyBy(_.areaId) 40 | .timeWindow(Time.minutes(1)) 41 | .apply(new WindowFunction[MonitorCarInfo,String,String,TimeWindow] { 42 | //key : 当前区域,window:当前窗口对象,input : 当前窗口内的数据,out : 回收数据对象 43 | override def apply(key: String, window: TimeWindow, input: Iterable[MonitorCarInfo], out: Collector[String]): Unit = { 44 | val carSet = scala.collection.mutable.Set[String]() 45 | 46 | val iter: Iterator[MonitorCarInfo] = input.iterator 47 | while(iter.hasNext){ 48 | val mci: MonitorCarInfo = iter.next() 49 | carSet.add(mci.car) 50 | } 51 | out.collect(s"窗口起始时间:${window.getStart} - ${window.getEnd},当前区域:${key} ,车辆总数为:${carSet.size}") 52 | } 53 | }).print() 54 | 55 | env.execute() 56 | 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCountChain.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.util.Collector; 11 | 12 | /** 13 | * @author zhangyang 14 | * 按照步骤来一步步拆分Task是如何划分的 15 | * wc统计的数据我们源自于socket 16 | */ 17 | public class StreamingWordCountChain { 18 | 19 | public static void main(String[] args) throws Exception { 20 | // step1 :获取执行环境 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | // step2:读取数据 24 | DataStreamSource text = env.socketTextStream("localhost", 4000); 25 | 26 | 27 | // 拆词 28 | SingleOutputStreamOperator words = text.flatMap(new FlatMapFunction() { 29 | @Override 30 | public void flatMap(String value, Collector out) throws Exception { 31 | String[] words = value.split(" "); 32 | for (String word : words) { 33 | out.collect(word); 34 | } 35 | } 36 | }); 37 | 38 | //过滤 39 | SingleOutputStreamOperator filtered = words.filter(new FilterFunction() { 40 | @Override 41 | public boolean filter(String value) throws Exception { 42 | return value.startsWith("a"); 43 | } 44 | }).disableChaining();//将这个算子单独划分,生成一个Task,和其他的算子不再有operator chain 45 | //.startNewChain();//将这个开始划分,生成一个新的Task 46 | 47 | //把单词和1拼一块 48 | SingleOutputStreamOperator> wordAndOne = filtered.map(new MapFunction>() { 49 | @Override 50 | public Tuple2 map(String value) throws Exception { 51 | return Tuple2.of(value, 1); 52 | } 53 | }); 54 | 55 | //分组、累加 56 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 57 | 58 | 59 | //sink 60 | sumed.print().setParallelism(2); 61 | 62 | //execute 63 | env.execute("StreamingWordCount"); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCountSharingGroup.java: -------------------------------------------------------------------------------- 1 | 
package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.util.Collector; 11 | 12 | /** 13 | * @author zhangyang 14 | * Flink资源槽,默认的名字 都是default 15 | */ 16 | public class StreamingWordCountSharingGroup { 17 | 18 | public static void main(String[] args) throws Exception { 19 | // step1 :获取执行环境 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | 22 | // step2:读取数据 23 | DataStreamSource text = env.socketTextStream("localhost", 4000); 24 | 25 | 26 | // 拆词 27 | SingleOutputStreamOperator words = text.flatMap(new FlatMapFunction() { 28 | @Override 29 | public void flatMap(String value, Collector out) throws Exception { 30 | String[] words = value.split(" "); 31 | for (String word : words) { 32 | out.collect(word); 33 | } 34 | } 35 | }).slotSharingGroup("ddd"); 36 | 37 | //过滤 38 | SingleOutputStreamOperator filtered = words.filter(new FilterFunction() { 39 | @Override 40 | public boolean filter(String value) throws Exception { 41 | return value.startsWith("a"); 42 | } 43 | });//.disableChaining();//将这个算子单独划分,生成一个Task,和其他的算子不再有operator chain 44 | //.startNewChain();//将这个开始划分,生成一个新的Task 45 | 46 | //把单词和1拼一块 47 | SingleOutputStreamOperator> wordAndOne = filtered.map(new MapFunction>() { 48 | @Override 49 | public Tuple2 map(String value) throws Exception { 50 | return Tuple2.of(value, 1); 51 | } 52 | }); 53 | 54 | //分组、累加 55 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 56 | 57 | 58 | //sink 59 | sumed.print(); 60 | 61 | //execute 62 | env.execute("StreamingWordCountSharingGroup"); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/exactly/overrideway/FlinkKafkaToRedis.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.exactly.overrideway; 2 | 3 | import cn.northpark.flink.util.FlinkUtils; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | import org.apache.flink.api.java.tuple.Tuple3; 9 | import org.apache.flink.api.java.utils.ParameterTool; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.util.Collector; 13 | 14 | /*** 15 | * Flink从kafka读取数据写入redis 并且实现exactly once 16 | */ 17 | public class FlinkKafkaToRedis { 18 | 19 | public static void main(String[] args) throws Exception{ 20 | 21 | // ParameterTool parameters = ParameterTool.fromArgs(args); 22 | 23 | ParameterTool parameters = ParameterTool.fromPropertiesFile("/Users/bruce/Documents/workspace/np-flink/src/main/resources/config.properties"); 24 | 25 | DataStream kafkaStream = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 26 | 27 | 28 | // 拆词 29 | SingleOutputStreamOperator words = 
kafkaStream.flatMap(new FlatMapFunction() { 30 | @Override 31 | public void flatMap(String value, Collector out) throws Exception { 32 | String[] words = value.split(" "); 33 | for (String word : words) { 34 | out.collect(word); 35 | } 36 | } 37 | }); 38 | 39 | //把单词和1拼一块 40 | SingleOutputStreamOperator> wordAndOne = words.map(new MapFunction>() { 41 | @Override 42 | public Tuple2 map(String value) throws Exception { 43 | return Tuple2.of(value, 1); 44 | } 45 | }); 46 | 47 | //分组、累加 48 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 49 | 50 | sumed.map(new MapFunction, Tuple3>() { 51 | @Override 52 | public Tuple3 map(Tuple2 value) throws Exception { 53 | return Tuple3.of("NP-wordcount-sink-redis",value.f0,value.f1.toString() ); 54 | } 55 | }).addSink(new MyRedisSink()); 56 | 57 | FlinkUtils.getEnv().execute("FlinkKafkaToRedis"); 58 | 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/ConvertCSV2KafkaBean.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api; 2 | 3 | import cn.northpark.flink.bean.Covid; 4 | import cn.northpark.flink.util.KafkaString; 5 | import com.alibaba.fastjson.JSON; 6 | 7 | import java.io.BufferedReader; 8 | import java.io.FileNotFoundException; 9 | import java.io.FileReader; 10 | import java.io.IOException; 11 | 12 | /** 13 | * @author bruce 14 | * @date 2022年05月08日 22:52:34 15 | */ 16 | public class ConvertCSV2KafkaBean { 17 | public static void main(String[] args) { 18 | String csvFile = "C:\\Users\\Bruce\\Desktop\\today_province_2022_04_15.csv"; 19 | BufferedReader br = null; 20 | String line = ""; 21 | String cvsSplitBy = ","; 22 | 23 | try { 24 | br = new BufferedReader(new FileReader(csvFile)); 25 | br.readLine(); // 提前读一下就跳过了 26 | while ((line = br.readLine()) != null) { 27 | String[] split = line.split(","); 28 | String var0 = split[0]; 29 | String var1 = split[1]; 30 | String var2 = split[2]; 31 | String var3 = split[3] ; 32 | String var4 = split[4] ; 33 | String var5 = split[5] ; 34 | String var6 = split[6] ; 35 | String var7 = split[7] ; 36 | String var8 = split[8] ; 37 | String var9 = split[9] ; 38 | String var10 = split[10]; 39 | String var11 = split[11]; 40 | String var12 = split[12]; 41 | String var13 = split[13]; 42 | String var14 = split[14]; 43 | String var15 = split[15]; 44 | String var16 = split[16]; 45 | String var17 = split[17]; 46 | 47 | Covid bean = new Covid(var0,var1,var2,var3,var4,var5, 48 | var6,var7,var8,var9,var10 49 | ,var11,var12,var13,var14,var15,var16,var17); 50 | 51 | System.err.println(bean.toString()); 52 | 53 | 54 | 55 | KafkaString.sendKafkaString(KafkaString.buildBasicKafkaProperty(),"covid19", JSON.toJSONString(bean)); 56 | } 57 | 58 | } catch (FileNotFoundException e) { 59 | e.printStackTrace(); 60 | } catch (IOException e) { 61 | e.printStackTrace(); 62 | } finally { 63 | if (br != null) { 64 | try { 65 | br.close(); 66 | } catch (IOException e) { 67 | e.printStackTrace(); 68 | } 69 | } 70 | } 71 | 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/hadoop/MR/CarBean.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.hadoop.MR; 2 | 3 | 4 | import org.apache.commons.lang3.StringUtils; 5 | import org.apache.hadoop.io.Writable; 6 | 7 | import java.io.DataInput; 8 | import 
java.io.DataOutput; 9 | import java.io.IOException; 10 | 11 | /** 12 | * @author bruce 13 | * @date 2022年04月19日 21:29:12 14 | */ 15 | public class CarBean implements Writable { 16 | 17 | private String date; 18 | private String upSpeed; 19 | private String speed; 20 | private String no; 21 | 22 | 23 | public String getDate() { 24 | return date; 25 | } 26 | 27 | public void setDate(String date) { 28 | this.date = date; 29 | } 30 | 31 | public String getUpSpeed() { 32 | return upSpeed; 33 | } 34 | 35 | public void setUpSpeed(String upSpeed) { 36 | this.upSpeed = upSpeed; 37 | } 38 | 39 | public String getSpeed() { 40 | return speed; 41 | } 42 | 43 | public void setSpeed(String speed) { 44 | this.speed = speed; 45 | } 46 | 47 | public String getNo() { 48 | return no; 49 | } 50 | 51 | public void setNo(String no) { 52 | this.no = no; 53 | } 54 | 55 | @Override 56 | public String toString() { 57 | StringBuilder sb = new StringBuilder(); 58 | if (StringUtils.isNotEmpty(no)) { 59 | sb.append(no).append(" "); 60 | } 61 | 62 | if (StringUtils.isNotEmpty(date)) { 63 | sb.append(date).append(" "); 64 | } 65 | if (StringUtils.isNotEmpty(upSpeed)) { 66 | sb.append(upSpeed).append(" "); 67 | } 68 | if (StringUtils.isNotEmpty(speed)) { 69 | sb.append(speed).append(" "); 70 | } 71 | 72 | return sb.toString(); 73 | } 74 | @Override 75 | public void write(DataOutput dataOutput) throws IOException { 76 | dataOutput.writeUTF(date ); 77 | dataOutput.writeUTF(upSpeed ); 78 | dataOutput.writeUTF(speed ); 79 | dataOutput.writeUTF(no ); 80 | } 81 | 82 | @Override 83 | public void readFields(DataInput dataInput) throws IOException { 84 | this.date =dataInput.readUTF(); 85 | this.upSpeed =dataInput.readUTF(); 86 | this.speed =dataInput.readUTF(); 87 | this.no =dataInput.readUTF(); 88 | } 89 | 90 | public static void main(String[] args) { 91 | CarBean bean = new CarBean(); 92 | bean.setDate("2022-4-20"); 93 | bean.setSpeed("222"); 94 | bean.setUpSpeed("0.618"); 95 | bean.setNo("6188"); 96 | System.err.println(bean.toString()); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/IpLocation.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.stream.sql; 2 | 3 | import com.google.common.collect.Lists; 4 | import org.apache.flink.api.java.tuple.Tuple4; 5 | import org.apache.flink.table.functions.FunctionContext; 6 | import org.apache.flink.table.functions.ScalarFunction; 7 | import org.apache.flink.types.Row; 8 | 9 | import java.io.BufferedReader; 10 | import java.io.File; 11 | import java.io.FileInputStream; 12 | import java.io.InputStreamReader; 13 | import java.util.List; 14 | 15 | public class IpLocation extends ScalarFunction { 16 | private List> lines = Lists.newArrayList(); 17 | 18 | @Override 19 | public void open(FunctionContext context) throws Exception { 20 | File cachedFile = context.getCachedFile("ip-rules"); 21 | 22 | BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(cachedFile))); 23 | 24 | String line = null; 25 | 26 | while ((line = bufferedReader.readLine()) != null) { 27 | String[] fields = line.split("[|]"); 28 | Long startNum = Long.parseLong(fields[2]); 29 | Long endNum = Long.parseLong(fields[3]); 30 | String province = fields[6]; 31 | String city = fields[7]; 32 | lines.add(Tuple4.of(startNum, endNum, province, city)); 33 | } 34 | 35 | } 36 | 37 | //必须 38 | public Row 
eval(String ip) { 39 | Long ipNum = ip2Long(ip); 40 | return binarySearch(ipNum); 41 | } 42 | 43 | public static Long ip2Long(String dottedIP) { 44 | String[] addrArray = dottedIP.split("\\."); 45 | long num = 0; 46 | for (int i=0;i=lines.get(middle).f0 && ipNum<=lines.get(middle).f1){ 63 | index = middle; 64 | } 65 | if(ipNum tp4 = lines.get(index); 73 | result = Row.of(tp4.f2,tp4.f3); 74 | } 75 | return result; 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/starrocks/bean/EventMsg.java: -------------------------------------------------------------------------------- 1 | 2 | package cn.northpark.flink.starrocks.bean; 3 | 4 | import com.google.gson.annotations.Expose; 5 | import com.google.gson.annotations.SerializedName; 6 | 7 | import javax.annotation.Generated; 8 | 9 | @Generated("net.hexar.json2pojo") 10 | @SuppressWarnings("unused") 11 | public class EventMsg { 12 | 13 | @SerializedName("_flush_time") 14 | private Long _flushTime; 15 | @SerializedName("_track_id") 16 | private Long _trackId; 17 | @SerializedName("anonymous_id") 18 | private String anonymousId; 19 | @SerializedName("distinct_id") 20 | private String distinctId; 21 | @Expose 22 | private String event; 23 | @Expose 24 | private Identities identities; 25 | @Expose 26 | private Lib lib; 27 | @Expose 28 | private Properties properties; 29 | @Expose 30 | private Long time; 31 | @Expose 32 | private String type; 33 | 34 | public Long get_flushTime() { 35 | return _flushTime; 36 | } 37 | 38 | public void set_flushTime(Long _flushTime) { 39 | this._flushTime = _flushTime; 40 | } 41 | 42 | public Long get_trackId() { 43 | return _trackId; 44 | } 45 | 46 | public void set_trackId(Long _trackId) { 47 | this._trackId = _trackId; 48 | } 49 | 50 | public String getAnonymousId() { 51 | return anonymousId; 52 | } 53 | 54 | public void setAnonymousId(String anonymousId) { 55 | this.anonymousId = anonymousId; 56 | } 57 | 58 | public String getDistinctId() { 59 | return distinctId; 60 | } 61 | 62 | public void setDistinctId(String distinctId) { 63 | this.distinctId = distinctId; 64 | } 65 | 66 | public String getEvent() { 67 | return event; 68 | } 69 | 70 | public void setEvent(String event) { 71 | this.event = event; 72 | } 73 | 74 | public Identities getIdentities() { 75 | return identities; 76 | } 77 | 78 | public void setIdentities(Identities identities) { 79 | this.identities = identities; 80 | } 81 | 82 | public Lib getLib() { 83 | return lib; 84 | } 85 | 86 | public void setLib(Lib lib) { 87 | this.lib = lib; 88 | } 89 | 90 | public Properties getProperties() { 91 | return properties; 92 | } 93 | 94 | public void setProperties(Properties properties) { 95 | this.properties = properties; 96 | } 97 | 98 | public Long getTime() { 99 | return time; 100 | } 101 | 102 | public void setTime(Long time) { 103 | this.time = time; 104 | } 105 | 106 | public String getType() { 107 | return type; 108 | } 109 | 110 | public void setType(String type) { 111 | this.type = type; 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/EventTimeSessionWindow.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import 
org.apache.flink.streaming.api.TimeCharacteristic; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.assigners.EventTimeSessionWindows; 13 | import org.apache.flink.streaming.api.windowing.time.Time; 14 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 15 | 16 | /** 17 | * Session窗口 :以2条数据的时间差来划分窗口,时间差>n,则触发窗口 18 | * @author bruce 19 | */ 20 | public class EventTimeSessionWindow { 21 | public static void main(String[] args) throws Exception { 22 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 23 | 24 | //设置使用EventTime作为时间标准 25 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 26 | 27 | SingleOutputStreamOperator source = env.socketTextStream("localhost", 4000) 28 | 29 | //提取时间字段 30 | .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor(Time.seconds(0)) { 31 | @Override 32 | public long extractTimestamp(String line) { 33 | String[] fields = line.split(" "); 34 | return Long.parseLong(fields[0]); 35 | } 36 | }); 37 | 38 | 39 | //1000 spark 1 40 | //1999 spark 2 41 | //4999 java 3 42 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 43 | @Override 44 | public Tuple2 map(String value) throws Exception { 45 | String[] lines = value.split(" "); 46 | return Tuple2.of(lines[1],Integer.parseInt(lines[2])); 47 | } 48 | }); 49 | 50 | //先分组 51 | KeyedStream, Tuple> keyed = map.keyBy(0); 52 | 53 | //按照分组后分窗口 54 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 55 | WindowedStream, Tuple, TimeWindow> window = keyed.window(EventTimeSessionWindows.withGap( Time.seconds(5))); 56 | 57 | 58 | SingleOutputStreamOperator> summed = window.sum(1); 59 | 60 | summed.print(); 61 | 62 | env.execute("EventTimeSessionWindow"); 63 | 64 | 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/MakeData.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.io.PrintWriter 4 | import java.text.SimpleDateFormat 5 | import java.util.Properties 6 | 7 | import org.apache.commons.math3.random.{GaussianRandomGenerator, JDKRandomGenerator} 8 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 9 | import org.apache.kafka.common.serialization.StringSerializer 10 | 11 | import scala.util.Random 12 | 13 | /** 14 | * 模拟生成数据 15 | * 1.将模拟的数据生成到文件中 16 | * 2.将模拟的数据生成到Kafka中 17 | * 区域id,道路id,卡扣id,摄像头id,拍摄时间,车辆信息,车辆速度 18 | */ 19 | object MakeData { 20 | def main(args: Array[String]): Unit = { 21 | //创建写入数据的文件 22 | val pw = new PrintWriter("C:\\Users\\Bruce\\Desktop\\3\\trafficdata") 23 | //创建kafka配置 24 | val props = new Properties() 25 | props.setProperty("bootstrap.servers","node1:9092,node2:9092,node3:9092") 26 | props.setProperty("key.serializer",classOf[StringSerializer].getName) 27 | props.setProperty("value.serializer",classOf[StringSerializer].getName) 28 | props.setProperty("auto.offset.reset","latest") 29 | //创建Kafka Producer 30 | val producer = new 
KafkaProducer[String,String](props) 31 | 32 | //模拟3000辆车 京Axxxxx 33 | val locations = Array[String]("京","津","冀","京","鲁","京","京","京","京","京") 34 | val random = new Random() 35 | val generator = new GaussianRandomGenerator(new JDKRandomGenerator()) 36 | 37 | for(i <- 1 to 30000){ 38 | //模拟车辆 39 | val car = locations(random.nextInt(10))+(65+random.nextInt(26)).toChar+random.nextInt(99999).formatted("%05d") 40 | //模拟每辆车通过的卡扣数 ,一辆车每天通过卡扣数可能是大部分都不超过100个卡扣 41 | val throuldMonitorCount = (generator.nextNormalizedDouble() * 100).abs.toInt 42 | for(j <- 0 until throuldMonitorCount){ 43 | //通过的区域 44 | val areaId = random.nextInt(8).formatted("%02d") 45 | //通过的道路 46 | val roadId = random.nextInt(50).formatted("%02d") 47 | //通过的卡扣 48 | val monitorId = random.nextInt(9999).formatted("%04d") 49 | //通过的摄像头 50 | val cameraId = random.nextInt(99999).formatted("%05d") 51 | //摄像头拍摄时间,转换成时间戳 52 | val yyyyMMddHHmmss =DateUtils.getCurrentDate() + " "+DateUtils.getRandomHour()+":"+DateUtils.getRandomMinutesOrSeconds+":"+DateUtils.getRandomMinutesOrSeconds 53 | val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") 54 | val actionTime = format.parse(yyyyMMddHHmmss).getTime 55 | 56 | //拍摄车辆速度 ,大部分车辆速度位于60左右 57 | val speed :Double = (generator.nextNormalizedDouble()*60).abs.formatted("%.2f").toDouble 58 | 59 | val info = s"${areaId}\t${roadId}\t${monitorId}\t${cameraId}\t${actionTime}\t${car}\t${speed}" 60 | println(info) 61 | //向文件中写入 62 | pw.println(info) 63 | //向kafka中写入 64 | producer.send( new ProducerRecord[String,String]("flink_traffic5",info)) 65 | } 66 | } 67 | pw.close() 68 | producer.close() 69 | 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/main/scala/cn/northpark/spark/scoreStt.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.spark 2 | 3 | import java.util.Properties 4 | 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.rdd.RDD 7 | import org.apache.spark.sql.types.{DataTypes, StructField, StructType} 8 | import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession} 9 | 10 | /** 11 | * @author bruce 12 | * @date 2022年06月15日 16:18:43 13 | */ 14 | object scoreStt { 15 | 16 | def main(args: Array[String]): Unit = { 17 | 18 | 19 | val sparkSession: SparkSession = SparkSession.builder() 20 | .appName("scoreStt") 21 | .master("local") 22 | .getOrCreate() 23 | 24 | val sparkContext: SparkContext = sparkSession.sparkContext 25 | 26 | var path = "C:\\Users\\Bruce\\Desktop\\5\\score.txt" 27 | 28 | val rdd1: RDD[String] = sparkContext.textFile(path) 29 | 30 | // map处理返回bean 31 | val mapRDD: RDD[Row] = rdd1.map(line => { 32 | val strings: Array[String] = line.split(",") 33 | 34 | var row: Row = null 35 | 36 | try { 37 | val stuID: String = strings(0) 38 | val stuName: String = strings(1) 39 | val clzID: String = strings(2) 40 | val clzName: String = strings(3) 41 | val score: Double = strings(4).toDouble 42 | val time: String = strings(5) 43 | 44 | // 注意,这里的类型,以及后续的structtype类型需要一一匹配,否则就会出错 45 | row = Row(stuID, stuName, clzID, clzName,score,time ) 46 | } catch { 47 | case e:Exception =>{ 48 | e.printStackTrace() 49 | } 50 | } 51 | 52 | row 53 | }).filter(ele=> ele != null) 54 | 55 | // 创建结构化schema信息,注意这里要求是Seq,也就是有序集合, 56 | // 因为需要按照顺序去解析每个列的字段信息 57 | val structType: StructType = StructType(List( 58 | StructField("stuID", DataTypes.StringType), 59 | StructField("stuName", DataTypes.StringType, false), 60 | StructField("clzID", DataTypes.StringType, false), 61 | 
StructField("clzName", DataTypes.StringType, false), 62 | StructField("score", DataTypes.DoubleType, false), 63 | StructField("time", DataTypes.StringType, false) 64 | )) 65 | 66 | // 通过RDD以及对应的schema信息,创建dataFrame对象 67 | val dataFrame: DataFrame = sparkSession.createDataFrame(mapRDD, structType) 68 | 69 | // 打印schema信息 70 | dataFrame.printSchema() 71 | 72 | //注册临时表 73 | dataFrame.createTempView("tt_score") 74 | 75 | //查询所有数据并打印到控制台 76 | sparkSession.sql("select * from tt_score").show() 77 | 78 | //统计平均分并打印 79 | val avgScore = sparkSession.sql("select stuID,stuName,avg(score) from tt_score group by stuID,stuName ") 80 | avgScore.show() 81 | 82 | 83 | //把统计的平均分写入mysql 84 | val prop = new Properties() 85 | prop.put("user","root") 86 | prop.put("password","123456") 87 | avgScore.write.mode(SaveMode.Overwrite).jdbc("jdbc:mysql://localhost:3306/spark","tt_score",prop) 88 | 89 | sparkSession.close() 90 | 91 | //====================================================== 92 | } 93 | 94 | 95 | } 96 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/MakeData1.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.io.PrintWriter 4 | import java.text.SimpleDateFormat 5 | import java.util.Properties 6 | 7 | import org.apache.commons.math3.random.{GaussianRandomGenerator, JDKRandomGenerator} 8 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 9 | import org.apache.kafka.common.serialization.StringSerializer 10 | 11 | import scala.util.Random 12 | 13 | /** 14 | * 模拟生成数据 15 | * 1.将模拟的数据生成到文件中 16 | * 2.将模拟的数据生成到Kafka中 17 | * 区域id,道路id,卡扣id,摄像头id,拍摄时间,车辆信息,车辆速度 18 | */ 19 | object MakeData1 { 20 | def main(args: Array[String]): Unit = { 21 | //创建写入数据的文件 22 | val pw = new PrintWriter("C:\\Users\\Bruce\\Desktop\\3\\trafficdata") 23 | //创建kafka配置 24 | val props = new Properties() 25 | props.setProperty("bootstrap.servers","node1:9092,node2:9092,node3:9092") 26 | props.setProperty("key.serializer",classOf[StringSerializer].getName) 27 | props.setProperty("value.serializer",classOf[StringSerializer].getName) 28 | 29 | //创建Kafka Producer 30 | val producer = new KafkaProducer[String,String](props) 31 | 32 | //模拟3000辆车 京Axxxxx 33 | val locations = Array[String]("京","津","冀","京","鲁","京","京","京","京","京") 34 | val monitorIds = Array[String]("01","02") 35 | val random = new Random() 36 | val generator = new GaussianRandomGenerator(new JDKRandomGenerator()) 37 | 38 | //往固定区域、道路、通道发送车辆消息 39 | for(i <- 1 to 3000){ 40 | //模拟车辆 41 | val car = locations(random.nextInt(10))+(65+random.nextInt(26)).toChar+random.nextInt(99999).formatted("%05d") 42 | //模拟每辆车通过的卡扣数 ,一辆车每天通过卡扣数可能是大部分都不超过100个卡扣 43 | val throuldMonitorCount = (generator.nextNormalizedDouble() * 100).abs.toInt 44 | for(j <- 0 until throuldMonitorCount){ 45 | //通过的区域 46 | val areaId = "02" 47 | //通过的道路 48 | val roadId = "02" 49 | //通过的通道 50 | val monitorId = monitorIds(random.nextInt(2)) 51 | //通过的摄像头 52 | val cameraId = random.nextInt(99999).formatted("%05d") 53 | //摄像头拍摄时间,转换成时间戳 54 | val yyyyMMddHHmmss =DateUtils.getCurrentDate() + " "+DateUtils.getRandomHour()+":"+DateUtils.getRandomMinutesOrSeconds+":"+DateUtils.getRandomMinutesOrSeconds 55 | val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") 56 | val actionTime = format.parse(yyyyMMddHHmmss).getTime 57 | 58 | //拍摄车辆速度 ,大部分车辆速度位于60左右 59 | val speed :Double = 
(generator.nextNormalizedDouble()*60).abs.formatted("%.2f").toDouble 60 | 61 | val info = s"${areaId}\t${roadId}\t${monitorId}\t${cameraId}\t${actionTime}\t${car}\t${speed}" 62 | println(info) 63 | //向文件中写入 64 | pw.println(info) 65 | //向kafka中写入 66 | producer.send( new ProducerRecord[String,String]("flink_traffic5",info)) 67 | 68 | //2s发一条 69 | // Thread.sleep(2000) 70 | } 71 | } 72 | pw.close() 73 | producer.close() 74 | 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/EventTimeSlidingWindowWithWaterMark.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.TimeCharacteristic; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows; 13 | import org.apache.flink.streaming.api.windowing.time.Time; 14 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 15 | 16 | /** 17 | * eventTime 滑动窗口 18 | * 带 watermark水位线 19 | * 20 | * @author bruce 21 | */ 22 | public class EventTimeSlidingWindowWithWaterMark { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | //设置使用EventTime作为时间标准 27 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 28 | 29 | SingleOutputStreamOperator source = env.socketTextStream("localhost", 4000) 30 | 31 | //提取时间字段 32 | .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor(Time.seconds(0)) { 33 | @Override 34 | public long extractTimestamp(String line) { 35 | String[] fields = line.split(" "); 36 | return Long.parseLong(fields[0]); 37 | } 38 | }); 39 | 40 | 41 | //1000 spark 1 42 | //1999 spark 2 43 | //4999 java 3 44 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 45 | @Override 46 | public Tuple2 map(String value) throws Exception { 47 | String[] lines = value.split(" "); 48 | return Tuple2.of(lines[1],Integer.parseInt(lines[2])); 49 | } 50 | }); 51 | 52 | //先分组 53 | KeyedStream, Tuple> keyed = map.keyBy(0); 54 | 55 | //按照分组后分窗口 56 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 57 | WindowedStream, Tuple, TimeWindow> window = keyed.window(SlidingEventTimeWindows.of(Time.seconds(6), Time.seconds(2))); 58 | 59 | 60 | SingleOutputStreamOperator> summed = window.sum(1); 61 | 62 | summed.print(); 63 | 64 | env.execute("EventTimeTumblingWindow"); 65 | 66 | 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/EventTimeTumblingWindow.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import 
org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.TimeCharacteristic; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; 13 | import org.apache.flink.streaming.api.windowing.time.Time; 14 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 15 | 16 | /** 17 | * eventTime 滚动窗口 18 | * 如果使用的是并行的source,例如kafkaSource,创建kafka的Topic时有多个分区 19 | * 每一个Source的分区都要满足触发的条件,整个窗口才会被触发 20 | * @author bruce 21 | */ 22 | public class EventTimeTumblingWindow { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | //设置使用EventTime作为时间标准 27 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 28 | 29 | SingleOutputStreamOperator source = env.socketTextStream("localhost", 4000) 30 | 31 | //提取时间字段 32 | .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor(Time.seconds(0)) { 33 | @Override 34 | public long extractTimestamp(String line) { 35 | String[] fields = line.split(" "); 36 | return Long.parseLong(fields[0]); 37 | } 38 | }); 39 | 40 | 41 | //1000 spark 1 42 | //1999 spark 2 43 | //4999 java 3 44 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 45 | @Override 46 | public Tuple2 map(String value) throws Exception { 47 | String[] lines = value.split(" "); 48 | return Tuple2.of(lines[1],Integer.parseInt(lines[2])); 49 | } 50 | }); 51 | 52 | //先分组 53 | KeyedStream, Tuple> keyed = map.keyBy(0); 54 | 55 | //按照分组后分窗口 56 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 57 | WindowedStream, Tuple, TimeWindow> window = keyed.window(TumblingEventTimeWindows.of(Time.seconds(5))); 58 | 59 | 60 | SingleOutputStreamOperator> summed = window.sum(1); 61 | 62 | summed.print(); 63 | 64 | env.execute("EventTimeTumblingWindow"); 65 | 66 | 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/OperatorStateAndKeyedState.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 12 | import org.apache.flink.util.Collector; 13 | 14 | import java.util.Properties; 15 | 16 | /** 17 | * @author zhangyang 18 | * 按照步骤来一步步拆分Task是如何划分的 19 | * wc统计的数据我们源自于socket 20 | */ 21 | public class OperatorStateAndKeyedState { 22 | 23 | public static void main(String[] args) throws Exception { 24 | 25 | //1.环境 26 | 
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | 28 | env.enableCheckpointing(5000); 29 | 30 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 31 | 32 | Properties props = new Properties(); 33 | 34 | //指定Kafka的Broker地址 35 | props.setProperty( "bootstrap.servers", "localhost:9092"); 36 | //指定组ID 37 | props.setProperty("group.id", "bruce"); 38 | //如果没有记录偏移量,第一次从最开始消费 39 | props.setProperty("auto.offset.reset", "earliest") ; 40 | //kafka的消费者不自动提交偏移量 41 | props.setProperty("enable. auto. commit", "false"); 42 | 43 | //2.read 44 | FlinkKafkaConsumer kafkaSource = new FlinkKafkaConsumer<>("flink000", new SimpleStringSchema(), props); 45 | 46 | 47 | DataStream lines = env.addSource(kafkaSource); 48 | 49 | // 拆词 50 | SingleOutputStreamOperator words = lines.flatMap(new FlatMapFunction() { 51 | @Override 52 | public void flatMap(String value, Collector out) throws Exception { 53 | String[] words = value.split(" "); 54 | for (String word : words) { 55 | out.collect(word); 56 | } 57 | } 58 | }); 59 | 60 | //把单词和1拼一块 61 | SingleOutputStreamOperator> wordAndOne = words.map(new MapFunction>() { 62 | @Override 63 | public Tuple2 map(String value) throws Exception { 64 | return Tuple2.of(value, 1); 65 | } 66 | }); 67 | 68 | //分组、累加 69 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 70 | 71 | 72 | //sink 73 | sumed.print(); 74 | 75 | //execute 76 | env.execute("OperatorStateAndKeyedState"); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project3/MyExactlyOnceParaFileSource.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project3; 2 | 3 | import org.apache.flink.api.common.state.ListState; 4 | import org.apache.flink.api.common.state.ListStateDescriptor; 5 | import org.apache.flink.api.common.typeinfo.TypeHint; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | import org.apache.flink.runtime.state.FunctionInitializationContext; 9 | import org.apache.flink.runtime.state.FunctionSnapshotContext; 10 | import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; 11 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 12 | 13 | import java.io.RandomAccessFile; 14 | 15 | public class MyExactlyOnceParaFileSource extends RichParallelSourceFunction> implements CheckpointedFunction { 16 | private String path; 17 | 18 | private boolean flag = true; 19 | 20 | private transient ListState offsetState; 21 | 22 | private Long offset = 0L; 23 | 24 | public MyExactlyOnceParaFileSource(String path) { 25 | this.path = path; 26 | } 27 | 28 | public MyExactlyOnceParaFileSource() { 29 | } 30 | 31 | @Override 32 | public void run(SourceContext ctx) throws Exception { 33 | Iterable iterable = offsetState.get(); 34 | while(iterable.iterator().hasNext()){ 35 | offset = offsetState.get().iterator().next(); 36 | } 37 | 38 | int index = getRuntimeContext().getIndexOfThisSubtask(); 39 | 40 | RandomAccessFile randomAccessFile = new RandomAccessFile(path +"/" +index +".txt","r"); 41 | 42 | randomAccessFile.seek(offset); 43 | 44 | final Object lock = ctx.getCheckpointLock(); 45 | 46 | while (flag){ 47 | String line = randomAccessFile.readLine(); 48 | if(line!=null){ 49 | line = new 
String(line.getBytes("ISO-8859-1"),"utf-8"); 50 | synchronized (lock){ 51 | offset = randomAccessFile.getFilePointer(); 52 | ctx.collect(Tuple2.of(index+"",line)); 53 | } 54 | }else{ 55 | Thread.sleep(2000); 56 | } 57 | } 58 | } 59 | 60 | @Override 61 | public void cancel() { 62 | flag = false; //必须置为false,否则run()中的读取循环永远不会退出 63 | } 64 | 65 | @Override 66 | public void snapshotState(FunctionSnapshotContext context) throws Exception { 67 | //clear 68 | offsetState.clear(); 69 | 70 | //set offset 71 | offsetState.add(offset); 72 | } 73 | 74 | @Override 75 | public void initializeState(FunctionInitializationContext context) throws Exception { 76 | ListStateDescriptor<Long> stateDescriptor = new ListStateDescriptor<>("np-operator-state", 77 | TypeInformation.of(new TypeHint<Long>() { 78 | }) 79 | // Types.LONG 80 | // Long.class 81 | ); 82 | offsetState = context.getOperatorStateStore().getListState(stateDescriptor); 83 | } 84 | } 85 | --------------------------------------------------------------------------------
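Note on MyExactlyOnceParaFileSource above: restoring the offset with `while(iterable.iterator().hasNext())` asks a fresh iterator for hasNext() on every pass, so once the state is non-empty the loop never advances, and `cancel()` originally set `flag = true`, which never stops the read loop in `run()`. The sketch below shows the usual restore/snapshot pattern for an offset kept in operator state; the class and state names follow the file above, while the generic types and the `isRestored()` guard are reconstructions, not a verbatim copy of the repo's code.

import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

// 仅为示意(sketch):与上面 MyExactlyOnceParaFileSource 相同的状态恢复/快照套路
public abstract class OffsetCheckpointedSourceSketch extends RichParallelSourceFunction<String>
        implements CheckpointedFunction {

    private transient ListState<Long> offsetState; // operator state,保存读取偏移量
    private volatile boolean flag = true;          // volatile:run() 与 cancel() 在不同线程
    protected long offset = 0L;

    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        ListStateDescriptor<Long> descriptor =
                new ListStateDescriptor<>("np-operator-state", Types.LONG);
        offsetState = context.getOperatorStateStore().getListState(descriptor);
        // 恢复:用一次 for-each 取回已保存的偏移量,而不是 while(iterable.iterator().hasNext())
        if (context.isRestored()) {
            for (Long restored : offsetState.get()) {
                offset = restored;
            }
        }
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        offsetState.clear();
        offsetState.add(offset); // checkpoint 时记录当前读到的位置
    }

    @Override
    public void cancel() {
        flag = false; // 置为 false,让 run() 中的读取循环退出
    }

    protected boolean isRunning() {
        return flag;
    }
}

A concrete source would extend this sketch and keep reading the file inside `run()` under `ctx.getCheckpointLock()`, exactly as the original does.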
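FlinkKafkaToRedis above maps the word counts to `Tuple3.of("NP-wordcount-sink-redis", word, count)` and hands them to a MyRedisSink whose source is not shown in this part of the dump. The fixed hash key and the "overrideway" package name suggest the exactly-once effect is achieved by idempotent overwrites: replaying the same word after a failure simply rewrites the same hash field. Purely as an illustration of that idea, a hypothetical sink is sketched below; the class name, the Jedis usage and the localhost address are assumptions, not the repo's actual MyRedisSink.

import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import redis.clients.jedis.Jedis;

// 幂等写 Redis:Tuple3<hashKey, field, value>,重放时重复写同一 field 只是覆盖,结果不变
public class IdempotentRedisSinkSketch extends RichSinkFunction<Tuple3<String, String, String>> {

    private transient Jedis jedis;

    @Override
    public void open(Configuration parameters) throws Exception {
        jedis = new Jedis("localhost", 6379); // 连接地址仅为示意
    }

    @Override
    public void invoke(Tuple3<String, String, String> value, Context context) throws Exception {
        // value.f0 = redis hash 的 key,f1 = 单词,f2 = 累计次数
        jedis.hset(value.f0, value.f1, value.f2);
    }

    @Override
    public void close() throws Exception {
        if (jedis != null) {
            jedis.close();
        }
    }
}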
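In IpLocation the comparison operators inside `ip2Long` and `binarySearch` were swallowed by the HTML rendering of the dump (`for (int i=0;i=lines.get(middle).f0 ...`), so those loop bodies cannot be read back verbatim. Below is a plain-Java reconstruction sketch of the usual dotted-IP-to-long conversion and the binary search over `[startNum, endNum]` ranges; it keeps the field meaning parsed in `open()` (start, end, province, city), but the exact loop details are assumptions rather than a restore of the original.

import java.util.ArrayList;
import java.util.List;

public class IpRangeLookup {

    /** 一条 ip 规则:起始/结束的十进制 ip、省、市;规则需按 start 升序排好 */
    public static class Rule {
        final long start; final long end; final String province; final String city;
        public Rule(long start, long end, String province, String city) {
            this.start = start; this.end = end; this.province = province; this.city = city;
        }
    }

    private final List<Rule> rules = new ArrayList<>();

    public void addRule(Rule r) { rules.add(r); }

    /** 点分十进制 -> 无符号 32 位整数,例如 1.0.1.0 -> 16777472 */
    public static long ip2Long(String dottedIP) {
        String[] parts = dottedIP.split("\\.");
        long num = 0;
        for (int i = 0; i < parts.length; i++) {
            num = (num << 8) | Long.parseLong(parts[i]);
        }
        return num;
    }

    /** 二分查找 ipNum 落在哪个 [start, end] 区间,找不到返回 null */
    public Rule binarySearch(long ipNum) {
        int low = 0, high = rules.size() - 1;
        while (low <= high) {
            int middle = (low + high) / 2;
            Rule r = rules.get(middle);
            if (ipNum >= r.start && ipNum <= r.end) {
                return r;
            }
            if (ipNum < r.start) {
                high = middle - 1;
            } else {
                low = middle + 1;
            }
        }
        return null;
    }
}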
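The event-time jobs above (EventTimeTumblingWindow, EventTimeSlidingWindowWithWaterMark, EventTimeSessionWindow) all assign watermarks with `BoundedOutOfOrdernessTimestampExtractor(Time.seconds(0))`, i.e. a zero out-of-orderness bound: any record whose timestamp is smaller than the largest timestamp already seen counts as late and is dropped by default. If the socket input can arrive out of order, the bound should be set to the maximum expected disorder. A small sketch with a 2-second bound follows; the 2-second value is an example, not taken from the repo.

import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;

// 允许最多 2 秒乱序:watermark = 当前最大事件时间 - 2s,迟到不超过 2s 的数据仍能进入窗口
public class LineTimestampExtractor extends BoundedOutOfOrdernessTimestampExtractor<String> {

    public LineTimestampExtractor() {
        super(Time.seconds(2));
    }

    @Override
    public long extractTimestamp(String line) {
        // 输入形如 "1000 spark 1",第一个字段是毫秒级事件时间
        return Long.parseLong(line.split(" ")[0]);
    }
}

It would be used as `env.socketTextStream(...).assignTimestampsAndWatermarks(new LineTimestampExtractor())` in place of the inline anonymous extractor.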