├── .gitignore ├── src └── main │ ├── resources │ ├── A级景区经纬度.xlsx │ ├── 去哪儿网景点数据.csv │ ├── phoenix.properties │ ├── clickhouse.properties │ ├── sqlcfg.properties │ ├── oracle.properties │ ├── hikari.properties │ ├── config.properties │ ├── log4j.properties │ ├── druid.properties │ └── kafka.properties │ ├── java │ └── cn │ │ └── northpark │ │ ├── spark │ │ └── scoreApp │ │ │ ├── Score.txt │ │ │ ├── sinkScore.java │ │ │ └── sinkScoreAppended.java │ │ ├── flink │ │ ├── bean │ │ │ ├── UserVO.java │ │ │ ├── Product.java │ │ │ ├── StatisticsVO.java │ │ │ ├── Message.java │ │ │ ├── EventCatagoryProductCount.java │ │ │ └── Access.java │ │ ├── weiboAPP │ │ │ └── hbase │ │ │ │ ├── enums │ │ │ │ └── RelType.java │ │ │ │ ├── bean │ │ │ │ └── WeiboRelations.java │ │ │ │ ├── CreateTable.java │ │ │ │ ├── DelMany.java │ │ │ │ ├── DelOne.java │ │ │ │ ├── AddMony.java │ │ │ │ └── AddOne.java │ │ ├── util │ │ │ ├── RocksSaveable.java │ │ │ ├── ObjectUtil.java │ │ │ ├── RabbitMQUtils.java │ │ │ ├── FlinkUtilsV1.java │ │ │ ├── DruidUtils.java │ │ │ ├── RabbitMQConFactory.java │ │ │ └── HikariUtils.java │ │ ├── WordCount.java │ │ ├── PrintSink.java │ │ ├── topN │ │ │ └── spark │ │ │ │ └── passwordStasApp │ │ │ │ └── Test1.java │ │ ├── table_sql_api │ │ │ ├── WordCountBean.java │ │ │ ├── stream │ │ │ │ └── sql │ │ │ │ │ ├── Split.java │ │ │ │ │ ├── UDTFSQL.java │ │ │ │ │ ├── UDFSQL.java │ │ │ │ │ ├── KafkaWordCountSQL.java │ │ │ │ │ ├── udf │ │ │ │ │ └── UserBrowseLog.java │ │ │ │ │ └── IpLocation.java │ │ │ ├── batch │ │ │ │ ├── SQLWordCount.java │ │ │ │ ├── TableWordCount.java │ │ │ │ └── BatchSQLWordCountQueryCommon.java │ │ │ ├── StreamSqlWordCount.java │ │ │ ├── StreamTableWordCount.java │ │ │ └── ConvertCSV2KafkaBean.java │ │ ├── window │ │ │ ├── udf │ │ │ │ ├── TopNAggregateFunction.java │ │ │ │ └── TopNWindowFunction.java │ │ │ ├── CountWindowAll.java │ │ │ ├── SlidingWindowAll.java │ │ │ ├── TumblingWindowAll.java │ │ │ ├── CountWindow.java │ │ │ ├── SlidingWindow.java │ │ │ ├── SessionWindow.java │ │ │ ├── TumblingWindow.java │ │ │ ├── EventTimeSessionWindow.java │ │ │ ├── EventTimeSlidingWindowWithWaterMark.java │ │ │ └── EventTimeTumblingWindow.java │ │ ├── MerchantDayStaApp │ │ │ └── MerchantDaySta.java │ │ ├── join │ │ │ ├── CountBean.java │ │ │ ├── StreamDataSourceB.java │ │ │ ├── StreamDataSourceC.java │ │ │ └── StreamDataSourceA.java │ │ ├── starrocks │ │ │ └── bean │ │ │ │ ├── Identities.java │ │ │ │ ├── Lib.java │ │ │ │ └── EventMsg.java │ │ ├── timeout │ │ │ └── TimeOutResult.java │ │ ├── KeyBy1.java │ │ ├── project │ │ │ ├── RestfulActivityLocationsApplication.java │ │ │ ├── syncIO │ │ │ │ ├── AsyncMysqlApplication.java │ │ │ │ ├── AsyncRestfulApplication.java │ │ │ │ ├── SinkToMysqlApplication.java │ │ │ │ └── function │ │ │ │ │ └── NP_MySqlSinkFunction.java │ │ │ ├── function │ │ │ │ └── MysqlToActivityBeanFunciton.java │ │ │ ├── InspectSitemap.java │ │ │ └── MysqlActivityNameApplication.java │ │ ├── project3 │ │ │ ├── MyParaFileSource.java │ │ │ ├── OperatorStateTest.java │ │ │ └── MyExactlyOnceParaFileSource.java │ │ ├── KeyBy2Bean.java │ │ ├── clickhouse │ │ │ └── ReadSQL.java │ │ ├── oracle │ │ │ ├── FlinkKafkaLinkOracleSource.java │ │ │ ├── OracleToTupleFunciton.java │ │ │ ├── SinkOracle.java │ │ │ └── FlinkKafkaSink.java │ │ ├── project2 │ │ │ ├── NP_ParallelismFileSource.java │ │ │ └── OperatorState1.java │ │ ├── BatchWC.java │ │ ├── exactly │ │ │ ├── overrideway │ │ │ │ ├── MyRedisSink.java │ │ │ │ └── FlinkKafkaToRedis.java │ │ │ └── transactionway │ │ │ │ ├── 
FlinkKafkaPrint.java │ │ │ │ └── FlinkKafkaToOracle.java │ │ ├── KafkaSource.java │ │ ├── TT.java │ │ ├── KeyBy3.java │ │ ├── RestartStrategy2.java │ │ ├── AddSink1.java │ │ ├── StreamingWordCountSocket.java │ │ ├── StreamingWordCount2.java │ │ ├── RestartStrategies1.java │ │ ├── StreamingWordCountParam.java │ │ ├── StreamingWordCount.java │ │ ├── KafkaSourceV2.java │ │ ├── StateBackend2.java │ │ ├── StateBackend1.java │ │ ├── StreamingWordCountChain.java │ │ ├── StreamingWordCountSharingGroup.java │ │ └── OperatorStateAndKeyedState.java │ │ └── hadoop │ │ └── MR │ │ ├── covid │ │ ├── CovidReducer.java │ │ ├── CovidMapper.java │ │ ├── Covid.java │ │ └── CovidApp.java │ │ └── CarBean.java │ ├── scala │ ├── yxlm │ │ └── LolApp.scala │ ├── transformApp │ │ ├── util │ │ │ ├── GuassTest.scala │ │ │ ├── DateUtils.scala │ │ │ ├── HBaseSink.scala │ │ │ ├── KafkaSink.scala │ │ │ ├── TrafficEntity.scala │ │ │ ├── JDBCSink.scala │ │ │ ├── GlobalEntity.scala │ │ │ ├── MakeData.scala │ │ │ └── MakeData1.scala │ │ └── monitorWarning │ │ │ └── RTCarAnaly.scala │ ├── cn │ │ └── northpark │ │ │ └── spark │ │ │ ├── StreamWordCountScala.scala │ │ │ └── scoreStt.scala │ └── suicideApp │ │ └── HdfsTest.java │ └── python │ ├── LogisticRegression.py │ ├── gendata.py │ ├── KmeansGroup2.py │ └── KmeansGroup.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | .idea 3 | /backEnd/ 4 | *.log 5 | redis.properties 6 | *.iml -------------------------------------------------------------------------------- /src/main/resources/A级景区经纬度.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liuhouer/np-flinks/HEAD/src/main/resources/A级景区经纬度.xlsx -------------------------------------------------------------------------------- /src/main/resources/去哪儿网景点数据.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liuhouer/np-flinks/HEAD/src/main/resources/去哪儿网景点数据.csv -------------------------------------------------------------------------------- /src/main/java/cn/northpark/spark/scoreApp/Score.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/liuhouer/np-flinks/HEAD/src/main/java/cn/northpark/spark/scoreApp/Score.txt -------------------------------------------------------------------------------- /src/main/resources/phoenix.properties: -------------------------------------------------------------------------------- 1 | # 数据库驱动|不填写HikariCp会自动识别 2 | driverClassName=org.apache.phoenix.jdbc.PhoenixDriver 3 | # 访问数据库连接 4 | jdbcUrl=jdbc:phoenix:node1:2181:/hbase 5 | -------------------------------------------------------------------------------- /src/main/scala/yxlm/LolApp.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.yxlm 2 | 3 | /** 4 | * 5 | * @author bruce 6 | * @date 2022年04月18日 09:34:02 7 | */ 8 | class LolApp { 9 | 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/resources/clickhouse.properties: -------------------------------------------------------------------------------- 1 | # 数据库驱动|不填写HikariCp会自动识别 2 | driverClassName=ru.yandex.clickhouse.ClickHouseDriver 3 | # 访问数据库连接 4 | #jdbcUrl=jdbc:clickhouse://localhost:8123/ 5 | jdbcUrl=jdbc:clickhouse://node1:8123/ 6 | # 数据库用户名 7 | #username=test 8 | # 数据库密码 9 | 
#password=test 10 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/UserVO.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | import java.io.Serializable; 4 | 5 | public class UserVO implements Serializable { 6 | 7 | 8 | public Integer id; 9 | 10 | public String username; 11 | 12 | public String email; 13 | 14 | 15 | } -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/enums/RelType.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase.enums; 2 | 3 | /** 4 | * @author bruce 5 | * @date 2022年06月26日 10:58:15 6 | */ 7 | public interface RelType { 8 | 9 | String REPLY = "reply";// 回复 10 | String TRANS_LINK = "transLink";// 转发 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/RocksSaveable.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.util; 2 | 3 | import org.rocksdb.RocksDB; 4 | 5 | /** 6 | * @author zhangyang 7 | * @date 2020年07月22日 17:58:03 8 | */ 9 | public interface RocksSaveable { 10 | void save(RocksDB rocksDB) throws Exception; 11 | 12 | void deleteFromRocks(RocksDB rocksDB) throws Exception; 13 | } 14 | -------------------------------------------------------------------------------- /src/main/resources/sqlcfg.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | #sql configs here 3 | ################################################################################ 4 | 5 | #sql query configs 6 | sql=select word,sum(counts) as counts from word_count group by word having sum(counts) >=2 order by counts desc 7 | table=word_count 8 | columns=word,counts 9 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/Product.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | public class Product { 4 | 5 | public String category; 6 | public String name; 7 | 8 | @Override 9 | public String toString() { 10 | return "Product{" + 11 | "category='" + category + '\'' + 12 | ", name='" + name + '\'' + 13 | '}'; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/StatisticsVO.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | import java.util.Map; 4 | 5 | /** 6 | * @author bruce 7 | * @date 2021年11月17日 16:24:01 8 | */ 9 | public class StatisticsVO { 10 | public String url ; 11 | public String method ; 12 | public String ip ; 13 | public String class_method ; 14 | public String args ; 15 | 16 | public UserVO userVO; 17 | 18 | public Map cookieMap; 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/GuassTest.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import org.apache.commons.math3.random.{GaussianRandomGenerator, JDKRandomGenerator} 
4 | 5 | object GaussTest { 6 | def main(args: Array[String]): Unit = { 7 | val generator = new GaussianRandomGenerator(new JDKRandomGenerator()) 8 | for(i <- 1 to 100){ 9 | val result: Int = (generator.nextNormalizedDouble() * 100).abs.toInt 10 | println(result) 11 | } 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /src/main/resources/oracle.properties: -------------------------------------------------------------------------------- 1 | # 数据库驱动|不填写HikariCp会自动识别 2 | driverClassName=oracle.jdbc.driver.OracleDriver 3 | # 访问数据库连接 4 | jdbcUrl=jdbc:oracle:thin:@localhost:1521:test 5 | # 数据库用户名 6 | username=test 7 | # 数据库密码 8 | password=test 9 | # 最大连接数 10 | maximumPoolSize=30 11 | # 连接池空闲连接的最小数量 12 | minimumIdle=5 13 | # 开启事务自动提交 14 | autoCommit=false 15 | # 是否自定义配置,为true时下面两个参数才生效 16 | dataSource.cachePrepStmts=true 17 | # 连接池大小默认25,官方推荐250-500 18 | dataSource.prepStmtCacheSize=250 19 | # 单条语句最大长度默认256,官方推荐2048 20 | dataSource.prepStmtCacheSqlLimit=2048 21 | 22 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/Message.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | /** 4 | * @author bruce 5 | * @date 2024年01月25日 13:44:13 6 | */ 7 | public class Message { 8 | private String msgType; 9 | private String body; 10 | 11 | public String getMsgType() { 12 | return msgType; 13 | } 14 | 15 | public void setMsgType(String msgType) { 16 | this.msgType = msgType; 17 | } 18 | 19 | public String getBody() { 20 | return body; 21 | } 22 | 23 | public void setBody(String body) { 24 | this.body = body; 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /src/main/resources/hikari.properties: -------------------------------------------------------------------------------- 1 | # 数据库驱动|不填写HikariCp会自动识别 2 | driverClassName=com.mysql.jdbc.Driver 3 | # 访问数据库连接 4 | jdbcUrl=jdbc:mysql://localhost:3306/flink?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false&autoReconnect=true 5 | # 数据库用户名 6 | username=root 7 | # 数据库密码 8 | password=123456 9 | # 最大连接数 10 | maximumPoolSize=30 11 | # 连接池空闲连接的最小数量 12 | minimumIdle=5 13 | # 开启事务自动提交 14 | autoCommit=false 15 | 16 | # 是否自定义配置,为true时下面两个参数才生效 17 | dataSource.cachePrepStmts=true 18 | # 连接池大小默认25,官方推荐250-500 19 | dataSource.prepStmtCacheSize=250 20 | # 单条语句最大长度默认256,官方推荐2048 21 | dataSource.prepStmtCacheSqlLimit=2048 -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/WordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | public class WordCount { 4 | public String word; 5 | public Integer counts; 6 | 7 | public WordCount() { 8 | } 9 | 10 | public WordCount(String word, Integer counts) { 11 | this.word = word; 12 | this.counts = counts; 13 | } 14 | 15 | public static WordCount of(String word, Integer counts) { 16 | return new WordCount(word, counts); 17 | } 18 | 19 | @Override 20 | public String toString() { 21 | return "WordCount{" + 22 | "word='" + word + '\'' + 23 | ", counts=" + counts + 24 | '}'; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/PrintSink.java: -------------------------------------------------------------------------------- 1 | package 
cn.northpark.flink; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 6 | /** 7 | * @author bruce 8 | * 揭秘subTask的编号 9 | */ 10 | public class PrintSink { 11 | 12 | public static void main(String[] args) throws Exception { 13 | 14 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | 16 | DataStreamSource source = env.socketTextStream("localhost", 4000); 17 | 18 | source.print("the res is ").setParallelism(2); 19 | 20 | env.execute("PrintSink"); 21 | 22 | 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/topN/spark/passwordStasApp/Test1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.topN.spark.passwordStasApp; 2 | 3 | import java.util.StringTokenizer; 4 | 5 | /** 6 | * @author bruce 7 | * @date 2022年06月25日 12:21:23 8 | */ 9 | public class Test1 { 10 | 11 | public static void main(String[] args) { 12 | String exp = "shibazi_lin@126.com\t6584596"; 13 | // 去掉所有的键盘上的不可输入字符,不包括双字节的,32-126 14 | String pattern = "[^\040-\176]"; 15 | String line = exp.toString().replaceAll(pattern, " "); 16 | StringTokenizer itr = new StringTokenizer(line); 17 | 18 | while (itr.hasMoreTokens()) { 19 | System.err.println(itr.nextToken()); 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/WordCountBean.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api; 2 | 3 | public class WordCountBean { 4 | public String word; 5 | public int counts; 6 | 7 | public WordCountBean() { 8 | } 9 | 10 | public WordCountBean(String word, int counts) { 11 | this.word = word; 12 | this.counts = counts; 13 | } 14 | 15 | public static WordCountBean of(String word, int counts) { 16 | return new WordCountBean(word, counts); 17 | } 18 | 19 | @Override 20 | public String toString() { 21 | return "WordCount{" + 22 | "word='" + word + '\'' + 23 | ", counts=" + counts + 24 | '}'; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/udf/TopNAggregateFunction.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window.udf; 2 | 3 | import cn.northpark.flink.bean.Access; 4 | import org.apache.flink.api.common.functions.AggregateFunction; 5 | 6 | public class TopNAggregateFunction implements AggregateFunction { 7 | @Override 8 | public Long createAccumulator() { 9 | return 0L; 10 | } 11 | 12 | @Override 13 | public Long add(Access value, Long accumulator) { 14 | return accumulator + 1; 15 | } 16 | 17 | @Override 18 | public Long getResult(Long accumulator) { 19 | return accumulator; 20 | } 21 | 22 | @Override 23 | public Long merge(Long a, Long b) { 24 | return null; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/Split.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.stream.sql; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.api.common.typeinfo.Types; 5 | import 
org.apache.flink.table.functions.TableFunction; 6 | import org.apache.flink.types.Row; 7 | 8 | public class Split extends TableFunction { 9 | private String separator = ","; 10 | public Split(String separator) { 11 | this.separator = separator; 12 | } 13 | public void eval(String line){ 14 | for (String s: line.split(separator)){ 15 | collect(Row.of(s)); 16 | } 17 | } 18 | 19 | @Override 20 | public TypeInformation getResultType() { 21 | return Types.ROW(Types.STRING); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/MerchantDayStaApp/MerchantDaySta.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.MerchantDayStaApp; 2 | 3 | import lombok.Data; 4 | 5 | 6 | /** 7 | * @author bruce 8 | * @date 2022年05月09日 18:06:54 9 | */ 10 | 11 | @Data 12 | public class MerchantDaySta { 13 | 14 | private String merchantId; 15 | 16 | private Double totalDeductMoney; 17 | 18 | public MerchantDaySta() { 19 | } 20 | 21 | public MerchantDaySta(String merchantId, Double totalDeductMoney) { 22 | this.merchantId = merchantId; 23 | this.totalDeductMoney = totalDeductMoney; 24 | } 25 | 26 | @Override 27 | public String toString() { 28 | return "MerchantDaySta{" + 29 | "merchantId='" + merchantId + '\'' + 30 | ", totalDeductMoney=" + totalDeductMoney + 31 | '}'; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/bean/WeiboRelations.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase.bean; 2 | 3 | import lombok.Data; 4 | 5 | /** 6 | * @author bruce 7 | * @date 2022年06月26日 10:56:13 8 | */ 9 | @Data 10 | public class WeiboRelations { 11 | private String user_id; 12 | 13 | /** 14 | * reply:回复 15 | * transLink:转发 16 | */ 17 | private String rel_type; 18 | private String rel_user_id; 19 | 20 | /** 21 | * 1 : 被转发/被回复 22 | * 0 : 主动转发/主动回复 23 | */ 24 | private int by_type; 25 | private WeiboRelations(){}; 26 | 27 | public WeiboRelations(String user_id, String rel_type, String rel_user_id ,int by_type) { 28 | this.user_id = user_id; 29 | this.rel_type = rel_type; 30 | this.rel_user_id = rel_user_id; 31 | this.by_type = by_type; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/hadoop/MR/covid/CovidReducer.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.hadoop.MR.covid; 2 | 3 | import org.apache.hadoop.io.Text; 4 | import org.apache.hadoop.mapreduce.Reducer; 5 | 6 | import java.io.IOException; 7 | 8 | /** 9 | * @author bruce 10 | * @date 2023年03月20日 13:59:45 11 | */ 12 | public class CovidReducer extends Reducer { 13 | @Override 14 | public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { 15 | int totalCases = 0; 16 | int totalDeaths = 0; 17 | 18 | for (Covid value : values) { 19 | totalCases += value.getNewCases(); 20 | totalDeaths += value.getNewDeaths(); 21 | } 22 | 23 | String result = "[" + totalCases + ", " + totalDeaths + "]"; 24 | context.write(key, new Text(result)); 25 | } 26 | } -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/DateUtils.scala: -------------------------------------------------------------------------------- 1 | package 
cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.text.SimpleDateFormat 4 | import java.util.Date 5 | 6 | import scala.util.Random 7 | 8 | object DateUtils { 9 | //获取当前天的日期 10 | def getCurrentDate() = { 11 | val sdf = new SimpleDateFormat("yyyy-MM-dd") 12 | sdf.format(new Date()) 13 | } 14 | //随机获取一个小时 15 | def getRandomHour() = { 16 | val random = new Random() 17 | random.nextInt(24).formatted("%02d") 18 | } 19 | //随机获取分钟或者秒 20 | def getRandomMinutesOrSeconds() = { 21 | val random = new Random() 22 | random.nextInt(60).formatted("%02d") 23 | } 24 | 25 | //根据时间戳转换成 yyyy-mm-dd HH:mm:ss 数据格式 26 | def timestampToDataStr(timeStamp:Long): String = { 27 | val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") 28 | sdf.format(new Date(timeStamp)) 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/EventCatagoryProductCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | public class EventCatagoryProductCount { 4 | 5 | public String event; 6 | public String catagory; 7 | public String product; 8 | public long count; 9 | public long start; 10 | public long end; 11 | 12 | public EventCatagoryProductCount() { 13 | } 14 | 15 | public EventCatagoryProductCount(String event, String catagory, String product, long count, long start, long end) { 16 | this.event = event; 17 | this.catagory = catagory; 18 | this.product = product; 19 | this.count = count; 20 | this.start = start; 21 | this.end = end; 22 | } 23 | 24 | @Override 25 | public String toString() { 26 | return event + "\t" + catagory + "\t" + product + "\t" + count + "\t" + start + "\t" + end; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/join/CountBean.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.join; 2 | 3 | public class CountBean { 4 | 5 | /** 6 | * 设置为 public 7 | */ 8 | public String name; 9 | /** 10 | * 设置为 public 11 | */ 12 | public long number; 13 | 14 | public CountBean() { 15 | } 16 | 17 | public CountBean(String name, long number) { 18 | this.name = name; 19 | this.number = number; 20 | } 21 | 22 | public String getName() { 23 | return name; 24 | } 25 | 26 | public void setName(String name) { 27 | this.name = name; 28 | } 29 | 30 | public long getNumber() { 31 | return number; 32 | } 33 | 34 | public void setNumber(int number) { 35 | this.number = number; 36 | } 37 | 38 | @Override 39 | public String toString() { 40 | return this.name + ":" + this.number; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/python/LogisticRegression.py: -------------------------------------------------------------------------------- 1 | from sklearn.linear_model import LogisticRegression 2 | import pandas as pd 3 | from sklearn.model_selection import train_test_split 4 | 5 | path='C:\\Users\\Bruce\\Downloads\\letter-recognition.data' #数据集路径 6 | Cname = ['字母','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16'] #设置列名称 7 | data=pd.read_csv(path,header=None,names=Cname) 8 | data.index.name='index' #datadrame结构的行索引与列索引名字 9 | data.columns.name='columns' 10 | print(data) 11 | 12 | X=data[data.columns[1:17]] #提取特征值,不需要第一列的字母值 13 | x_train,x_test,y_train,y_test=train_test_split(X,data["字母"],train_size=0.8,random_state=77) 14 | 
#设置最大迭代次数为4000,默认为1000.不更改会出现警告提示 15 | lr=LogisticRegression(max_iter=4000) 16 | clm=lr.fit(x_train,y_train) #输入训练集 17 | print('对测试集的预测结果:') 18 | #输出预测结果、预测结果的结构类型及尺寸 19 | print(clm.predict(x_test),type(clm.predict(x_test)),clm.predict(x_test).shape) 20 | # 21 | print('模型评分:'+ str(clm.score(x_test,y_test))) #用决定系数来打分 -------------------------------------------------------------------------------- /src/main/python/gendata.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | # 机构等级 4 | hos_levels = ['一级:1', '二级:2', '三级:3'] 5 | 6 | # 收入类别 7 | income_categories = ['药品收入:yp', '耗材收入:hc', '检查检验:jcjy', '化验:hy', '医疗服务:yl', '其他:qt'] 8 | 9 | # 机构编码和名称 10 | hos_codes = ['hos001', 'hos002', 'hos003', 'hos004'] 11 | hos_names = ['医院A', '医院B', '医院C', '医院D'] 12 | 13 | # 生成insert语句 14 | for i in range(36): 15 | year_mon = f'2019{i+1:02d}' 16 | for j in range(100): 17 | hos_level = random.choice(hos_levels) 18 | hos_code = random.choice(hos_codes) 19 | hos_name = random.choice(hos_names) 20 | income_category = random.choice(income_categories) 21 | income_amount = round(random.uniform(1000, 10000), 2) 22 | sql = f"INSERT INTO med_ins_income (year, year_mon, hos_level, hos_code, hos_name, income_category, income_amount) VALUES ('2019', '{year_mon}', '{hos_level}', '{hos_code}', '{hos_name}', '{income_category}', {income_amount});" 23 | print(sql) -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/CreateTable.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase; 2 | 3 | import cn.northpark.flink.util.PhoenixUtilV2; 4 | 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.Objects; 8 | import java.util.stream.Collectors; 9 | 10 | /** 11 | * @author bruce 12 | * @date 2022年06月26日 10:03:33 13 | */ 14 | public class CreateTable { 15 | 16 | public static void main(String[] args) { 17 | 18 | //1.建表 19 | String t_weibo_relations_sql = "CREATE TABLE \"stt\".T_WEIBO_RELATIONS_V2 (\n" + 20 | " ID VARCHAR NOT NULL,\n" + 21 | " USER_ID VARCHAR ,\n" + 22 | " REL_TYPE VARCHAR,\n" + 23 | " REL_USER_ID VARCHAR,\n" + 24 | " BY_TYPE INTEGER\n" + 25 | " CONSTRAINT PK PRIMARY KEY (ID)\n" + 26 | ")"; 27 | PhoenixUtilV2.createTable(t_weibo_relations_sql); 28 | 29 | 30 | 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/starrocks/bean/Identities.java: -------------------------------------------------------------------------------- 1 | 2 | package cn.northpark.flink.starrocks.bean; 3 | 4 | import com.google.gson.annotations.SerializedName; 5 | 6 | import javax.annotation.Generated; 7 | 8 | @Generated("net.hexar.json2pojo") 9 | @SuppressWarnings("unused") 10 | public class Identities { 11 | 12 | @SerializedName("$identity_anonymous_id") 13 | private String $identityAnonymousId; 14 | @SerializedName("$identity_mp_id") 15 | private String $identityMpId; 16 | 17 | public String get$identityAnonymousId() { 18 | return $identityAnonymousId; 19 | } 20 | 21 | public void set$identityAnonymousId(String $identityAnonymousId) { 22 | this.$identityAnonymousId = $identityAnonymousId; 23 | } 24 | 25 | public String get$identityMpId() { 26 | return $identityMpId; 27 | } 28 | 29 | public void set$identityMpId(String $identityMpId) { 30 | this.$identityMpId = $identityMpId; 31 | } 32 | 33 | } 34 | 
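The Identities bean above relies on Gson's @SerializedName mapping for the `$`-prefixed Sensors-style fields rather than on the Java field names. A minimal usage sketch follows (not part of the repository; the class name and sample JSON values are invented for illustration):

package cn.northpark.flink.starrocks.bean;

import com.google.gson.Gson;

// Demo only: parse a Sensors-style "identities" JSON fragment into the Gson-annotated bean above.
public class IdentitiesParseDemo {
    public static void main(String[] args) {
        String json = "{\"$identity_anonymous_id\":\"anon-001\",\"$identity_mp_id\":\"mp-001\"}";
        Identities identities = new Gson().fromJson(json, Identities.class);
        System.out.println(identities.get$identityAnonymousId()); // anon-001
        System.out.println(identities.get$identityMpId());        // mp-001
    }
}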
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/hadoop/MR/covid/CovidMapper.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.hadoop.MR.covid; 2 | 3 | import org.apache.hadoop.io.LongWritable; 4 | import org.apache.hadoop.io.Text; 5 | import org.apache.hadoop.mapreduce.Mapper; 6 | 7 | import java.io.IOException; 8 | import java.util.Objects; 9 | 10 | /** 11 | * @author bruce 12 | * @date 2023年03月20日 13:58:54 13 | */ 14 | public class CovidMapper extends Mapper { 15 | @Override 16 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 17 | String[] data = value.toString().split(","); 18 | String country = data[2]; 19 | int newCases = Integer.parseInt(Objects.nonNull(data[4])?data[4]:"0"); 20 | int newDeaths = Integer.parseInt(Objects.nonNull(data[6])?data[6]:"0"); 21 | 22 | if (country.equals("China") || country.equals("United States of America")) { 23 | context.write(new Text(country), new Covid(newCases, newDeaths)); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/resources/config.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | #configs here 3 | ################################################################################ 4 | 5 | #Kafka configs 6 | topics=flink005 7 | group.id=bruce 8 | bootstrap.servers=localhost:9092 9 | auto.offset.reset=earliest 10 | enable.auto.commit=true 11 | 12 | #kafka for browse demo 13 | kafkaBootstrapServers=localhost:9092 14 | browseTopic=flink4b 15 | browseTopicGroupID=bruce 16 | 17 | #kafka for northpark movie 18 | npKafkaBootstrapServers=np:9092 19 | npTopic=northpark 20 | npTopicGroupID=bruce 21 | 22 | 23 | 24 | #flink configs 25 | checkpoint.interval=10000 26 | 27 | 28 | #redis 29 | redis.host=localhost 30 | redis.pwd= 31 | redis.db=0 32 | 33 | #jdbc config 34 | # 数据库驱动|不填写HikariCp会自动识别 35 | driverClassName=com.mysql.jdbc.Driver 36 | # 访问数据库连接 37 | jdbcUrl=jdbc:mysql://localhost:3306/flink?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false&autoReconnect=true 38 | # 数据库用户名 39 | username=root 40 | # 数据库密码 41 | password=123456 -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/join/StreamDataSourceB.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.join; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | 7 | public class StreamDataSourceB extends RichParallelSourceFunction> { 8 | private volatile boolean running = true; 9 | 10 | @Override 11 | public void run(SourceContext> ctx) throws InterruptedException { 12 | 13 | Tuple3[] elements = new Tuple3[]{ 14 | Tuple3.of("a", "hangzhou", 1000000059000L), 15 | Tuple3.of("b", "beijing", 1000000105000L), 16 | }; 17 | 18 | int count = 0; 19 | while (running && count < elements.length) { 20 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (long) elements[count].f2)); 21 | count++; 22 | Thread.sleep(1000); 23 | } 24 | } 25 | 26 | @Override 27 | public void cancel() { 28 | running = false; 29 | } 30 | } 31 | 
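The join package in this section only defines the parallel test sources (StreamDataSourceA/B/C, each emitting Tuple3 records whose third field is an event timestamp) and the CountBean POJO; the driver that actually joins them lives elsewhere in the repository. As a hedged sketch, not the repository's own job, two of these sources could be joined on their first field with an event-time tumbling window roughly as below; the class name, the 5-second watermark delay, and the 10-second window size are arbitrary choices for illustration:

package cn.northpark.flink.join;

import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

// Sketch only: event-time window join of StreamDataSourceA and StreamDataSourceB on their first field.
public class WindowJoinDemo {

    // The third tuple field carries the event timestamp; allow 5 seconds of out-of-orderness.
    private static class TsExtractor extends BoundedOutOfOrdernessTimestampExtractor<Tuple3<String, String, Long>> {
        TsExtractor() { super(Time.seconds(5)); }
        @Override
        public long extractTimestamp(Tuple3<String, String, Long> element) { return element.f2; }
    }

    // Key both streams by the first tuple field.
    private static class FirstField implements KeySelector<Tuple3<String, String, Long>, String> {
        @Override
        public String getKey(Tuple3<String, String, Long> value) { return value.f0; }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        DataStream<Tuple3<String, String, Long>> a =
                env.addSource(new StreamDataSourceA()).assignTimestampsAndWatermarks(new TsExtractor());
        DataStream<Tuple3<String, String, Long>> b =
                env.addSource(new StreamDataSourceB()).assignTimestampsAndWatermarks(new TsExtractor());

        // Inner join within 10-second event-time tumbling windows.
        a.join(b)
                .where(new FirstField())
                .equalTo(new FirstField())
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .apply(new JoinFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, String>() {
                    @Override
                    public String join(Tuple3<String, String, Long> first, Tuple3<String, String, Long> second) {
                        return first.f0 + " -> " + first.f1 + " | " + second.f1;
                    }
                })
                .print();

        env.execute("WindowJoinDemo");
    }
}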
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/udf/TopNWindowFunction.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window.udf; 2 | 3 | import cn.northpark.flink.bean.EventCatagoryProductCount; 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.streaming.api.functions.windowing.WindowFunction; 6 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 7 | import org.apache.flink.util.Collector; 8 | 9 | public class TopNWindowFunction implements WindowFunction, TimeWindow> { 10 | @Override 11 | public void apply(Tuple3 value, TimeWindow window, Iterable input, Collector out) throws Exception { 12 | 13 | String event = value.f0; 14 | String catagory = value.f1; 15 | String product = value.f2; 16 | Long count = input.iterator().next(); 17 | long start = window.getStart(); 18 | long end = window.getEnd(); 19 | 20 | out.collect(new EventCatagoryProductCount(event, catagory, product,count,start, end)); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/ObjectUtil.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.util; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.io.ByteArrayOutputStream; 5 | import java.io.ObjectInputStream; 6 | import java.io.ObjectOutputStream; 7 | 8 | public class ObjectUtil { 9 | 10 | 11 | /** 12 | * 对象转Byte数组 13 | * 14 | * @param obj 15 | * @return 16 | * @throws Exception 17 | */ 18 | public static byte[] objectToBytes(Object obj) throws Exception { 19 | ByteArrayOutputStream out = new ByteArrayOutputStream(); 20 | ObjectOutputStream sOut = new ObjectOutputStream(out); 21 | sOut.writeObject(obj); 22 | sOut.flush(); 23 | byte[] bytes = out.toByteArray(); 24 | 25 | 26 | return bytes; 27 | } 28 | 29 | /** 30 | * 字节数组转对象 31 | * 32 | * @param bytes 33 | * @return 34 | * @throws Exception 35 | */ 36 | public static Object bytesToObject(byte[] bytes) throws Exception { 37 | 38 | //byte转object 39 | ByteArrayInputStream in = new ByteArrayInputStream(bytes); 40 | ObjectInputStream sIn = new ObjectInputStream(in); 41 | return sIn.readObject(); 42 | 43 | } 44 | } 45 | 46 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/timeout/TimeOutResult.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.timeout; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * @author zhangyang 7 | * @date 2020年12月15日 15:15:35 8 | */ 9 | public class TimeOutResult implements Serializable { 10 | 11 | public TimeOutResult() { 12 | } 13 | 14 | public String queueName; 15 | public String primaryKey; 16 | public String resultMsg; 17 | public String status; 18 | 19 | 20 | public TimeOutResult(String queueName, String primaryKey, String resultMsg,String status) { 21 | this.queueName = queueName; 22 | this.primaryKey = primaryKey; 23 | this.resultMsg = resultMsg; 24 | this.status = status; 25 | } 26 | 27 | @Override 28 | public String toString() { 29 | return "TimeOutResult{" + 30 | "queueName='" + queueName + '\'' + 31 | ", primaryKey='" + primaryKey + '\'' + 32 | ", resultMsg='" + resultMsg + '\'' + 33 | ", status='" + status + '\'' + 34 | '}'; 35 | } 36 | } 37 | 
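TimeOutResult above is Serializable, so it can be converted to and from a byte[] with the ObjectUtil helper shown earlier in this section, for example before handing it to a message queue as raw bytes. A minimal round-trip sketch (not part of the repository; the class name, queue name and key values are invented for illustration):

package cn.northpark.flink.timeout;

import cn.northpark.flink.util.ObjectUtil;

// Demo only: serialize a TimeOutResult to bytes and restore it with ObjectUtil.
public class TimeOutResultSerializeDemo {
    public static void main(String[] args) throws Exception {
        TimeOutResult result = new TimeOutResult("order_timeout_queue", "order-1001", "timeout", "0");

        byte[] bytes = ObjectUtil.objectToBytes(result);                       // object -> byte[]
        TimeOutResult copy = (TimeOutResult) ObjectUtil.bytesToObject(bytes);  // byte[] -> object

        System.out.println(copy); // prints the same field values as the original
    }
}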
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/starrocks/bean/Lib.java: -------------------------------------------------------------------------------- 1 | 2 | package cn.northpark.flink.starrocks.bean; 3 | 4 | import com.google.gson.annotations.Expose; 5 | import com.google.gson.annotations.SerializedName; 6 | 7 | import javax.annotation.Generated; 8 | 9 | @Generated("net.hexar.json2pojo") 10 | @SuppressWarnings("unused") 11 | public class Lib { 12 | 13 | @Expose 14 | private String $lib; 15 | @SerializedName("$lib_method") 16 | private String $libMethod; 17 | @SerializedName("$lib_version") 18 | private String $libVersion; 19 | 20 | public String get$lib() { 21 | return $lib; 22 | } 23 | 24 | public void set$lib(String $lib) { 25 | this.$lib = $lib; 26 | } 27 | 28 | public String get$libMethod() { 29 | return $libMethod; 30 | } 31 | 32 | public void set$libMethod(String $libMethod) { 33 | this.$libMethod = $libMethod; 34 | } 35 | 36 | public String get$libVersion() { 37 | return $libVersion; 38 | } 39 | 40 | public void set$libVersion(String $libVersion) { 41 | this.$libVersion = $libVersion; 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/main/scala/cn/northpark/spark/StreamWordCountScala.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.spark 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.streaming.{Seconds, StreamingContext} 5 | /** 6 | * 7 | * @author bruce 8 | * @date 2022年06月17日 09:18:38 9 | */ 10 | object StreamWordCountScala { 11 | 12 | def main(args: Array[String]): Unit = { 13 | //创建SparkConf配置对象 14 | val conf = new SparkConf() 15 | //注意:此处的local[2]表示启动2个进程,一个进程负责读取数据源的数据,一个进程负责处理数据 16 | .setMaster("local[2]") 17 | .setAppName("StreamWordCountScala") 18 | 19 | //创建StreamingContext,指定数据处理间隔为5秒 20 | val ssc = new StreamingContext(conf, Seconds(5)) 21 | 22 | //通过socket获取实时产生的数据 23 | val linesRDD = ssc.socketTextStream("node1", 8888) 24 | 25 | //对接收到的数据使用空格进行切割,转换成单个单词 26 | val wordsRDD = linesRDD.flatMap(_.split(" ")) 27 | 28 | //把每个单词转换成tuple2的形式 29 | val tupRDD = wordsRDD.map((_, 1)) 30 | 31 | //执行reduceByKey操作 32 | val wordcountRDD = tupRDD.reduceByKey(_ + _) 33 | 34 | //将结果数据打印到控制台 35 | wordcountRDD.print() 36 | 37 | //启动任务 38 | ssc.start() 39 | //等待任务停止 40 | ssc.awaitTermination() 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/hadoop/MR/covid/Covid.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.hadoop.MR.covid; 2 | 3 | import org.apache.hadoop.io.Writable; 4 | 5 | import java.io.DataInput; 6 | import java.io.DataOutput; 7 | import java.io.IOException; 8 | 9 | /** 10 | * @author bruce 11 | * @date 2023年03月20日 13:58:33 12 | */ 13 | public class Covid implements Writable { 14 | 15 | public Covid() { 16 | // 空构造函数 17 | } 18 | private int newCases; 19 | private int newDeaths; 20 | 21 | public Covid(int newCases, int newDeaths) { 22 | this.newCases = newCases; 23 | this.newDeaths = newDeaths; 24 | } 25 | 26 | public int getNewCases() { 27 | return newCases; 28 | } 29 | 30 | public int getNewDeaths() { 31 | return newDeaths; 32 | } 33 | 34 | @Override 35 | public void write(DataOutput dataOutput) throws IOException { 36 | dataOutput.writeInt(newCases); 37 | dataOutput.writeInt(newDeaths ); 38 | } 39 | 40 | @Override 41 | 
public void readFields(DataInput dataInput) throws IOException { 42 | this.newCases =dataInput.readInt(); 43 | this.newDeaths =dataInput.readInt(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/HBaseSink.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.util 4 | 5 | import org.apache.flink.configuration.Configuration 6 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 7 | import org.apache.hadoop.conf 8 | import org.apache.hadoop.hbase.client.{ConnectionFactory, Put, Table} 9 | import org.apache.hadoop.hbase.{HBaseConfiguration, TableName, client} 10 | /** 11 | * HBase sink ,批量数据插入到HBase中 12 | */ 13 | class HBaseSink extends RichSinkFunction[java.util.List[Put]] { 14 | var configuration: conf.Configuration = _ 15 | var conn: client.Connection = _ 16 | //初始化 RichSinkFunction 对象时 执行一次 17 | override def open(parameters: Configuration): Unit = { 18 | configuration = HBaseConfiguration.create() 19 | configuration.set("hbase.zookeeper.quorum","node3:2181,node4:2181,node5:2181") 20 | conn = ConnectionFactory.createConnection(configuration) 21 | } 22 | 23 | 24 | override def invoke(value: util.List[Put], context: SinkFunction.Context): Unit = { 25 | //连接HBase 表 26 | val table: Table = conn.getTable(TableName.valueOf("a1")) 27 | table.put(value) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/KeyBy1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.typeinfo.Types; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | 11 | /** 12 | * KeyBy实例一 13 | * @author bruce 14 | */ 15 | public class KeyBy1 { 16 | 17 | public static void main(String[] args) throws Exception { 18 | 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | //直接输入单词 22 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 23 | 24 | SingleOutputStreamOperator> map = lines.map(i -> Tuple2.of(i, 1)).returns(Types.TUPLE(Types.STRING,Types.INT)); 25 | 26 | 27 | KeyedStream, Tuple> keyed = map.keyBy(0); 28 | 29 | keyed.print(); 30 | 31 | env.execute("KeyBy1"); 32 | 33 | } 34 | 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/join/StreamDataSourceC.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.join; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 6 | 7 | 8 | public class StreamDataSourceC extends RichParallelSourceFunction> { 9 | private volatile boolean running = true; 10 | 11 | @Override 12 | public void run(SourceFunction.SourceContext> ctx) throws InterruptedException { 13 | 14 | 
Tuple3[] elements = new Tuple3[]{ 15 | Tuple3.of("a", "beijing", 1000000058000L), 16 | Tuple3.of("c", "beijing", 1000000055000L), 17 | Tuple3.of("d", "beijing", 1000000106000L), 18 | }; 19 | 20 | int count = 0; 21 | while (running && count < elements.length) { 22 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (long) elements[count].f2)); 23 | count++; 24 | Thread.sleep(1000); 25 | } 26 | } 27 | 28 | @Override 29 | public void cancel() { 30 | running = false; 31 | } 32 | } -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/RestfulActivityLocationsApplication.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project; 2 | 3 | import cn.northpark.flink.project.function.RestfulToActivityBeanFunciton; 4 | import cn.northpark.flink.util.FlinkUtilsV1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | 9 | /** 10 | * 11 | * 需求2:查询高德地图API,关联地理位置信息 12 | * 13 | * 给定的数据: 14 | * u001,A1,2019-09-02 10:10:11,1,115.908923,39.267291 15 | * u002,A1,2019-09-02 10:11:11,1,123.818517,41.312458 16 | * u003,A2,2019-09-02 10:13:11,1,121.26757,37.49794 17 | * 18 | * 希望的得到的数据 19 | * u001,A1,2019-09-02 10:10:11,1,北京市 20 | * u002,A1,2019-09-02 10:11:11,1,辽宁省 21 | * 22 | */ 23 | public class RestfulActivityLocationsApplication { 24 | 25 | public static void main(String[] args) throws Exception { 26 | 27 | DataStream lines = FlinkUtilsV1.createKafkaStream(args,new SimpleStringSchema()); 28 | 29 | SingleOutputStreamOperator beans = lines.map(new RestfulToActivityBeanFunciton()); 30 | 31 | beans.print(); 32 | 33 | FlinkUtilsV1.getEnv().execute("HandleActivityLocationsApplication"); 34 | 35 | } 36 | } 37 | -------------------------------------------------------------------------------- 
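RestfulActivityLocationsApplication above maps each Kafka line through RestfulToActivityBeanFunciton to attach a province name looked up from the AMap (高德地图) web API. That function is not shown in this section; the following is only a hedged sketch of one possible shape for such an enrichment step. The sketch class name, the reverse-geocoding URL, the "YOUR_AMAP_KEY" placeholder and returning the raw response body instead of a parsed ActivityBean are all assumptions for illustration:

package cn.northpark.flink.project.function;

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

// Sketch only: split the incoming CSV line, call a reverse-geocoding endpoint, append the response.
public class RestfulGeoEnrichSketch extends RichMapFunction<String, String> {

    private transient CloseableHttpClient httpClient;

    @Override
    public void open(Configuration parameters) {
        // One HTTP client per subtask, reused across records.
        httpClient = HttpClients.createDefault();
    }

    @Override
    public String map(String line) throws Exception {
        // Input shape from the comment above: u001,A1,2019-09-02 10:10:11,1,115.908923,39.267291
        String[] fields = line.split(",");
        String longitude = fields[4];
        String latitude = fields[5];

        String url = "https://restapi.amap.com/v3/geocode/regeo?key=YOUR_AMAP_KEY"
                + "&location=" + longitude + "," + latitude;
        try (CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
            // A real implementation would parse the province out of the JSON body here.
            String body = EntityUtils.toString(response.getEntity(), "UTF-8");
            return fields[0] + "," + fields[1] + "," + fields[2] + "," + fields[3] + "," + body;
        }
    }

    @Override
    public void close() throws Exception {
        if (httpClient != null) {
            httpClient.close();
        }
    }
}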
/src/main/java/cn/northpark/flink/project3/MyParaFileSource.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project3; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple2; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | import java.io.RandomAccessFile; 7 | 8 | public class MyParaFileSource extends RichParallelSourceFunction> { 9 | private String path; 10 | 11 | private boolean flag = true; 12 | 13 | public MyParaFileSource(String path) { 14 | this.path = path; 15 | } 16 | 17 | public MyParaFileSource() { 18 | } 19 | 20 | @Override 21 | public void run(SourceContext ctx) throws Exception { 22 | int index = getRuntimeContext().getIndexOfThisSubtask(); 23 | 24 | RandomAccessFile randomAccessFile = new RandomAccessFile(path +"/" +index +".txt","r"); 25 | 26 | while (flag){ 27 | String line = randomAccessFile.readLine(); 28 | if(line!=null){ 29 | line = new String(line.getBytes("ISO-8859-1"),"utf-8"); 30 | ctx.collect(Tuple2.of(index+"",line)); 31 | }else{ 32 | Thread.sleep(2000); 33 | } 34 | } 35 | } 36 | 37 | @Override 38 | public void cancel() { 39 | flag = true; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/join/StreamDataSourceA.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.join; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | 7 | public class StreamDataSourceA extends RichParallelSourceFunction> { 8 | private volatile boolean running = true; 9 | 10 | @Override 11 | public void run(SourceContext> ctx) throws InterruptedException { 12 | 13 | Tuple3[] elements = new Tuple3[]{ 14 | Tuple3.of("a", "1", 1000000050000L), 15 | Tuple3.of("a", "2", 1000000054000L), 16 | Tuple3.of("a", "3", 1000000079900L), 17 | Tuple3.of("a", "4", 1000000115000L), 18 | Tuple3.of("b", "5", 1000000100000L), 19 | Tuple3.of("b", "6", 1000000108000L) 20 | }; 21 | 22 | int count = 0; 23 | while (running && count < elements.length) { 24 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (Long) elements[count].f2)); 25 | count++; 26 | Thread.sleep(1000); 27 | } 28 | } 29 | 30 | @Override 31 | public void cancel() { 32 | running = false; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/syncIO/AsyncMysqlApplication.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project.syncIO; 2 | 3 | import cn.northpark.flink.project.syncIO.function.AsyncMysqlToActivityBeanFunciton; 4 | import cn.northpark.flink.util.FlinkUtilsV1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | 10 | import java.util.concurrent.TimeUnit; 11 | 12 | /** 13 | * 14 | * Kafka-console-producer --broker-list localhost:9092 --topic flink000 15 | * >A1 16 | * ---------------- 17 | * 1> 新人礼包 18 | * 19 | * 通过连接池异步IO调用数据库关联查询的DEMO 20 | */ 21 | public class AsyncMysqlApplication { 22 | public static void main(String[] args) throws Exception { 23 | 24 | DataStream 
lines = FlinkUtilsV1.createKafkaStream(args,new SimpleStringSchema()); 25 | 26 | //调用异步IO的transform 27 | SingleOutputStreamOperator strs = AsyncDataStream.unorderedWait(lines, new AsyncMysqlToActivityBeanFunciton(), 0, TimeUnit.SECONDS); 28 | 29 | strs.print(); 30 | 31 | FlinkUtilsV1.getEnv().execute("AsyncHandleActivityLocationsApplication"); 32 | 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/KeyBy2Bean.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.KeyedStream; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | 10 | /** 11 | * KeyBy实例2 12 | * @author bruce 13 | */ 14 | public class KeyBy2Bean { 15 | 16 | public static void main(String[] args) throws Exception { 17 | 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | 20 | //直接输入单词 21 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 22 | 23 | SingleOutputStreamOperator map = lines.map(new MapFunction() { 24 | 25 | @Override 26 | public WordCount map(String value) throws Exception { 27 | return WordCount.of(value, 1); 28 | } 29 | }); 30 | 31 | KeyedStream word = map.keyBy("word"); 32 | 33 | word.print(); 34 | 35 | env.execute("KeyBy2Bean"); 36 | 37 | } 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/clickhouse/ReadSQL.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.clickhouse; 2 | 3 | import cn.northpark.flink.util.KafkaString; 4 | import org.apache.kafka.clients.producer.KafkaProducer; 5 | import org.apache.kafka.clients.producer.ProducerRecord; 6 | 7 | import java.io.BufferedReader; 8 | import java.io.FileReader; 9 | 10 | /** 11 | * @author bruce 12 | * @date 2024年04月18日 17:31:29 13 | */ 14 | public class ReadSQL { 15 | public static void main(String[] args) throws Exception { 16 | String sqlFilePath = "C:\\Users\\Bruce\\Desktop\\drg_pay.sql"; // SQL 文件路径 17 | try (BufferedReader reader = new BufferedReader(new FileReader(sqlFilePath))) { 18 | 19 | String line; 20 | StringBuilder batch = new StringBuilder(); 21 | 22 | while ((line = reader.readLine()) != null) { 23 | try { 24 | batch.append(line.trim()); 25 | 26 | if (line.endsWith(";")) { 27 | // 发送到 Kafka 28 | KafkaString.sendKafkaString(KafkaString.buildBasicKafkaProperty(),"drg_pay",batch.toString()); 29 | batch.setLength(0); // 清空批次 30 | } 31 | }catch (Exception e){ 32 | 33 | } 34 | 35 | } 36 | 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/python/KmeansGroup2.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | 3 | import findspark 4 | findspark.init() 5 | 6 | from numpy import array 7 | from math import sqrt 8 | from pyspark import SparkContext 9 | from pyspark.mllib.clustering import KMeans, KMeansModel 10 | import pandas as pd 11 | 12 | if __name__ == "__main__": 13 | sc = SparkContext(appName="KMeansExample",master='local') # 
SparkContext 14 | 15 | 16 | path='C:\\Users\\Bruce\\Downloads\\letter-recognition.data' #数据集路径 17 | Cname = ['字母','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16'] #设置列名称 18 | data=pd.read_csv(path,header=None,names=Cname) 19 | data.index.name='index' #datadrame结构的行索引与列索引名字 20 | data.columns.name='columns' 21 | print(data) 22 | 23 | X=data[data.columns[1:17]] #提取特征值,不需要第一列的字母值 24 | 25 | 26 | # 读取并处理数据 27 | ScData = sc.parallelize(data) 28 | 29 | parsedData = ScData.map(lambda line: array([float(x) for x in line.split(' ')])) 30 | 31 | 32 | # 训练数据 33 | print(parsedData.collect()) 34 | 35 | clusters = KMeans.train(parsedData, k=2, maxIterations=10, 36 | runs=10, initializationMode="random") 37 | 38 | 39 | 40 | #聚类结果 41 | def sort(point): 42 | return clusters.predict(point) 43 | clusters_result = parsedData.map(sort) 44 | # Save and load model 45 | # $example off$ 46 | print('聚类结果:') 47 | print(clusters_result.collect()) 48 | 49 | sc.stop() -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/oracle/FlinkKafkaLinkOracleSource.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.oracle; 2 | 3 | import cn.northpark.flink.exactly.transactionway.FlinkKafkaToMysql; 4 | import cn.northpark.flink.util.FlinkUtils; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.java.tuple.Tuple3; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | 11 | import java.io.InputStream; 12 | 13 | /*** 14 | * flink 读取kafka数据结合Oracle查询 整合数据 15 | * @author bruce 16 | */ 17 | public class FlinkKafkaLinkOracleSource { 18 | 19 | public static void main(String[] args) throws Exception{ 20 | 21 | InputStream is = FlinkKafkaToMysql.class.getClassLoader().getResourceAsStream("config.properties"); 22 | 23 | ParameterTool parameters = ParameterTool.fromPropertiesFile(is); 24 | 25 | DataStream kafkaStream = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 26 | 27 | 28 | SingleOutputStreamOperator> tupleData = kafkaStream.map(new OracleToTupleFunciton()); 29 | 30 | tupleData.print(); 31 | 32 | FlinkUtils.getEnv().execute("FlinkKafkaLinkOracleSource"); 33 | 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/python/KmeansGroup.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | 3 | import findspark 4 | findspark.init() 5 | 6 | from numpy import array 7 | from math import sqrt 8 | from pyspark import SparkContext 9 | from pyspark.mllib.clustering import KMeans, KMeansModel 10 | import pandas as pd 11 | 12 | if __name__ == "__main__": 13 | sc = SparkContext(appName="KMeansExample",master='local') # SparkContext 14 | 15 | 16 | path='C:\\Users\\Bruce\\Downloads\\letter-recognition.data' #数据集路径 17 | Cname = ['字母','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16'] #设置列名称 18 | data=pd.read_csv(path,header=None,names=Cname) 19 | data.index.name='index' #datadrame结构的行索引与列索引名字 20 | data.columns.name='columns' 21 | print(data) 22 | 23 | X=data[data.columns[1:17]] #提取特征值,不需要第一列的字母值 24 | 25 | print(X) 26 | 27 | # 读取并处理数据 28 | ScData = sc.parallelize(X) 29 | 30 | parsedData = ScData.map(lambda line: array([float(x) for x in line.split(' ')])) 31 
| 32 | 33 | # 训练数据 34 | print(parsedData.collect()) 35 | 36 | clusters = KMeans.train(parsedData, k=2, maxIterations=10, 37 | runs=10, initializationMode="random") 38 | 39 | 40 | 41 | #聚类结果 42 | def sort(point): 43 | return clusters.predict(point) 44 | clusters_result = parsedData.map(sort) 45 | # Save and load model 46 | # $example off$ 47 | print('聚类结果:') 48 | print(clusters_result.collect()) 49 | 50 | sc.stop() -------------------------------------------------------------------------------- /src/main/resources/druid.properties: -------------------------------------------------------------------------------- 1 | # 数据库驱动|不填写HikariCp会自动识别 2 | driverClassName=com.mysql.jdbc.Driver 3 | # 访问数据库连接 4 | url=jdbc:mysql://localhost:3306/flink?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false&autoReconnect=true 5 | # 数据库用户名 6 | username=root 7 | # 数据库密码 8 | password=123456 9 | 10 | 11 | 12 | # 开启事务自动提交 13 | defaultAutoCommit=false 14 | 15 | # 配置参数,让ConfigFilter解密密码 16 | #connectionProperties=config.decrypt=true;config.decrypt.key=xxxx 17 | 18 | # 监控统计拦截的filters 19 | filters=stat 20 | 21 | 22 | # 初始化时建立物理连接的个数,初始化发生在显示调用init方法,或者第一次getConnection时 23 | initialSize=10 24 | # 最大连接池数量 25 | maxActive=50 26 | # 最小连接池数量 27 | minIdle:5 28 | # 获取连接等待超时的时间,单位毫秒 29 | maxWait=60000 30 | 31 | # 配置间隔多久才进行一次检测,检测需要关闭的空闲连接,单位是毫秒 32 | # 有两个含义:1) Destroy线程会检测连接的间隔时间 2) testWhileIdle的判断依据,详细看testWhileIdle属性的说明 33 | timeBetweenEvictionRunsMillis=60000 34 | # 一个连接在池中最小生存的时间,单位是毫秒 35 | minEvictableIdleTimeMillis=300000 36 | 37 | # 用来检测连接是否有效 38 | validationQuery=SELECT 1 39 | # 申请连接的时候检测,如果空闲时间大于timeBetweenEvictionRunsMillis,执行validationQuery检测连接是否有效 40 | testWhileIdle=true 41 | # 申请连接时执行validationQuery检测连接是否有效,做了这个配置会降低性能 42 | testOnBorrow=false 43 | # 归还连接时执行validationQuery检测连接是否有效,做了这个配置会降低性能 44 | testOnReturn=false 45 | 46 | # 是否缓存preparedStatement,也就是PSCache 47 | poolPreparedStatements=true 48 | 49 | maxPoolPreparedStatementPerConnectionSize=200 50 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project2/NP_ParallelismFileSource.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project2; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple2; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | import java.io.RandomAccessFile; 7 | 8 | public class NP_ParallelismFileSource extends RichParallelSourceFunction> { 9 | 10 | private String path; 11 | 12 | private boolean flag = true; 13 | 14 | public NP_ParallelismFileSource(String path) { 15 | this.path = path; 16 | } 17 | 18 | public NP_ParallelismFileSource() { 19 | } 20 | 21 | @Override 22 | public void run(SourceContext ctx) throws Exception { 23 | int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask(); 24 | 25 | RandomAccessFile file = new RandomAccessFile(path + "/" + subtaskIndex + ".txt", "r"); 26 | 27 | 28 | while (flag) { 29 | String readLine = file.readLine(); 30 | if (readLine != null) { 31 | readLine = new String(readLine.getBytes("ISO-8859-1"),"UTF-8" ); 32 | ctx.collect(Tuple2.of(subtaskIndex + "", readLine)); 33 | }else { 34 | Thread.sleep(1000); 35 | } 36 | } 37 | } 38 | 39 | @Override 40 | public void cancel() { 41 | flag = false; 42 | 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/bean/Access.java: 
-------------------------------------------------------------------------------- 1 | package cn.northpark.flink.bean; 2 | 3 | public class Access { 4 | 5 | public String device; 6 | public String deviceType; 7 | public String os; 8 | public String event; 9 | public String net; 10 | public String channel; 11 | public String uid; 12 | public int nu; // 1新 13 | public int nu2; 14 | public String ip; // ==> ip去解析 15 | public long time; 16 | public String version; 17 | public String province; 18 | public String city; 19 | 20 | public Product product; 21 | 22 | @Override 23 | public String toString() { 24 | return "Access{" + 25 | "device='" + device + '\'' + 26 | ", deviceType='" + deviceType + '\'' + 27 | ", os='" + os + '\'' + 28 | ", event='" + event + '\'' + 29 | ", net='" + net + '\'' + 30 | ", channel='" + channel + '\'' + 31 | ", uid='" + uid + '\'' + 32 | ", nu=" + nu + 33 | ", nu2=" + nu2 + 34 | ", ip='" + ip + '\'' + 35 | ", time=" + time + 36 | ", version='" + version + '\'' + 37 | ", province='" + province + '\'' + 38 | ", city='" + city + '\'' + 39 | ", product=" + product + 40 | '}'; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/BatchWC.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.operators.DataSource; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | import org.apache.flink.util.Collector; 9 | 10 | /** 11 | * @author zhangyang 12 | * 使用Java API来开发Flink的批处理应用程序. 13 | */ 14 | public class BatchWC { 15 | 16 | public static void main(String[] args) throws Exception { 17 | 18 | //1.环境 19 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | //2.read 22 | DataSource readTextFile = env.readTextFile("D:\\mac.txt"); 23 | 24 | readTextFile.print(); 25 | 26 | //3.transform 27 | readTextFile.flatMap(new FlatMapFunction>() { 28 | 29 | @Override 30 | public void flatMap(String value, Collector> collector) throws Exception { 31 | // TODO Auto-generated method stub 32 | String[] tokens = value.toLowerCase().split(","); 33 | for (String token : tokens) { 34 | if(token.length()>0) { 35 | collector.collect(new Tuple2(token,1));//每个单词数量设置为1,后面再统计/累加... 
36 | } 37 | } 38 | } 39 | }).groupBy(0).sum(1).print(); 40 | 41 | //4.execute 42 | // env.execute(); 43 | 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/resources/kafka.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | #configs here 3 | ################################################################################ 4 | 5 | #Kafka configs 6 | 7 | #for test common other functions 8 | topics=flink000 9 | rdf_topics=flink_rdf 10 | 11 | #for test window count and convert 12 | #topics=event008 13 | 14 | #for test sink statistics 15 | #topics=flink777 16 | group.id=bruce 17 | bootstrap.servers=PLAINTEXT://node1:9092,PLAINTEXT://node2:9092,PLAINTEXT://node3:9092 18 | #zookeeper.connect=node1:2181,node2:2181,node3:2181 19 | auto.offset.reset=latest 20 | #auto.offset.reset=earliest 21 | enable.auto.commit=true 22 | key.serializer=org.apache.kafka.common.serialization.StringSerializer 23 | value.serializer=org.apache.kafka.common.serialization.StringSerializer 24 | key.deserializer=org.apache.kafka.common.serialization.StringDeserializer 25 | value.deserializer=org.apache.kafka.common.serialization.StringDeserializer 26 | 27 | 28 | 29 | 30 | #redis 31 | redis.host=node1 32 | redis.pwd=123456 33 | redis.db=0 34 | 35 | 36 | #jdbc config 37 | # 数据库驱动|不填写HikariCp会自动识别 38 | driverClassName=com.mysql.jdbc.Driver 39 | # 访问数据库连接 40 | jdbcUrl=jdbc:mysql://localhost:3306/flink?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false&autoReconnect=true 41 | # 数据库用户名 42 | username=root 43 | # 数据库密码 44 | password=123456 45 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/exactly/overrideway/MyRedisSink.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.exactly.overrideway; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.api.java.utils.ParameterTool; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 7 | import redis.clients.jedis.Jedis; 8 | 9 | public class MyRedisSink extends RichSinkFunction> { 10 | private transient Jedis jedis; 11 | 12 | @Override 13 | public void open(Configuration parameters) throws Exception { 14 | super.open(parameters); 15 | 16 | ParameterTool params = (ParameterTool) getRuntimeContext().getExecutionConfig().getGlobalJobParameters(); 17 | String host = params.getRequired("redis.host"); 18 | String password = params.get("redis.pwd", null); 19 | int db = params.getInt( "redis.db", 0); 20 | jedis = new Jedis(host, 6379, 5000); 21 | // jedis.auth(password); 22 | jedis.select(db); 23 | } 24 | 25 | @Override 26 | public void close() throws Exception { 27 | super.close(); 28 | jedis.close(); 29 | } 30 | 31 | @Override 32 | public void invoke(Tuple3 value, Context context) throws Exception { 33 | if(!jedis.isConnected()){ 34 | jedis.connect(); 35 | } 36 | jedis.hset(value.f0,value.f1,value.f2); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/CountWindowAll.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import 
org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.datastream.AllWindowedStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow; 9 | 10 | /** 11 | * 不分组划分窗口,将整体当成一个组 12 | * @author bruce 13 | */ 14 | public class CountWindowAll { 15 | public static void main(String[] args) throws Exception { 16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | DataStreamSource source = env.socketTextStream("localhost", 4000); 19 | 20 | SingleOutputStreamOperator map = source.map(new MapFunction() { 21 | @Override 22 | public Integer map(String value) throws Exception { 23 | return Integer.parseInt(value); 24 | } 25 | }); 26 | //不分组,将整体当成一个组 27 | AllWindowedStream windowAll = map.countWindowAll(5); 28 | 29 | 30 | SingleOutputStreamOperator summed = windowAll.sum(0); 31 | 32 | summed.print(); 33 | 34 | env.execute("CountWindowAll"); 35 | 36 | 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/KafkaSink.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.util.Properties 4 | 5 | import org.apache.flink.configuration.Configuration 6 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 7 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 8 | import org.apache.kafka.common.serialization.StringSerializer 9 | 10 | class KafkaSink[T](cls:String)(topic: String) extends RichSinkFunction[T] { 11 | var props :Properties = _ 12 | //创建Kafka Producer 13 | var producer: KafkaProducer[String, String] = _ 14 | //当初始化 RichSinnkFunction时,只会调用一次 15 | override def open(parameters: Configuration): Unit = { 16 | //创建kafka配置 17 | props = new Properties() 18 | props.setProperty("bootstrap.servers","node1:9092,node2:9092,node3:9092") 19 | props.setProperty("key.serializer",classOf[StringSerializer].getName) 20 | props.setProperty("value.serializer",classOf[StringSerializer].getName) 21 | 22 | producer = new KafkaProducer[String,String](props) 23 | } 24 | 25 | //来一条数据,处理一次 26 | override def invoke(value: T, context: SinkFunction.Context): Unit = { 27 | //统计最通畅的top5通道 28 | val info: Top5MonitorBean = value.asInstanceOf[Top5MonitorBean] 29 | 30 | //向kafka中写入 31 | 32 | producer.send( new ProducerRecord[String,String](topic,info.toString)) 33 | 34 | } 35 | 36 | override def close(): Unit = { 37 | producer.close() 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/suicideApp/HdfsTest.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.suicideApp; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.LocatedFileStatus; 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.fs.RemoteIterator; 8 | 9 | import java.io.IOException; 10 | 11 | /** 12 | * !!!hadoop hdfs连接不上 直接把9000端口去掉就可以了!!!! 
13 | * @author bruce 14 | * @date 2022年04月21日 09:51:08 15 | */ 16 | public class HdfsTest { 17 | 18 | public static void main(String[] args) throws IOException { 19 | Configuration conf = new Configuration(); 20 | //这里设置namenode 21 | conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem"); 22 | conf.set("dfs.nameservices", "node1"); 23 | conf.set("fs.defaultFS", "hdfs://node1"); 24 | FileSystem fileSystem1 = FileSystem.get(conf); 25 | System.out.println("===contains1==="); 26 | //测试访问情况 27 | Path path=new Path("/scd"); 28 | System.out.println("===contains2==="); 29 | if(fileSystem1.exists(path)){ 30 | System.out.println("===contains3==="); 31 | } 32 | System.out.println("===contains4==="); 33 | RemoteIterator list=fileSystem1.listFiles(path,true); 34 | while (list.hasNext()){ 35 | LocatedFileStatus fileStatus=list.next(); 36 | System.out.println(fileStatus.getPath()); 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/UDTFSQL.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.stream.sql; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 7 | import org.apache.flink.types.Row; 8 | 9 | /** 10 | * 自定义UDTF 一行输入 多行输出 11 | */ 12 | public class UDTFSQL { 13 | public static void main(String[] args) throws Exception { 14 | 15 | //实时dataStream api 16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | //实时Table执行上下文 19 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 20 | 21 | 22 | //42.57.88.186 23 | //a.b.c.d 24 | //聪.明.的.六.猴.儿 25 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 26 | 27 | tableEnv.registerDataStream("t_lines",lines,"line"); 28 | 29 | //注册自定义函数是一个UDTF,输入一行字符串,返回多列 30 | tableEnv.registerFunction("split",new Split("\\.")); 31 | 32 | //lateral:表生成函数 33 | Table table = tableEnv.sqlQuery("select word from t_lines,lateral table(split(line)) as T(word) "); 34 | 35 | tableEnv.toAppendStream(table, Row.class).print(); 36 | // tableEnv.toRetractStream(table,Row.class).print(); 37 | env.execute("UDTFSQL"); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/DelMany.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase; 2 | 3 | import cn.northpark.flink.util.PhoenixUtilV2; 4 | 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.Objects; 8 | import java.util.stream.Collectors; 9 | 10 | /** 11 | * @author bruce 12 | * @date 2022年06月26日 12:12:06 13 | */ 14 | public class DelMany { 15 | public static void main(String[] args) { 16 | //4.删除一批关系 17 | //把 A的所有【被转发/被评论关系】删除 18 | String userid = "75e1227c896a4eaa9fe782556af0fc76"; 19 | delMany( userid); 20 | 21 | } 22 | 23 | private static void delMany(String userid) { 24 | //查询A的sub-list 25 | List> mapList = PhoenixUtilV2.queryList("select * from \"stt\".t_weibo_relations_v2 where rel_user_id = ? 
and by_type = 0",userid); 26 | List sel_list = mapList.stream().filter(t -> Objects.nonNull(t.get("USER_ID"))).map(t -> t.get("USER_ID").toString()).collect(Collectors.toList()); 27 | 28 | //删除A的数据 29 | String del_A = "delete from \"stt\".t_weibo_relations_v2 where user_id = ? and by_type = 1"; 30 | PhoenixUtilV2.delData(del_A,userid); 31 | 32 | //删除sel_list的数据 33 | String del_sel_list = "delete from \"stt\".t_weibo_relations_v2 where user_id = ? and rel_user_id = ? and by_type = 0"; 34 | for (String sel_id : sel_list) { 35 | PhoenixUtilV2.delData(del_sel_list,sel_id,userid); 36 | } 37 | 38 | 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/DelOne.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase; 2 | 3 | import cn.northpark.flink.util.PhoenixUtilV2; 4 | import cn.northpark.flink.weiboAPP.hbase.bean.WeiboRelations; 5 | import cn.northpark.flink.weiboAPP.hbase.enums.RelType; 6 | 7 | /** 8 | * @author bruce 9 | * @date 2022年06月26日 12:10:52 10 | */ 11 | public class DelOne { 12 | public static void main(String[] args) { 13 | //3.删除一条关系 14 | //构造A的b 15 | //8538d7f7b0724b518b33129672a38915 reply 4ef14c4e222d4a9c8110d1a435a24ea3 1 16 | WeiboRelations bean_by = new WeiboRelations("8538d7f7b0724b518b33129672a38915", RelType.REPLY,"4ef14c4e222d4a9c8110d1a435a24ea3",1); 17 | delOne(bean_by); 18 | } 19 | 20 | /** 21 | * 删除一条转发/评论关系 22 | */ 23 | private static void delOne(WeiboRelations bean_by) { 24 | 25 | String del_one_sql = "delete from \"stt\".t_weibo_relations_v2 where user_id = ? and rel_type = ? and rel_user_id = ? and by_type = ?"; 26 | //删除A的b 27 | PhoenixUtilV2.delData(del_one_sql, 28 | bean_by.getUser_id(), 29 | bean_by.getRel_type(), 30 | bean_by.getRel_user_id(), 31 | bean_by.getBy_type()); 32 | //删除b的A 33 | PhoenixUtilV2.delData(del_one_sql, 34 | bean_by.getRel_user_id(), 35 | bean_by.getRel_type(), 36 | bean_by.getUser_id(), 37 | bean_by.getBy_type()==1?0:1); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/KafkaSource.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | 4 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 5 | import org.apache.flink.streaming.api.datastream.DataStream; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 8 | 9 | import java.util.Properties; 10 | 11 | /** 12 | * @author zhangyang 13 | * 使用Kafka作为数据源读取数据 exactly once 14 | */ 15 | public class KafkaSource { 16 | 17 | public static void main(String[] args) throws Exception { 18 | 19 | //1.环境 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | 22 | Properties props = new Properties(); 23 | 24 | //指定Ka fka的Broker地址 25 | props.setProperty( "bootstrap.servers", "localhost:9092"); 26 | //指定组ID 27 | props.setProperty("group.id", "bruce"); 28 | //如果没有记录偏移量,第一次从最开始消费 29 | props.setProperty("auto.offset.reset", "earliest") ; 30 | //kafka的消费者不自动提交偏移量 31 | //props。setProperty("enable. auto. 
commit", "false"); 32 | 33 | //2.read 34 | FlinkKafkaConsumer kafkaSource = new FlinkKafkaConsumer<>("flink000", new SimpleStringSchema(), props); 35 | 36 | 37 | DataStream lines = env.addSource(kafkaSource); 38 | 39 | //3.sink/transform 40 | lines.print(); 41 | 42 | //4.execute 43 | env.execute(); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/TrafficEntity.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | //车辆监控实体 4 | case class MonitorCarBean(areaId: String, roadId: String, monitorId: String, cameraId: String, actionTime: Long, car: String, speed: Double) 5 | 6 | 7 | //最通畅的top5 通道实体 8 | case class Top5MonitorBean(windowStartTime: String, windowEndTime: String, monitorId: String, hightSpeedCarCount: Long, middleSpeedCount: Long, normalSpeedCarCount: Long, lowSpeedCarCount: Long) 9 | 10 | //通道通过车辆数的统计实体 11 | case class MonitorSpeedClsNumsBean(xhightSpeedCarCount: Long, xmiddleSpeedCount: Long, xnormalSpeedCarCount: Long, xlowSpeedCarCount: Long) extends Ordered[MonitorSpeedClsNumsBean] { 12 | var hightSpeedCarCount = xhightSpeedCarCount 13 | var middleSpeedCount = xmiddleSpeedCount 14 | var normalSpeedCarCount = xnormalSpeedCarCount 15 | var lowSpeedCarCount = xlowSpeedCarCount 16 | 17 | override def compare(that: MonitorSpeedClsNumsBean): Int = { 18 | if (this.hightSpeedCarCount != that.hightSpeedCarCount) { 19 | (this.hightSpeedCarCount - that.hightSpeedCarCount).toInt 20 | } else if (this.middleSpeedCount != that.middleSpeedCount) { 21 | (this.middleSpeedCount - that.middleSpeedCount).toInt 22 | } else if (this.normalSpeedCarCount != that.normalSpeedCarCount) { 23 | (this.normalSpeedCarCount - that.normalSpeedCarCount).toInt 24 | } else { 25 | (this.lowSpeedCarCount - that.lowSpeedCarCount).toInt 26 | } 27 | } 28 | } 29 | 30 | 31 | object TrafficEntity { 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/UDFSQL.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.stream.sql; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 7 | import org.apache.flink.types.Row; 8 | 9 | /** 10 | * 自定义UDF 一行输入 一行输出 11 | */ 12 | public class UDFSQL { 13 | public static void main(String[] args) throws Exception { 14 | 15 | //实时dataStream api 16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | //注册一个可以cache的文件,通过网络发送给taskManager 19 | env.registerCachedFile("/Users/bruce/Desktop/ip.txt","ip-rules"); 20 | 21 | //实时Table执行上下文 22 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 23 | 24 | 25 | //42.57.88.186 26 | //106.121.4.223 27 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 28 | 29 | tableEnv.registerDataStream("t_lines",lines,"ip"); 30 | 31 | //注册自定义函数是一个UDF,输入一个IP地址,返回ROW<省、市> 32 | tableEnv.registerFunction("ipLocation",new IpLocation()); 33 | 34 | Table table = tableEnv.sqlQuery("select ip,ipLocation(ip) from t_lines "); 35 | 36 | tableEnv.toAppendStream(table, Row.class).print(); 37 | // 
tableEnv.toRetractStream(table,Row.class).print(); 38 | env.execute("UDFSQL"); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/SlidingWindowAll.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.datastream.AllWindowedStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.time.Time; 9 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 10 | 11 | /** 12 | * 滑动窗口 13 | * @author bruce 14 | */ 15 | public class SlidingWindowAll { 16 | public static void main(String[] args) throws Exception { 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | 19 | DataStreamSource source = env.socketTextStream("localhost", 4000); 20 | 21 | SingleOutputStreamOperator map = source.map(new MapFunction() { 22 | @Override 23 | public Integer map(String value) throws Exception { 24 | return Integer.parseInt(value); 25 | } 26 | }); 27 | 28 | //不分组,将整体当成一个组 29 | AllWindowedStream windowAll = map.timeWindowAll(Time.seconds(10),Time.seconds(5)); 30 | 31 | 32 | SingleOutputStreamOperator summed = windowAll.sum(0); 33 | 34 | summed.print(); 35 | 36 | env.execute("SlidingWindowAll"); 37 | 38 | 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/TumblingWindowAll.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.datastream.AllWindowedStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.time.Time; 9 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 10 | 11 | /** 12 | * 滚动窗口,不分组划分窗口,将整体当成一个组,5秒一个窗口 13 | * @author bruce 14 | */ 15 | public class TumblingWindowAll { 16 | public static void main(String[] args) throws Exception { 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | 19 | DataStreamSource source = env.socketTextStream("localhost", 4000); 20 | 21 | SingleOutputStreamOperator map = source.map(new MapFunction() { 22 | @Override 23 | public Integer map(String value) throws Exception { 24 | return Integer.parseInt(value); 25 | } 26 | }); 27 | //不分组,将整体当成一个组 28 | AllWindowedStream windowAll = map.timeWindowAll(Time.seconds(5)); 29 | 30 | 31 | SingleOutputStreamOperator summed = windowAll.sum(0); 32 | 33 | summed.print(); 34 | 35 | env.execute("TumblingWindowAll"); 36 | 37 | 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/TT.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import cn.hutool.core.codec.Base64; 4 | import 
cn.hutool.http.HttpUtil; 5 | import cn.northpark.flink.util.JsonUtil; 6 | import com.alibaba.fastjson.JSON; 7 | import com.alibaba.fastjson.JSONArray; 8 | 9 | import java.util.Map; 10 | 11 | /** 12 | * @author bruce 13 | * @date 2023年03月21日 16:51:37 14 | */ 15 | public class TT { 16 | public static void main(String[] args) { 17 | String decode = Base64.decodeStr("NzRhODVkNjFmYTkyYWZlMzViYzY2YmUyZjk1ZjBjMTY="); 18 | 19 | // String baseUrl = "https://restapi.amap.com/v3/geocode/geo?address="+"西山温泉"+"&key="+decode; 20 | // String res = HttpUtil.get(baseUrl); 21 | // Map json2map = JsonUtil.json2map(res); 22 | // System.err.println(json2map); 23 | // if(json2map.get("info").toString().equals("OK")){ 24 | // Object geocodes = json2map.get("geocodes"); 25 | // JSONArray jsonArray = JSON.parseArray(geocodes.toString()); 26 | // Map geocodesMap = JsonUtil.json2map(jsonArray.get(0).toString()); 27 | // String location = geocodesMap.get("location").toString(); 28 | // String longitude = location.split(",")[0]; 29 | // String latitude = location.split(",")[1]; 30 | // System.err.println(longitude); 31 | // System.err.println(latitude); 32 | // 33 | // } 34 | 35 | System.err.println(Double.valueOf("114.123456")==0d); 36 | System.err.println(Double.valueOf("0.000000")==0d); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/syncIO/AsyncRestfulApplication.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project.syncIO; 2 | 3 | import cn.northpark.flink.project.ActivityBean; 4 | import cn.northpark.flink.project.syncIO.function.AsyncRestfulToActivityBeanFunciton; 5 | import cn.northpark.flink.util.FlinkUtilsV1; 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 7 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | 11 | import java.util.concurrent.TimeUnit; 12 | 13 | /** 14 | * 15 | * Kafka-console-producer --broker-list localhost:9092 --topic flink000 16 | * >u002,A1,2019-09-02 10:11:11,1,123.818517,41.312458 17 | * ---------------- 18 | * 1> ActivityBean{uid='u002', aid='A1', activityName='null', time='2019-09-02 10:11:11', eventType=1, province='辽宁省', longitude=null, latitude=null} 19 | * 20 | * 通过Http 异步IO 调用restful请求高德接口获取关联的地区的DEMO 21 | */ 22 | public class AsyncRestfulApplication { 23 | public static void main(String[] args) throws Exception { 24 | 25 | DataStream lines = FlinkUtilsV1.createKafkaStream(args,new SimpleStringSchema()); 26 | 27 | //调用异步IO的transform 28 | SingleOutputStreamOperator beans = AsyncDataStream.unorderedWait(lines, new AsyncRestfulToActivityBeanFunciton(), 0, TimeUnit.SECONDS, 10); 29 | 30 | beans.print(); 31 | 32 | FlinkUtilsV1.getEnv().execute("AsyncHandleActivityLocationsApplication"); 33 | 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/JDBCSink.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.sql.{Connection, DriverManager, PreparedStatement} 4 | 5 | import org.apache.flink.configuration.Configuration 6 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 7 | 8 | class JDBCSink[T](cls:String) 
extends RichSinkFunction[T] { 9 | var conn: Connection = _ 10 | var pst: PreparedStatement = _ 11 | var stop = false 12 | //当初始化 RichSinnkFunction时,只会调用一次 13 | override def open(parameters: Configuration): Unit = { 14 | conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/flink", "root", "123456") 15 | } 16 | 17 | //来一条数据,处理一次 18 | override def invoke(value: T, context: SinkFunction.Context): Unit = { 19 | if("Top5MonitorInfo".equals(cls)){ 20 | //统计最通畅的top5通道 21 | val info: Top5MonitorBean = value.asInstanceOf[Top5MonitorBean] 22 | pst = conn.prepareStatement("insert into t_top5_monitor_info (start_time,end_time,monitor_id,hight_speed_carcount,middle_speed_carcount,normal_speed_carcount,low_speed_carcount) values(?,?,?,?,?,?,?)") 23 | pst.setString(1,info.windowStartTime) 24 | pst.setString(2,info.windowEndTime) 25 | pst.setString(3,info.monitorId) 26 | pst.setDouble(4,info.hightSpeedCarCount) 27 | pst.setDouble(5,info.middleSpeedCount) 28 | pst.setDouble(6,info.normalSpeedCarCount) 29 | pst.setDouble(7,info.lowSpeedCarCount) 30 | pst.executeUpdate() 31 | } 32 | 33 | } 34 | 35 | override def close(): Unit = { 36 | pst.close() 37 | conn.close() 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project3/OperatorStateTest.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project3; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 6 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | public class OperatorStateTest { 10 | public static void main(String[] args) throws Exception { 11 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 12 | 13 | env.enableCheckpointing(5000); 14 | 15 | env.setStateBackend(new FsStateBackend("file:///Users/bruce/Documents/workspace/np-flink/backEnd")); 16 | 17 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 18 | 19 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3,2000)); 20 | 21 | env.addSource(new MyExactlyOnceParaFileSource("/Users/bruce/Desktop/data")).print(); 22 | 23 | //THROW exception 24 | env.socketTextStream("localhost",4000).map(new MapFunction() { 25 | @Override 26 | public Object map(String value) throws Exception { 27 | if(value.startsWith("jeyy")){ 28 | System.out.println(1/0); 29 | } 30 | return value; 31 | } 32 | }); 33 | 34 | env.execute("OperatorStateTest"); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/KeyBy3.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | /** 10 | * KeyBy实例3 keyBy多个字段进行分组 11 | * @author bruce 12 | */ 13 | public class KeyBy3 { 14 | 15 | public static void 
main(String[] args) throws Exception { 16 | 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | 19 | //直接输入单词 20 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 21 | 22 | //辽宁,沈阳,1000 23 | //山东,青岛,2000 24 | //山东,青岛,2000 25 | //山东,烟台,1000 26 | SingleOutputStreamOperator> proCityAndMoney = lines.map(new MapFunction>() { 27 | 28 | @Override 29 | public Tuple3 map(String value) throws Exception { 30 | String[] words = value.split(","); 31 | String province = words[0]; 32 | String city = words[1]; 33 | Double money = Double.parseDouble(words[2]); 34 | return Tuple3.of(province, city, money); 35 | } 36 | }); 37 | 38 | proCityAndMoney.keyBy(0,1).sum(2).print(); 39 | 40 | 41 | env.execute("KeyBy3"); 42 | 43 | } 44 | 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/RabbitMQUtils.java: -------------------------------------------------------------------------------- 1 | 2 | package cn.northpark.flink.util; 3 | 4 | import cn.northpark.flink.MerchantDayStaApp.MerchantDaySta; 5 | import com.alibaba.fastjson.JSON; 6 | import com.rabbitmq.client.*; 7 | import lombok.extern.slf4j.Slf4j; 8 | 9 | /** 10 | * 操作rabbitmq队列工具类,支持集群自动重连 11 | * 12 | * @author bruce 13 | * 14 | */ 15 | @Slf4j 16 | public class RabbitMQUtils { 17 | 18 | final static String CHARSET_UTF8 = "UTF-8"; 19 | static Channel channel = null; 20 | 21 | /** 22 | * 发送消息到rabbitmq 23 | * 24 | * @param queueName 25 | * 队列名 26 | * @param Message 27 | * 消息 28 | */ 29 | public static void Send(String queueName, String Message) { 30 | try { 31 | if(channel==null) 32 | { 33 | Connection connection = RabbitMQConFactory.getConnection("common"); 34 | channel = connection.createChannel(); 35 | } 36 | channel.queueDeclare(queueName, true, false, false, null); 37 | channel.basicPublish("", queueName, null, Message.getBytes(CHARSET_UTF8)); 38 | log.info(" Sent '" + Message + "' SUCCESS!"); 39 | } catch (Exception e) { 40 | e.printStackTrace(); 41 | } 42 | } 43 | public static void main(String[] args) throws InterruptedException { 44 | 45 | MerchantDaySta bean = new MerchantDaySta("1001",20.55); 46 | // MerchantDaySta bean2 = new MerchantDaySta("1002",80.55); 47 | // MerchantDaySta bean3 = new MerchantDaySta("1002",70.55); 48 | 49 | RabbitMQUtils.Send("flink_amount_queue", JSON.toJSONString(bean)); 50 | // RabbitMqUtils.Send("flink_amount_queue", JSON.toJSONString(bean2)); 51 | // RabbitMqUtils.Send("flink_amount_queue", JSON.toJSONString(bean3)); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/RestartStrategy2.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | 10 | public class RestartStrategy2 { 11 | public static void main(String[] args) throws Exception{ 12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 13 | 14 | //只有开启了checkpoint 才会有重启策略 15 | env.enableCheckpointing(8000); 16 | 17 | 
//设置重启策略为重启2次,间隔2秒 18 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(2,2)); 19 | 20 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 21 | 22 | SingleOutputStreamOperator> wordOne = lines.map(new MapFunction>() { 23 | 24 | @Override 25 | public Tuple2 map(String value) throws Exception { 26 | if (value.startsWith("jeyy")) { 27 | throw new RuntimeException("jeyy来了,程序出错了!!!"); 28 | } 29 | return Tuple2.of(value, 1); 30 | } 31 | }); 32 | 33 | 34 | SingleOutputStreamOperator> summed = wordOne.keyBy(0).sum(1); 35 | 36 | summed.print(); 37 | 38 | env.execute("RestartStrategy2"); 39 | 40 | 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/AddSink1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 9 | 10 | /** 11 | * @author bruce 12 | * 通过自定义sink来讲解subTask的编号问题 13 | * 14 | */ 15 | public class AddSink1 { 16 | public static void main(String[] args) throws Exception { 17 | 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | 20 | DataStreamSource socketTextStream = env.socketTextStream("localhost", 4000); 21 | 22 | 23 | SingleOutputStreamOperator> maped = socketTextStream.map(new MapFunction>() { 24 | @Override 25 | public Tuple2 map(String value) throws Exception { 26 | return Tuple2.of(value, 1L); 27 | } 28 | }); 29 | 30 | // maped.print("the result is ").setParallelism(2); 31 | 32 | maped.addSink(new RichSinkFunction>() { 33 | 34 | @Override 35 | public void invoke(Tuple2 value, Context context) throws Exception { 36 | 37 | System.out.println(getRuntimeContext().getIndexOfThisSubtask() +1 + "> "+value); 38 | 39 | } 40 | }); 41 | 42 | 43 | env.execute("AddSink1"); 44 | 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCountSocket.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.streaming.api.windowing.time.Time; 8 | import org.apache.flink.util.Collector; 9 | 10 | /** 11 | * @author zhangyang 12 | * 使用Java API来开发Flink的实时处理应用程序. 
13 | * 14 | * wc统计的数据我们源自于socket 15 | */ 16 | public class StreamingWordCountSocket { 17 | 18 | public static void main(String[] args) throws Exception { 19 | // step1 :获取执行环境 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | 22 | // step2:读取数据 23 | DataStreamSource text = env.socketTextStream("localhost", 4000); 24 | 25 | 26 | // step3: transform 27 | text.flatMap(new FlatMapFunction>() { 28 | @Override 29 | public void flatMap(String value, Collector> collector) throws Exception { 30 | String[] tokens = value.toLowerCase().split(","); 31 | for(String token : tokens) { 32 | if(token.length() > 0) { 33 | collector.collect(new Tuple2(token,1)); 34 | } 35 | } 36 | } 37 | }).keyBy(0).timeWindow(Time.seconds(5)).sum(1).print();//.setParallelism(1); 38 | 39 | //step 4:execute 40 | env.execute("StreamingWordCountSocket"); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/hadoop/MR/covid/CovidApp.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.hadoop.MR.covid; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.Text; 6 | import org.apache.hadoop.mapreduce.Job; 7 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 8 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 9 | 10 | /** 11 | * 12 | * 13 | * //hadoop提交作业 14 | * 1.hadoop jar np_hadoop-1.0-SNAPSHOT-jar-with-dependencies.jar hadoop.CovidApp 15 | * 16 | * //列出输出文件列表 17 | * 2.hdfs dfs -ls /BigDataProject/A 18 | * 19 | * //查看输出结果 20 | * 3.hadoop fs -cat /BigDataProject/A/part-r-00000 21 | * @author bruce 22 | * @date 2023年03月20日 14:00:35 23 | */ 24 | public class CovidApp { 25 | public static void main(String[] args) throws Exception { 26 | 27 | Configuration conf = new Configuration(); 28 | //本地执行 29 | conf.set("fs.defaultFS", "file:///"); 30 | conf.set("mapreduce.framework.name", "local"); 31 | 32 | Job job = Job.getInstance(conf, "Covid"); 33 | job.setJarByClass(CovidApp.class); 34 | job.setMapperClass(CovidMapper.class); 35 | job.setReducerClass(CovidReducer.class); 36 | 37 | //map 38 | job.setMapOutputKeyClass(Text.class); 39 | job.setMapOutputValueClass(Covid.class); 40 | 41 | //out 42 | job.setOutputKeyClass(Text.class); 43 | job.setOutputValueClass(Text.class); 44 | 45 | FileInputFormat.addInputPath(job, new Path("C:\\Users\\Bruce\\Downloads\\COVID-19.dat")); 46 | FileOutputFormat.setOutputPath(job, new Path("C:\\Users\\Bruce\\Downloads\\output")); 47 | System.exit(job.waitForCompletion(true) ? 
0 : 1); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/batch/SQLWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.batch; 2 | 3 | import cn.northpark.flink.WordCount; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.operators.DataSource; 7 | import org.apache.flink.table.api.Table; 8 | import org.apache.flink.table.api.bridge.java.BatchTableEnvironment; 9 | 10 | /** 11 | * @author bruce 12 | * 利用sql api进行离线计算 13 | */ 14 | public class SQLWordCount { 15 | public static void main(String[] args) throws Exception { 16 | 17 | //dataSet api 18 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 19 | 20 | //实时Table执行上下文 21 | BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env); 22 | 23 | //模拟数据 24 | DataSource wordCountDataSource = env.fromElements( 25 | new WordCount("java", 1), 26 | new WordCount("scala", 1), 27 | new WordCount("java", 1), 28 | new WordCount("java", 1), 29 | new WordCount("flink", 1), 30 | new WordCount("flink", 1), 31 | new WordCount("vue", 1) 32 | 33 | 34 | ); 35 | 36 | 37 | //将dataSet注册成表,指定字段名称 38 | tableEnv.registerDataSet("word_count",wordCountDataSource,"word,counts"); 39 | 40 | String sql = "select word,sum(counts) as counts from word_count group by word having sum(counts) >=2 order by counts desc "; 41 | 42 | Table table = tableEnv.sqlQuery(sql); 43 | 44 | //把表转换成dataSet 45 | DataSet rowDataSet = tableEnv.toDataSet(table, WordCount.class); 46 | 47 | rowDataSet.print(); 48 | 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCount2.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.util.Collector; 9 | 10 | /** 11 | * @author zhangyang 12 | * 按照步骤来一步步拆分Task是如何划分的 13 | * wc统计的数据我们源自于socket 14 | */ 15 | public class StreamingWordCount2 { 16 | 17 | public static void main(String[] args) throws Exception { 18 | // step1 :获取执行环境 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | // step2:读取数据 22 | DataStreamSource text = env.socketTextStream("localhost", 4000); 23 | 24 | env.setParallelism(2); 25 | 26 | // 拆词 + 拼数 27 | SingleOutputStreamOperator> wordAndOne = text.flatMap(new FlatMapFunction>() { 28 | @Override 29 | public void flatMap(String value, Collector> out) throws Exception { 30 | String[] words = value.split(" "); 31 | for (String word : words) { 32 | out.collect(Tuple2.of(word, 1)); 33 | } 34 | } 35 | }); 36 | 37 | 38 | //分组、累加 39 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1); 40 | 41 | 42 | //sink 43 | sumed.print().setParallelism(2); 44 | 45 | //execute 46 | env.execute("StreamingWordCount"); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- 
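Note on the task split that StreamingWordCount2 above is meant to illustrate: operators connected one-to-one with the same parallelism are fused (chained) into a single task, while keyBy introduces a network shuffle and therefore a task boundary. A minimal sketch, assuming the same env and text names as in StreamingWordCount2 (compare StreamingWordCountChain.java elsewhere in this repo), of how that chaining can be controlled explicitly:

    // Sketch only: controlling operator chaining; `env` and `text` are assumed to be
    // the StreamExecutionEnvironment and socket source defined in StreamingWordCount2.
    SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = text
            .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                @Override
                public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
                    for (String word : value.split(" ")) {
                        out.collect(Tuple2.of(word, 1));
                    }
                }
            })
            .startNewChain();                    // flatMap starts its own chain instead of fusing with the source
    // env.disableOperatorChaining();           // alternatively, disable chaining for the whole job
    // wordAndOne.slotSharingGroup("flatmap");  // or place it in a dedicated slot sharing group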
/src/main/java/cn/northpark/flink/oracle/OracleToTupleFunciton.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.oracle; 2 | 3 | 4 | import org.apache.flink.api.common.functions.RichMapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple3; 6 | import org.apache.flink.configuration.Configuration; 7 | 8 | import java.sql.Connection; 9 | import java.sql.DriverManager; 10 | import java.sql.PreparedStatement; 11 | import java.sql.ResultSet; 12 | 13 | /** 14 | * 15 | */ 16 | public class OracleToTupleFunciton extends RichMapFunction> { 17 | 18 | 19 | private transient Connection connection = null; 20 | 21 | @Override 22 | public void open(Configuration parameters) throws Exception { 23 | super.open(parameters); 24 | Class.forName("oracle.jdbc.OracleDriver"); 25 | connection = DriverManager.getConnection("jdbc:oracle:thin:@localhost:1521:test", "root", "123456"); 26 | } 27 | 28 | 29 | @Override 30 | public Tuple3 map(String word) throws Exception { 31 | 32 | PreparedStatement preparedStatement = connection.prepareStatement("select \"id\",\"word\",\"uptime\" from FLINK.\"t_word_counts\" WHERE \"word\" = ?"); 33 | 34 | preparedStatement.setString(1, word); 35 | ResultSet resultSet = preparedStatement.executeQuery(); 36 | String id = ""; 37 | String uptime = ""; 38 | while (resultSet.next()) { 39 | id = resultSet.getString(1); 40 | uptime = resultSet.getString(3); 41 | } 42 | 43 | preparedStatement.close(); 44 | 45 | 46 | return Tuple3.of(id, word, uptime); 47 | } 48 | 49 | @Override 50 | public void close() throws Exception { 51 | super.close(); 52 | connection.close(); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/syncIO/SinkToMysqlApplication.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project.syncIO; 2 | 3 | import cn.northpark.flink.project.ActivityBean; 4 | import cn.northpark.flink.project.syncIO.function.AsyncRestfulToActivityBeanFunciton; 5 | import cn.northpark.flink.project.syncIO.function.NP_MySqlSinkFunction; 6 | import cn.northpark.flink.util.FlinkUtilsV1; 7 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 8 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.DataStreamSink; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | 13 | import java.util.concurrent.TimeUnit; 14 | 15 | /*** 16 | * 统计event,省份等纬度的数目,把结果写到t_activity_counts表 17 | * 写入mysql数据库DEMO 18 | */ 19 | public class SinkToMysqlApplication { 20 | public static void main(String[] args) throws Exception { 21 | 22 | DataStream lines = FlinkUtilsV1.createKafkaStream(args,new SimpleStringSchema()); 23 | 24 | //调用异步IO的transform 25 | SingleOutputStreamOperator beans = AsyncDataStream.unorderedWait(lines, new AsyncRestfulToActivityBeanFunciton(), 0, TimeUnit.SECONDS, 10); 26 | 27 | SingleOutputStreamOperator summed = beans.keyBy("eventType").sum("counts"); 28 | 29 | SingleOutputStreamOperator summed2 = beans.keyBy("eventType","province").sum("counts"); 30 | 31 | DataStreamSink addSink = summed.addSink(new NP_MySqlSinkFunction()); 32 | 33 | // DataStreamSink addSink2 = summed2.addSink(new NP_MySqlSink()); 34 | 35 | FlinkUtilsV1.getEnv().execute("SinkToMysqlApplication"); 36 | 37 | } 38 | } 39 | 
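SinkToMysqlApplication above funnels the per-event counts into MySQL through NP_MySqlSinkFunction (listed further down), whose INSERT ... ON DUPLICATE KEY UPDATE only behaves as an upsert if t_activity_counts has a suitable unique key; the table definition is not part of this repo dump, so the schema below is only a hypothetical sketch:

    // Hypothetical DDL assumed by the upsert in NP_MySqlSinkFunction; adjust to the real schema.
    //   CREATE TABLE t_activity_counts (
    //     id     VARCHAR(64) NOT NULL,
    //     event  INT         NOT NULL,
    //     counts INT         NOT NULL,
    //     PRIMARY KEY (id, event)
    //   );
    // Without a PRIMARY KEY / UNIQUE index, ON DUPLICATE KEY UPDATE degrades to plain inserts.
    summed.addSink(new NP_MySqlSinkFunction());  // as wired above
    // summed2 (event + province) is not covered by this sink: it never writes a province column,
    // so persisting that aggregate would need a table and sink variant keyed on (id, event, province).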
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/oracle/SinkOracle.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.oracle; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 7 | 8 | import java.sql.Connection; 9 | import java.sql.DriverManager; 10 | import java.sql.PreparedStatement; 11 | 12 | @Slf4j 13 | public class SinkOracle extends RichSinkFunction<Tuple3<String, String, String>> { 14 | 15 | private Connection connection; 16 | private PreparedStatement statement; 17 | 18 | // 1,初始化 19 | @Override 20 | public void open(Configuration parameters) throws Exception { 21 | super.open(parameters); 22 | Class.forName("oracle.jdbc.OracleDriver"); 23 | connection = DriverManager.getConnection("jdbc:oracle:thin:@localhost:1521:test", "root", "123456"); 24 | } 25 | 26 | // 2,执行 27 | @Override 28 | public void invoke(Tuple3<String, String, String> objectNode, Context context) throws Exception { 29 | log.info("----------------------> {}", connection); 30 | String sql = "INSERT INTO \"FLINK\".\"t_word_counts\"(\"id\", \"word\", \"uptime\") values (?,?,?) " ; 31 | PreparedStatement ps = connection.prepareStatement(sql); 32 | log.info("sql---------------------> {}", sql); 33 | ps.setString(1, objectNode.f0); 34 | ps.setString(2, objectNode.f1); 35 | ps.setString(3, objectNode.f2); 36 | ps.executeUpdate(); ps.close(); 37 | } 38 | 39 | // 3,关闭 40 | @Override 41 | public void close() throws Exception { 42 | super.close(); 43 | if (statement != null) 44 | statement.close(); 45 | if (connection != null) 46 | connection.close(); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/batch/TableWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.batch; 2 | 3 | import cn.northpark.flink.WordCount; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.operators.DataSource; 7 | import org.apache.flink.table.api.Table; 8 | import org.apache.flink.table.api.bridge.java.BatchTableEnvironment; 9 | 10 | /** 11 | * @author bruce 12 | * 利用table api进行离线计算 13 | */ 14 | public class TableWordCount { 15 | public static void main(String[] args) throws Exception { 16 | 17 | //dataSet api 18 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 19 | 20 | //批处理Table执行上下文 21 | BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env); 22 | 23 | //模拟数据 24 | DataSource<WordCount> wordCountDataSource = env.fromElements(new WordCount("java", 1), 25 | new WordCount("scala", 1), 26 | new WordCount("java", 1), 27 | new WordCount("java", 1), 28 | new WordCount("flink", 1), 29 | new WordCount("flink", 1), 30 | new WordCount("vue", 1) 31 | 32 | 33 | ); 34 | 35 | //将dataSet注册成表 36 | Table table = tableEnv.fromDataSet(wordCountDataSource); 37 | 38 | System.out.println("schema---" + table.getSchema()); 39 | 40 | Table table2 = table 41 | .groupBy("word") 42 | .select("word, counts.sum as counts") 43 | .filter("counts >=2 ") 44 | .orderBy("counts.desc"); 45 | 46 | //把表转换成dataSet 47 | DataSet<WordCount> rowDataSet = tableEnv.toDataSet(table2, WordCount.class); 48 | 49 | rowDataSet.print(); 50 | 51 | } 52 | } 53 |
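Both SQLWordCount and TableWordCount above convert between DataSet<WordCount> and Table, which requires cn.northpark.flink.WordCount to be a Flink-compatible POJO (public class, public no-argument constructor, public fields or getters/setters). The actual class is not included in this part of the listing; a minimal shape it presumably has, shown only as an assumption:

    // Hypothetical sketch of the WordCount bean used by the two batch jobs above;
    // the real cn.northpark.flink.WordCount lives elsewhere in this repo.
    public class WordCount {
        public String word;
        public long counts;

        public WordCount() {}                        // no-arg constructor required for Flink POJO mapping
        public WordCount(String word, long counts) {
            this.word = word;
            this.counts = counts;
        }

        @Override
        public String toString() {
            return word + "," + counts;
        }
    }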
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/FlinkUtilsV1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.util; 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 4 | import org.apache.flink.streaming.api.datastream.DataStream; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 7 | 8 | import java.util.Properties; 9 | 10 | /** 11 | * 执行程序的参数 12 | * --topics flink000 --group.id bruce --bootstrap.servers node1:9092 13 | */ 14 | public class FlinkUtilsV1 { 15 | 16 | public static final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | /** 19 | * 从kafka读取数据 20 | * 21 | * @param args 22 | * @param simpleStringSchema 23 | * @return 24 | */ 25 | public static DataStream createKafkaStream(String[] args, SimpleStringSchema simpleStringSchema) { 26 | 27 | String topic = args[0]; 28 | String groupId = args[1]; 29 | String brokerList = args[2]; 30 | Properties props = new Properties(); 31 | //指定Ka fka的Broker地址 32 | props.setProperty("bootstrap.servers", brokerList); 33 | //指定组ID 34 | props.setProperty("group.id", groupId); 35 | //如果没有记录偏移量,第一次从最开始消费 36 | props.setProperty("auto.offset.reset", "earliest"); 37 | //kafka的消费者不自动提交偏移量 38 | //props . setProperty("enable.auto. commit", "false"); 39 | //KafkaSource 40 | FlinkKafkaConsumer kafkaSource = new FlinkKafkaConsumer<>( 41 | topic, 42 | new SimpleStringSchema(), 43 | props); 44 | 45 | 46 | return env.addSource(kafkaSource); 47 | } 48 | 49 | public static StreamExecutionEnvironment getEnv() { 50 | return env; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project2/OperatorState1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project2; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | 11 | public class OperatorState1 { 12 | public static void main(String[] args) throws Exception { 13 | 14 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | 16 | env.enableCheckpointing(5000); 17 | 18 | env.setStateBackend(new FsStateBackend("file:////Users/bruce/Documents/workspace/np-flink/backEnd")); 19 | 20 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000)); 21 | 22 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 23 | 24 | 25 | DataStreamSource> lines = env.addSource(new NP_ExactlyOnceParallelismFileSource("/Users/bruce/Desktop/data")); 26 | 27 | lines.print(); 28 | 29 | 30 | DataStreamSource socketTextStream = env.socketTextStream("localhost", 4000); 31 | 32 | socketTextStream.map(new MapFunction() { 33 | @Override 34 | public Object map(String value) throws Exception { 35 | if(value.startsWith("jeyy")){ 36 | 
System.out.println(1/0); 37 | } 38 | return value; 39 | } 40 | }); 41 | 42 | env.execute("OperatorState1"); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/exactly/transactionway/FlinkKafkaPrint.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.exactly.transactionway; 2 | 3 | import cn.northpark.flink.util.FlinkUtils; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.java.tuple.Tuple3; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.util.Collector; 11 | import org.apache.flink.util.StringUtils; 12 | 13 | import java.io.InputStream; 14 | import java.time.LocalDateTime; 15 | import java.util.UUID; 16 | 17 | /*** 18 | * Flink从kafka读取数据写入Oracle 并且实现exactly once 19 | * @author bruce 20 | */ 21 | public class FlinkKafkaPrint { 22 | 23 | public static void main(String[] args) throws Exception{ 24 | 25 | InputStream is = FlinkKafkaToMysql.class.getClassLoader().getResourceAsStream("config.properties"); 26 | 27 | ParameterTool parameters = ParameterTool.fromPropertiesFile(is); 28 | 29 | DataStream kafkaStream = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 30 | 31 | 32 | SingleOutputStreamOperator> words = kafkaStream.flatMap(new FlatMapFunction>() { 33 | @Override 34 | public void flatMap(String value, Collector> out) throws Exception { 35 | if (!StringUtils.isNullOrWhitespaceOnly(value)) { 36 | 37 | out.collect(Tuple3.of(UUID.randomUUID().toString(), value, LocalDateTime.now().toString())); 38 | } 39 | } 40 | }); 41 | 42 | words.print(); 43 | 44 | FlinkUtils.getEnv().execute("FlinkKafkaPrint"); 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/syncIO/function/NP_MySqlSinkFunction.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project.syncIO.function; 2 | 3 | import cn.northpark.flink.project.ActivityBean; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 6 | 7 | import java.sql.Connection; 8 | import java.sql.DriverManager; 9 | import java.sql.PreparedStatement; 10 | 11 | public class NP_MySqlSinkFunction extends RichSinkFunction { 12 | 13 | private Connection connection = null; 14 | 15 | @Override 16 | public void open(Configuration parameters) throws Exception { 17 | super.open(parameters); 18 | connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/flink?characterEncoding=UTF-8","root","123456"); 19 | 20 | } 21 | 22 | @Override 23 | public void invoke(ActivityBean bean, Context context) throws Exception { 24 | 25 | //插入或者更新 26 | //INSERT INTO t_activity_counts (id, event, counts) VALUES (?, ?, ?) 27 | // ON DUPLICATE KEY UPDATE counts = ? 28 | 29 | PreparedStatement preparedStatement = null; 30 | try{ 31 | 32 | preparedStatement = connection.prepareStatement(" INSERT INTO t_activity_counts (id, event, counts) VALUES (?, ?, ?) ON DUPLICATE KEY UPDATE counts = ? 
"); 33 | preparedStatement.setString(1,bean.aid); 34 | preparedStatement.setInt(2,bean.eventType); 35 | preparedStatement.setInt(3,bean.counts); 36 | preparedStatement.setInt(4,bean.counts); 37 | 38 | preparedStatement.executeUpdate(); 39 | }finally { 40 | if(preparedStatement!=null){ 41 | preparedStatement.close(); 42 | } 43 | } 44 | 45 | 46 | 47 | } 48 | 49 | @Override 50 | public void close() throws Exception { 51 | super.close(); 52 | connection.close(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/CountWindow.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow; 12 | 13 | /** 14 | * 先分组,每个组达到一定数目才会被触发窗口 15 | * @author bruce 16 | */ 17 | public class CountWindow { 18 | public static void main(String[] args) throws Exception { 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | DataStreamSource source = env.socketTextStream("localhost", 4000); 22 | 23 | //spark 1 24 | //spark 2 25 | //java 3 26 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 27 | @Override 28 | public Tuple2 map(String value) throws Exception { 29 | String[] lines = value.split(" "); 30 | return Tuple2.of(lines[0],Integer.parseInt(lines[1])); 31 | } 32 | }); 33 | 34 | //先分组 35 | KeyedStream, Tuple> keyed = map.keyBy(0); 36 | 37 | //按照分组后分窗口 38 | WindowedStream, Tuple, GlobalWindow> window = keyed.countWindow(5); 39 | 40 | 41 | SingleOutputStreamOperator> summed = window.sum(1); 42 | 43 | summed.print(); 44 | 45 | env.execute("CountWindow"); 46 | 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/RestartStrategies1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | 10 | /** 11 | * @author zhangyang 12 | * 重启策略 13 | */ 14 | public class RestartStrategies1 { 15 | 16 | public static void main(String[] args) throws Exception { 17 | // step1 :获取执行环境 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | 20 | //只有开启了checkpoint 才会有重启策略 默认是不重启 21 | env.enableCheckpointing(5000);//每隔5s进行一次checkpoint 22 | //默认的重启策略是无限重启 Integer.MAX_VALUE 次 23 | 24 | //重启重试次数 25 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3,2000)); 26 | 27 | 28 | // step2:读取数据 29 | DataStreamSource 
text = env.socketTextStream("localhost", 4000); 30 | 31 | 32 | //把单词和1拼一块 33 | SingleOutputStreamOperator> wordAndOne = text.map(new MapFunction>() { 34 | @Override 35 | public Tuple2 map(String value) throws Exception { 36 | if(value.startsWith("jeyy")){ 37 | throw new RuntimeException("jeyy来了,发生异常!!"); 38 | } 39 | return Tuple2.of(value, 1); 40 | } 41 | }); 42 | 43 | //分组、累加 44 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 45 | 46 | 47 | //sink 48 | sumed.print(); 49 | 50 | //execute 51 | env.execute("RestartStrategies1"); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/KafkaWordCountSQL.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.stream.sql; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 7 | import org.apache.flink.table.descriptors.Json; 8 | import org.apache.flink.table.descriptors.Kafka; 9 | import org.apache.flink.table.descriptors.Schema; 10 | import org.apache.flink.types.Row; 11 | 12 | /** 13 | * 读取kafka数据 利用sql api来查询 14 | */ 15 | public class KafkaWordCountSQL { 16 | public static void main(String[] args) throws Exception { 17 | 18 | //实时dataStream api 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | 22 | //实时Table执行上下文 23 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 24 | 25 | 26 | tableEnv.connect(new Kafka() 27 | .version("universal") 28 | .topic("bruce") 29 | .startFromEarliest() 30 | .property("bootstrap.servers","localhost:9092") 31 | 32 | ).withFormat(new Json().deriveSchema()) 33 | .withSchema(new Schema() 34 | .field("name", TypeInformation.of(String.class)) 35 | .field("gender",TypeInformation.of(String.class)) 36 | ).inAppendMode().createTemporaryTable("kafkaSource"); 37 | 38 | 39 | 40 | //这里是table api的实现写法 41 | Table table = tableEnv.scan("kafkaSource") 42 | .groupBy("gender") 43 | .select("gender ,count(1) as counts"); 44 | 45 | tableEnv.toAppendStream(table, Row.class).print(); 46 | // tableEnv.toRetractStream(table,Row.class).print(); 47 | env.execute("KafkaWordCountSQL"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/oracle/FlinkKafkaSink.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.oracle; 2 | 3 | import cn.northpark.flink.exactly.transactionway.FlinkKafkaToMysql; 4 | import cn.northpark.flink.util.FlinkUtils; 5 | import org.apache.flink.api.common.functions.FlatMapFunction; 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 7 | import org.apache.flink.api.java.tuple.Tuple3; 8 | import org.apache.flink.api.java.utils.ParameterTool; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.util.Collector; 12 | import org.apache.flink.util.StringUtils; 13 | 14 | import java.io.InputStream; 15 | import java.time.LocalDateTime; 16 | import java.util.UUID; 17 | 18 | /*** 19 | * @author bruce 20 | */ 21 | public class FlinkKafkaSink { 22 | 
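//流程:读取 config.properties 构造 Kafka 源 -> 过滤空消息并包装成 (uuid, 原始消息, 当前时间) 的 Tuple3 -> 打印并通过 SinkOracle 写入 Oracle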
23 | public static void main(String[] args) throws Exception{ 24 | 25 | InputStream is = FlinkKafkaToMysql.class.getClassLoader().getResourceAsStream("config.properties"); 26 | 27 | ParameterTool parameters = ParameterTool.fromPropertiesFile(is); 28 | 29 | DataStream kafkaStream = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 30 | 31 | 32 | SingleOutputStreamOperator> words = kafkaStream.flatMap(new FlatMapFunction>() { 33 | @Override 34 | public void flatMap(String value, Collector> out) throws Exception { 35 | if (!StringUtils.isNullOrWhitespaceOnly(value)) { 36 | 37 | out.collect(Tuple3.of(UUID.randomUUID().toString(), value, LocalDateTime.now().toString())); 38 | } 39 | } 40 | }); 41 | 42 | words.print(); 43 | 44 | words.addSink(new SinkOracle()); 45 | 46 | FlinkUtils.getEnv().execute("FlinkKafkaPrint"); 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/function/MysqlToActivityBeanFunciton.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project.function; 2 | 3 | 4 | import cn.northpark.flink.project.ActivityBean; 5 | import org.apache.flink.api.common.functions.RichMapFunction; 6 | import org.apache.flink.configuration.Configuration; 7 | 8 | import java.sql.Connection; 9 | import java.sql.DriverManager; 10 | import java.sql.PreparedStatement; 11 | import java.sql.ResultSet; 12 | 13 | /** 14 | */ 15 | public class MysqlToActivityBeanFunciton extends RichMapFunction { 16 | 17 | 18 | private transient Connection connection = null; 19 | 20 | @Override 21 | public void open(Configuration parameters) throws Exception { 22 | super.open(parameters); 23 | connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/flink?characterEncoding=UTF-8","root","123456"); 24 | } 25 | 26 | 27 | @Override 28 | public ActivityBean map(String line) throws Exception { 29 | String[] fields = line.split( ","); 30 | //u001,A1,2019-09-02 10:10:11,1 ,北京市 31 | 32 | PreparedStatement preparedStatement = connection.prepareStatement("SELECT NAME FROM T_ACTIVITY WHERE ID = ?"); 33 | 34 | String uid = fields[0] ; 35 | String aid = fields[1] ; 36 | String time = fields[2] ; 37 | int eventType = Integer.parseInt(fields[3]) ; 38 | String province = fields[4] ; 39 | String activityName = null; 40 | preparedStatement.setString(1,aid); 41 | ResultSet resultSet = preparedStatement.executeQuery(); 42 | while (resultSet.next()){ 43 | activityName = resultSet.getString(1); 44 | } 45 | 46 | preparedStatement.close(); 47 | 48 | 49 | return ActivityBean.of(uid,aid,activityName,time,eventType,province); 50 | } 51 | 52 | @Override 53 | public void close() throws Exception { 54 | super.close(); 55 | connection.close(); 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/InspectSitemap.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project; 2 | 3 | import cn.northpark.flink.util.HttpGetUtils; 4 | import org.apache.commons.lang.StringUtils; 5 | import org.apache.flink.api.common.functions.RichMapFunction; 6 | import org.apache.flink.api.java.ExecutionEnvironment; 7 | import org.apache.flink.api.java.operators.DataSource; 8 | import org.apache.flink.api.java.operators.MapOperator; 9 | import org.apache.flink.configuration.Configuration; 10 | import org.apache.http.HttpResponse; 11 
| import org.apache.http.client.config.RequestConfig; 12 | import org.apache.http.client.methods.HttpGet; 13 | import org.apache.http.impl.nio.client.CloseableHttpAsyncClient; 14 | import org.apache.http.impl.nio.client.HttpAsyncClients; 15 | import org.apache.http.util.EntityUtils; 16 | 17 | import java.util.concurrent.CompletableFuture; 18 | import java.util.concurrent.Future; 19 | import java.util.function.Supplier; 20 | 21 | /** 22 | * @author bruce 23 | * @date 2022年10月08日 13:48:34 24 | */ 25 | public class InspectSitemap { 26 | public static void main(String[] args) throws Exception{ 27 | 28 | //dataSet api 29 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 30 | 31 | //读取数据 32 | DataSource sitemap_lines = env.readTextFile("C:\\Users\\Bruce\\Downloads\\soft.txt"); 33 | 34 | MapOperator bad_url = sitemap_lines.map(new RichMapFunction() { 35 | 36 | @Override 37 | public Object map(String value) throws Exception { 38 | 39 | 40 | String dataResult = HttpGetUtils.getDataResult(value); 41 | 42 | if (StringUtils.isBlank(dataResult)) { 43 | return value; 44 | } 45 | 46 | return null; 47 | } 48 | 49 | }); 50 | 51 | bad_url.writeAsText("C:\\Users\\Bruce\\Downloads\\bad_req.txt"); 52 | 53 | env.execute("aa"); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/batch/BatchSQLWordCountQueryCommon.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.batch; 2 | 3 | import cn.northpark.flink.WordCount; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.operators.DataSource; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.table.api.Table; 9 | import org.apache.flink.table.api.bridge.java.BatchTableEnvironment; 10 | import org.apache.flink.types.Row; 11 | 12 | /** 13 | * @author bruce 14 | * 利用sql api进行离线查询 15 | * 从配置文件读取参数匹配 16 | * 适合不会编程的人员调用 17 | */ 18 | public class BatchSQLWordCountQueryCommon { 19 | public static void main(String[] args) throws Exception { 20 | 21 | ParameterTool parameters = ParameterTool.fromPropertiesFile("/Users/bruce/Documents/workspace/np-flink/src/main/resources/sqlcfg.properties"); 22 | 23 | //dataSet api 24 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | //实时Table执行上下文 27 | BatchTableEnvironment tableEnv = BatchTableEnvironment.create(env); 28 | 29 | //模拟数据 30 | DataSource wordCountDataSource = env.fromElements( 31 | new WordCount("java", 1), 32 | new WordCount("scala", 1), 33 | new WordCount("java", 1), 34 | new WordCount("java", 1), 35 | new WordCount("flink", 1), 36 | new WordCount("flink", 1), 37 | new WordCount("vue", 1) 38 | 39 | 40 | ); 41 | 42 | 43 | //将dataSet注册成表,指定字段名称 44 | tableEnv.registerDataSet(parameters.getRequired("table"),wordCountDataSource,parameters.getRequired("columns")); 45 | 46 | Table table = tableEnv.sqlQuery(parameters.getRequired("sql")); 47 | 48 | //把表转换成dataSet 49 | DataSet rowDataSet = tableEnv.toDataSet(table, Row.class); 50 | 51 | rowDataSet.print(); 52 | 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project/MysqlActivityNameApplication.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project; 2 | 3 | import 
cn.northpark.flink.project.function.MysqlToActivityBeanFunciton; 4 | import cn.northpark.flink.util.FlinkUtilsV1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | 9 | /** 10 | * 11 | * 从kafka或者其他源读数据 关联mysql查询信息,返回Bean 12 | * 13 | * u001,A1,2019-09-0210:10:11,1,北京市 14 | * u002,A1,2019-09-0210:11:11,1,辽宁省 15 | * u001,A1,2019-09-0210:11:11,2,北京市 16 | * u001,A1,2019-09-0210:11:30,3,北京市 17 | * u002,A1,2019-09-0210:12:11,2,辽宁省 18 | * u003,A2,2019-09-0210:13:11,1,山东省 19 | * u003,A22019-09-0210:13:20,2,山东省 20 | * u003,A2,2019-09-0210:14:20,3,山东省 21 | * u004,A1,2019-09-0210:15:20,1,北京市 22 | * u004,A1,2019-09-0210:15:20,2,北京市 23 | * u005,A1,2019-09-0210:15:20,1,河北省 24 | * u001,A22019-09-0210:16:11,1,北京市 25 | * u001,A2,2019-09-0210:16:11,2,北京市 26 | * u002,A1,2019-09-0210:18:11,2,辽宁省 27 | * u002,A1,2019-09-0210:19:11,3,辽宁省 28 | * * * 29 | * * * 30 | * * * id name last_update 31 | * * * A1 新人礼包 2019-10-15 11:36:36 32 | * * * A2 月末活动 2019-10-15 16:37:42 33 | * * * A3 周末活动 2019-10-15 11:44:23 34 | * * * A4 年度促销 2019-10-15 11:44:23 35 | * 36 | * 37 | * 38 | * 希望的得到的数据: 39 | * u001新人礼包,2019-09-0210:10:11, 1,北京市 40 | * u002 ,新人礼包,2019-09-0210:11:11, 1 ,辽宁省 41 | * 42 | */ 43 | public class MysqlActivityNameApplication { 44 | 45 | public static void main(String[] args) throws Exception { 46 | 47 | DataStream lines = FlinkUtilsV1.createKafkaStream(args,new SimpleStringSchema()); 48 | 49 | SingleOutputStreamOperator beans = lines.map(new MysqlToActivityBeanFunciton()); 50 | 51 | beans.print(); 52 | 53 | FlinkUtilsV1.getEnv().execute("HandleActivityNameApplication"); 54 | 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/AddMony.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase; 2 | 3 | import cn.northpark.flink.util.PhoenixUtilV2; 4 | import cn.northpark.flink.weiboAPP.hbase.bean.WeiboRelations; 5 | import cn.northpark.flink.weiboAPP.hbase.enums.RelType; 6 | 7 | import java.util.UUID; 8 | 9 | /** 10 | * @author bruce 11 | * @date 2022年06月26日 10:03:33 12 | */ 13 | public class AddMony { 14 | public static void main(String[] args) { 15 | 16 | String uid = UUID.randomUUID().toString().replace("-",""); 17 | //添加1对多的转发关系 18 | // A -B1 19 | // -B2 20 | // -B3 21 | for (int i = 0; i < 20; i++) { 22 | 23 | String rel_uid_ = UUID.randomUUID().toString().replace("-",""); 24 | WeiboRelations bean_by_ = new WeiboRelations(uid, RelType.TRANS_LINK,rel_uid_,1); 25 | addOne(bean_by_); 26 | } 27 | } 28 | 29 | /** 30 | * 添加一条转发/评论关系 31 | * @param bean_by 32 | */ 33 | private static void addOne(WeiboRelations bean_by) { 34 | //根据被转发关系构造一条主动转发关系 35 | WeiboRelations bean_self = new WeiboRelations(bean_by.getRel_user_id(), bean_by.getRel_type(), bean_by.getUser_id(),bean_by.getBy_type()==1?0:1); 36 | 37 | //分别插入2条数据 38 | 39 | String insert_rel_sql = "UPSERT INTO \"stt\".t_weibo_relations_v2 (ID,USER_ID,REL_TYPE,REL_USER_ID,BY_TYPE) " + 40 | " VALUES ( '" + UUID.randomUUID().toString()+"' ,'"+ bean_by.getUser_id()+"' ,'"+ bean_by.getRel_type()+"','"+ bean_by.getRel_user_id()+"',"+ bean_by.getBy_type()+" )"; 41 | 42 | 43 | String insert_rel_sql2 = "UPSERT INTO \"stt\".t_weibo_relations_v2 (ID,USER_ID,REL_TYPE,REL_USER_ID,BY_TYPE) " + 44 | " VALUES ( '" + 
UUID.randomUUID().toString()+"' ,'"+ bean_self.getUser_id()+"' ,'"+bean_self.getRel_type()+"','"+bean_self.getRel_user_id()+"',"+bean_self.getBy_type()+" )"; 45 | 46 | 47 | PhoenixUtilV2.insertData(insert_rel_sql); 48 | PhoenixUtilV2.insertData(insert_rel_sql2); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/udf/UserBrowseLog.java: -------------------------------------------------------------------------------- 1 | 2 | package cn.northpark.flink.table_sql_api.stream.sql.udf; 3 | 4 | import com.google.gson.annotations.Expose; 5 | 6 | import javax.annotation.Generated; 7 | 8 | @Generated("net.hexar.json2pojo") 9 | @SuppressWarnings("unused") 10 | public class UserBrowseLog { 11 | 12 | @Expose 13 | private String eventTime; 14 | @Expose 15 | private String eventType; 16 | @Expose 17 | private String productID; 18 | @Expose 19 | private int productPrice; 20 | @Expose 21 | private String userID; 22 | @Expose 23 | private Long eventTimeTimestamp; 24 | 25 | 26 | private String orderID; 27 | 28 | public String getEventTime() { 29 | return eventTime; 30 | } 31 | 32 | public void setEventTime(String eventTime) { 33 | this.eventTime = eventTime; 34 | } 35 | 36 | public String getEventType() { 37 | return eventType; 38 | } 39 | 40 | public void setEventType(String eventType) { 41 | this.eventType = eventType; 42 | } 43 | 44 | public String getProductID() { 45 | return productID; 46 | } 47 | 48 | public void setProductID(String productID) { 49 | this.productID = productID; 50 | } 51 | 52 | public int getProductPrice() { 53 | return productPrice; 54 | } 55 | 56 | public void setProductPrice(int productPrice) { 57 | this.productPrice = productPrice; 58 | } 59 | 60 | public String getUserID() { 61 | return userID; 62 | } 63 | 64 | public void setUserID(String userID) { 65 | this.userID = userID; 66 | } 67 | 68 | public Long getEventTimeTimestamp() { 69 | return eventTimeTimestamp; 70 | } 71 | 72 | public void setEventTimeTimestamp(Long eventTimeTimestamp) { 73 | this.eventTimeTimestamp = eventTimeTimestamp; 74 | } 75 | 76 | public String getOrderID() { 77 | return orderID; 78 | } 79 | 80 | public void setOrderID(String orderID) { 81 | this.orderID = orderID; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCountParam.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.time.Time; 9 | import org.apache.flink.util.Collector; 10 | 11 | /** 12 | * @author zhangyang 13 | * 使用Java API来开发Flink的实时处理应用程序. 
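 * 启动参数示例:--port 9999(未传参数或解析失败时回退到默认端口 9999)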
14 | * wc统计的数据我们源自于socket 15 | * 端口从参数传入 16 | */ 17 | public class StreamingWordCountParam { 18 | 19 | 20 | public static void main(String[] args) throws Exception { 21 | 22 | 23 | // 获取参数 24 | int port = 0; 25 | 26 | try { 27 | ParameterTool tool = ParameterTool.fromArgs(args); 28 | port = tool.getInt("port"); 29 | } catch (Exception e) { 30 | System.err.println("端口未设置,使用默认端口9999"); 31 | port = 9999; 32 | } 33 | 34 | 35 | // step1 :获取执行环境 36 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 37 | 38 | 39 | // step2:读取数据 40 | DataStreamSource text = env.socketTextStream("localhost", port); 41 | 42 | 43 | // step3: transform 44 | text.flatMap(new FlatMapFunction>() { 45 | @Override 46 | public void flatMap(String value, Collector> collector) throws Exception { 47 | String[] tokens = value.toLowerCase().split(","); 48 | for(String token : tokens) { 49 | if(token.length() > 0) { 50 | collector.collect(new Tuple2(token,1)); 51 | } 52 | } 53 | } 54 | }).keyBy(0).timeWindow(Time.seconds(5)).sum(1).print().setParallelism(1); 55 | 56 | 57 | //step4 执行函数 58 | env.execute("StreamingWCJavaApp"); 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.util.Collector; 10 | 11 | /** 12 | * @author zhangyang 13 | * 按照步骤来一步步拆分Task是如何划分的 14 | * wc统计的数据我们源自于socket 15 | */ 16 | public class StreamingWordCount { 17 | 18 | public static void main(String[] args) throws Exception { 19 | // step1 :获取执行环境 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | 22 | // step2:读取数据 23 | DataStreamSource text = env.socketTextStream("localhost", 4000); 24 | 25 | env.setParallelism(2); 26 | 27 | // 拆词 28 | SingleOutputStreamOperator words = text.flatMap(new FlatMapFunction() { 29 | @Override 30 | public void flatMap(String value, Collector out) throws Exception { 31 | String[] words = value.split(" "); 32 | for (String word : words) { 33 | out.collect(word); 34 | } 35 | } 36 | }); 37 | 38 | //把单词和1拼一块 39 | SingleOutputStreamOperator> wordAndOne = words.map(new MapFunction>() { 40 | @Override 41 | public Tuple2 map(String value) throws Exception { 42 | return Tuple2.of(value, 1); 43 | } 44 | }); 45 | 46 | //分组、累加 47 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 48 | 49 | 50 | //sink 51 | sumed.print().setParallelism(2); 52 | 53 | //execute 54 | env.execute("StreamingWordCount"); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/DruidUtils.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.util; 2 | 3 | import com.alibaba.druid.pool.DruidDataSourceFactory; 4 | import lombok.extern.slf4j.Slf4j; 5 | 6 | import javax.sql.DataSource; 7 | import java.io.IOException; 8 | import java.io.InputStream; 9 | 
import java.sql.Connection; 10 | import java.sql.SQLException; 11 | import java.util.Properties; 12 | 13 | 14 | /** 15 | * @author zhangyang 16 | * Druid数据库连接池工具类的设计 17 | */ 18 | @Slf4j 19 | public class DruidUtils { 20 | /** 21 | * 默认配置文件名 22 | */ 23 | private transient static String confile = "druid.properties"; 24 | /** 25 | * 配置文件 26 | */ 27 | private transient static Properties p = null; 28 | /** 29 | * 唯一dateSource,保证全局只有一个数据库连接池 30 | */ 31 | private transient static DataSource dataSource = null; 32 | 33 | 34 | static { 35 | p = new Properties(); 36 | InputStream inputStream = null; 37 | try { 38 | // java应用 读取配置文件 39 | inputStream = DruidUtils.class.getClassLoader().getResourceAsStream(confile); 40 | p.load(inputStream); 41 | } catch (Exception e) { 42 | e.printStackTrace(); 43 | } finally { 44 | try { 45 | if (inputStream != null) { 46 | inputStream.close(); 47 | } 48 | } catch (IOException e) { 49 | // ignore 50 | } 51 | } // end finally 52 | 53 | try { 54 | //通过工厂类获取DataSource对象 55 | dataSource = DruidDataSourceFactory.createDataSource(p); 56 | } catch (Exception e) { 57 | log.error("获取连接异常 ", e); 58 | } 59 | 60 | } // end static 61 | 62 | private DruidUtils() { 63 | } 64 | 65 | /** 66 | * 获取连接 67 | * 68 | * @return 69 | */ 70 | public static Connection getConnection() throws SQLException { 71 | return dataSource.getConnection(); 72 | 73 | } 74 | 75 | 76 | /** 77 | * 关闭连接 78 | * 79 | * @param con 80 | * @date : 2017-10-16 10:08:10 81 | */ 82 | 83 | } 84 | 85 | 86 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/exactly/transactionway/FlinkKafkaToOracle.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.exactly.transactionway; 2 | 3 | import cn.northpark.flink.util.FlinkUtils; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.java.tuple.Tuple3; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.util.Collector; 11 | import org.apache.flink.util.StringUtils; 12 | 13 | import java.io.InputStream; 14 | import java.time.LocalDateTime; 15 | import java.util.UUID; 16 | 17 | /*** 18 | * Flink从kafka读取数据写入Oracle 并且实现exactly once 19 | * @author bruce 20 | */ 21 | public class FlinkKafkaToOracle { 22 | 23 | public static void main(String[] args) throws Exception{ 24 | 25 | InputStream is = FlinkKafkaToMysql.class.getClassLoader().getResourceAsStream("config.properties"); 26 | 27 | ParameterTool parameters = ParameterTool.fromPropertiesFile(is); 28 | 29 | DataStream kafkaStream = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 30 | 31 | 32 | SingleOutputStreamOperator> words = kafkaStream.flatMap(new FlatMapFunction>() { 33 | @Override 34 | public void flatMap(String value, Collector> out) throws Exception { 35 | if (!StringUtils.isNullOrWhitespaceOnly(value)) { 36 | 37 | // if ("AAA".equalsIgnoreCase(value)) { 38 | // System.out.println(1 / 0); 39 | // } 40 | 41 | out.collect(Tuple3.of(UUID.randomUUID().toString(), value, LocalDateTime.now().toString())); 42 | } 43 | } 44 | }); 45 | 46 | words.print(); 47 | 48 | 49 | words.addSink(new OracleTwoPhaseCommitSink()); 50 | 51 | 52 | FlinkUtils.getEnv().execute("FlinkKafkaToOracle"); 53 | 54 | } 55 
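//说明(示意性理解,假设 OracleTwoPhaseCommitSink 基于 Flink 的 TwoPhaseCommitSinkFunction 实现):
//checkpoint 触发时先在事务中预提交本批数据,checkpoint 完成通知到达后再真正 commit,
//配合 Kafka 源的位点快照,从而实现端到端的 exactly once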
| } 56 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/util/RabbitMQConFactory.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.util; 2 | 3 | import com.rabbitmq.client.Address; 4 | import com.rabbitmq.client.Connection; 5 | import com.rabbitmq.client.ConnectionFactory; 6 | 7 | import java.util.*; 8 | 9 | /** 10 | * 消息连接工厂,支持集群自动重连 11 | * 12 | * @author bruce 13 | * 14 | */ 15 | public class RabbitMQConFactory { 16 | 17 | /** 18 | * 缓存连接工厂,将建立的链接放入map缓存,为每个Storm的spout都建立独立的连接,其他用通用的。 19 | */ 20 | private static Map connectionMap = new HashMap(); 21 | 22 | private static ConnectionFactory factory=new ConnectionFactory(); 23 | private static List
<Address> addrs=new ArrayList<Address>
(); 24 | static { 25 | 26 | ResourceBundle bundle = ResourceBundle.getBundle("config"); 27 | if(bundle==null){ 28 | throw new IllegalArgumentException("找不到config.properties!"); 29 | } 30 | factory.setAutomaticRecoveryEnabled(true); 31 | factory.setUsername(bundle.getString("mq.user")); 32 | factory.setPassword(bundle.getString("mq.pass")); 33 | String address=bundle.getString("mq.host"); 34 | int port=Integer.parseInt(bundle.getString("mq.port")); 35 | 36 | Address address1= new Address(address,port); 37 | addrs.add(address1); 38 | 39 | // String []addressArray=addresses.split(","); 40 | // for(int i=0;i source = env.socketTextStream("localhost", 4000); 24 | 25 | //spark 1 26 | //spark 2 27 | //java 3 28 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 29 | @Override 30 | public Tuple2 map(String value) throws Exception { 31 | String[] lines = value.split(" "); 32 | return Tuple2.of(lines[0],Integer.parseInt(lines[1])); 33 | } 34 | }); 35 | 36 | //先分组 37 | KeyedStream, Tuple> keyed = map.keyBy(0); 38 | 39 | //按照分组后分窗口 40 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 41 | WindowedStream, Tuple, TimeWindow> window = keyed.window(SlidingProcessingTimeWindows.of(Time.seconds(10),Time.seconds(5))); 42 | 43 | 44 | SingleOutputStreamOperator> summed = window.sum(1); 45 | 46 | summed.print(); 47 | 48 | env.execute("SlidingWindow"); 49 | 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/SessionWindow.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.assigners.ProcessingTimeSessionWindows; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 14 | 15 | /** 16 | * Session窗口 :以2条数据的时间差来划分窗口,时间差>n,则触发窗口 17 | * @author bruce 18 | */ 19 | public class SessionWindow { 20 | public static void main(String[] args) throws Exception { 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | DataStreamSource source = env.socketTextStream("localhost", 4000); 24 | 25 | //spark 1 26 | //spark 2 27 | //java 3 28 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 29 | @Override 30 | public Tuple2 map(String value) throws Exception { 31 | String[] lines = value.split(" "); 32 | return Tuple2.of(lines[0],Integer.parseInt(lines[1])); 33 | } 34 | }); 35 | 36 | //先分组 37 | KeyedStream, Tuple> keyed = map.keyBy(0); 38 | 39 | //按照分组后分窗口 40 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 41 | WindowedStream, Tuple, TimeWindow> window = keyed.window(ProcessingTimeSessionWindows.withGap( Time.seconds(5))); 42 | 43 | 44 | SingleOutputStreamOperator> summed = window.sum(1); 45 | 46 | summed.print(); 47 | 48 | env.execute("SessionWindow"); 49 | 50 | 
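//本地测试示例:先执行 nc -lk 4000,输入形如 "spark 1" 的数据;同一 key 超过 5 秒没有新数据到达时,会话窗口触发并输出累加结果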
51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/TumblingWindow.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 14 | 15 | /** 16 | * 滚动窗口--先分组,达到N秒 划分窗口,窗口内的所有key组都会被执行 17 | * @author bruce 18 | */ 19 | public class TumblingWindow { 20 | public static void main(String[] args) throws Exception { 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | DataStreamSource source = env.socketTextStream("localhost", 4000); 24 | 25 | //spark 1 26 | //spark 2 27 | //java 3 28 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 29 | @Override 30 | public Tuple2 map(String value) throws Exception { 31 | String[] lines = value.split(" "); 32 | return Tuple2.of(lines[0],Integer.parseInt(lines[1])); 33 | } 34 | }); 35 | 36 | //先分组 37 | KeyedStream, Tuple> keyed = map.keyBy(0); 38 | 39 | //按照分组后分窗口 40 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.of(5, TimeUnit.SECONDS)); 41 | WindowedStream, Tuple, TimeWindow> window = keyed.window(TumblingProcessingTimeWindows.of(Time.seconds(5))); 42 | 43 | 44 | SingleOutputStreamOperator> summed = window.sum(1); 45 | 46 | summed.print(); 47 | 48 | env.execute("TumblingWindow"); 49 | 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/GlobalEntity.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | //定义车辆监控基本信息 4 | case class MonitorCarInfo(areaId:String,roadId:String,monitorId:String,cameraId:String,actionTime:Long,car:String,speed:Double) 5 | //定义车辆监控基本信息 + 限速信息 6 | case class NewMonitorCarInfo(areaId:String,roadId:String,monitorId:String,cameraId:String,actionTime:Long,car:String,speed:Double,speedLimit:Double) 7 | 8 | //定义车辆限速的信息 9 | case class MonitorLimitSpeedInfo(areaId:String,roadId:String,monitorId:String,limitSpeed:Double) 10 | 11 | //定义超速车辆的信息 12 | case class OverSpeedCarInfo(car:String,monitorId:String,roadId:String,realSpeed:Double,limitSpeed:Double,actionTime:Long) 13 | 14 | //定义卡扣平均速度信息 15 | case class MonitorAvgSpeedInfo(windowStartTime:String,windowEndTime:String,monitorId:String,avgSpeed:Double,carCount:Long) 16 | 17 | //定义最通畅的top5 卡扣信息 18 | case class Top5MonitorInfo(windowStartTime:String,windowEndTime:String,monitorId:String,hightSpeedCarCount:Long,middleSpeedCount:Long,normalSpeedCarCount:Long,lowSpeedCarCount:Long) 19 | 20 | //定义卡扣通过车辆数的统计对象 21 | case class 
MonitorSpeedClsCount(xhightSpeedCarCount:Long,xmiddleSpeedCount:Long,xnormalSpeedCarCount:Long,xlowSpeedCarCount:Long) extends Ordered[MonitorSpeedClsCount]{ 22 | var hightSpeedCarCount = xhightSpeedCarCount 23 | var middleSpeedCount = xmiddleSpeedCount 24 | var normalSpeedCarCount = xnormalSpeedCarCount 25 | var lowSpeedCarCount = xlowSpeedCarCount 26 | 27 | override def compare(that: MonitorSpeedClsCount): Int = { 28 | //先比较 高速 29 | if(this.hightSpeedCarCount != that.hightSpeedCarCount){ 30 | (this.hightSpeedCarCount - that.hightSpeedCarCount).toInt 31 | }else if(this.middleSpeedCount != that.middleSpeedCount){ 32 | (this.middleSpeedCount - that.middleSpeedCount).toInt 33 | }else if(this.normalSpeedCarCount != that.normalSpeedCarCount){ 34 | (this.normalSpeedCarCount - that.normalSpeedCarCount).toInt 35 | }else{ 36 | (this.lowSpeedCarCount - that.lowSpeedCarCount).toInt 37 | } 38 | } 39 | } 40 | 41 | //定义违法车辆信息 42 | case class ViolationCarInfo(car:String,violation:String,createTime:String,detail:String) 43 | 44 | //定义出警信息 45 | case class PoliceInfo(policeId:String,car:String,policeTime:Long,policeState:String) 46 | 47 | 48 | 49 | object GlobalEntity { 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/KafkaSourceV2.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 7 | import org.apache.flink.streaming.api.CheckpointingMode; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 12 | 13 | import java.util.Properties; 14 | 15 | /** 16 | * @author zhangyang 17 | * 使用Kafka作为数据源读取数据 exactly once 18 | */ 19 | public class KafkaSourceV2 { 20 | 21 | public static void main(String[] args) throws Exception { 22 | 23 | //1.环境 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | 27 | env.enableCheckpointing(5000); 28 | 29 | env.setStateBackend(new FsStateBackend("file:///Users/bruce/Documents/workspace/np-flink/backEnd")); 30 | 31 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 32 | 33 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3,2000)); 34 | 35 | //精准一次 36 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 37 | 38 | Properties props = new Properties(); 39 | 40 | //指定Kafka的Broker地址 41 | props.setProperty( "bootstrap.servers", "localhost:9092"); 42 | //指定组ID 43 | props.setProperty("group.id", "bruce"); 44 | //如果没有记录偏移量,第一次从最开始消费 45 | props.setProperty("auto.offset.reset", "earliest") ; 46 | //kafka的消费者不自动提交偏移量 47 | props.setProperty("enable.auto.commit", "false"); 48 | 49 | //2.read 50 | FlinkKafkaConsumer kafkaSource = new FlinkKafkaConsumer<>("flink000", new SimpleStringSchema(), props); 51 | 52 | 53 | DataStream lines = env.addSource(kafkaSource); 54 | 55 | //3.sink/transform 56 | lines.print(); 57 | 58 | //4.execute 59 | env.execute("KafkaSourceV2"); 60 | } 61 | 62 | } 63 | 
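//补充说明(示意性写法,基于 flink-connector-kafka 的 FlinkKafkaConsumerBase API):
//开启 checkpoint 后,消费位点随快照一并保存,作业从 checkpoint/savepoint 恢复时接着上次位点继续消费;
//若还希望把位点回写到 Kafka,便于用消费组工具观察消费进度,可在 addSource 之前调用:
//kafkaSource.setCommitOffsetsOnCheckpoints(true);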
-------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StateBackend2.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | 12 | public class StateBackend2 { 13 | public static void main(String[] args) throws Exception{ 14 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | 16 | //只有开启了checkpoint 才会有重启策略 17 | env.enableCheckpointing(8000); 18 | 19 | //hdfs://localhost:9000/np-backend 20 | 21 | //设置重启策略为重启2次,间隔2秒 22 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(2,2)); 23 | 24 | //设置StateBackend策略为本地文件系统 25 | // env.setStateBackend(new FsStateBackend("file:///Users/bruce/Documents/workspace/np-flink/np-stateBackend")); 26 | 27 | env.setStateBackend(new FsStateBackend("hdfs://localhost:9000/np-backend1")); 28 | 29 | //设置cancelJob或者异常退出Job以后不删除checkpoint数据 30 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 31 | 32 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 33 | 34 | SingleOutputStreamOperator> wordOne = lines.map(new MapFunction>() { 35 | 36 | @Override 37 | public Tuple2 map(String value) throws Exception { 38 | if (value.startsWith("jeyy")) { 39 | throw new RuntimeException("jeyy来了,程序出错了!!!"); 40 | } 41 | return Tuple2.of(value, 1); 42 | } 43 | }); 44 | 45 | 46 | SingleOutputStreamOperator> summed = wordOne.keyBy(0).sum(1); 47 | 48 | summed.print(); 49 | 50 | env.execute("StateBackend2"); 51 | 52 | 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/weiboAPP/hbase/AddOne.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.weiboAPP.hbase; 2 | 3 | import cn.northpark.flink.util.PhoenixUtilV2; 4 | import cn.northpark.flink.weiboAPP.hbase.bean.WeiboRelations; 5 | import cn.northpark.flink.weiboAPP.hbase.enums.RelType; 6 | 7 | import java.util.UUID; 8 | 9 | /** 10 | * @author bruce 11 | * @date 2022年06月26日 10:03:33 12 | */ 13 | public class AddOne { 14 | public static void main(String[] args) { 15 | 16 | //2.增加1条关系 17 | // String uid = UUID.randomUUID().toString().replace("-",""); 18 | // String rel_uid = UUID.randomUUID().toString().replace("-",""); 19 | // WeiboRelations bean_by = new WeiboRelations(uid,RelType.REPLY,rel_uid,1); 20 | // addOne(bean_by); 21 | 22 | String uid = UUID.randomUUID().toString().replace("-",""); 23 | //添加1对多的转发关系 24 | // A -B1 25 | // -B2 26 | // -B3 27 | for (int i = 0; i < 20; i++) { 28 | 29 | String rel_uid_ = UUID.randomUUID().toString().replace("-",""); 30 | WeiboRelations bean_by_ = new WeiboRelations(uid, RelType.TRANS_LINK,rel_uid_,1); 31 | addOne(bean_by_); 32 | } 33 | } 34 | 35 | /** 36 | * 添加一条转发/评论关系 37 | * @param bean_by 38 | */ 39 | private static void 
addOne(WeiboRelations bean_by) { 40 | //根据被转发关系构造一条主动转发关系 41 | WeiboRelations bean_self = new WeiboRelations(bean_by.getRel_user_id(), bean_by.getRel_type(), bean_by.getUser_id(),bean_by.getBy_type()==1?0:1); 42 | 43 | //分别插入2条数据 44 | 45 | String insert_rel_sql = "UPSERT INTO \"stt\".t_weibo_relations_v2 (ID,USER_ID,REL_TYPE,REL_USER_ID,BY_TYPE) " + 46 | " VALUES ( '" + UUID.randomUUID().toString()+"' ,'"+ bean_by.getUser_id()+"' ,'"+ bean_by.getRel_type()+"','"+ bean_by.getRel_user_id()+"',"+ bean_by.getBy_type()+" )"; 47 | 48 | 49 | String insert_rel_sql2 = "UPSERT INTO \"stt\".t_weibo_relations_v2 (ID,USER_ID,REL_TYPE,REL_USER_ID,BY_TYPE) " + 50 | " VALUES ( '" + UUID.randomUUID().toString()+"' ,'"+ bean_self.getUser_id()+"' ,'"+bean_self.getRel_type()+"','"+bean_self.getRel_user_id()+"',"+bean_self.getBy_type()+" )"; 51 | 52 | 53 | PhoenixUtilV2.insertData(insert_rel_sql); 54 | PhoenixUtilV2.insertData(insert_rel_sql2); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/StreamSqlWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.table.api.Table; 11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 12 | import org.apache.flink.types.Row; 13 | import org.apache.flink.util.Collector; 14 | 15 | import java.util.Arrays; 16 | 17 | public class StreamSqlWordCount { 18 | public static void main(String[] args) throws Exception { 19 | 20 | //实时dataStream api 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | //实时Table执行上下文 24 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 25 | 26 | //word count java scala 27 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 28 | 29 | SingleOutputStreamOperator words = lines.flatMap(new FlatMapFunction() { 30 | @Override 31 | public void flatMap(String value, Collector out) throws Exception { 32 | Arrays.stream(value.split(" ")).forEach(out::collect); 33 | } 34 | }); 35 | 36 | //注册程表 37 | tableEnv.registerDataStream("t_word_count",words,"word"); 38 | 39 | //写sql 40 | Table table = tableEnv.sqlQuery("select word,count(1) counts from t_word_count group by word"); 41 | 42 | DataStream> tuple2DataStream = tableEnv.toRetractStream(table, Row.class); 43 | 44 | // tuple2DataStream.print(); 45 | 46 | 47 | tuple2DataStream.filter(new FilterFunction>() { 48 | @Override 49 | public boolean filter(Tuple2 value) throws Exception { 50 | 51 | return value.f0; 52 | } 53 | }).print(); 54 | env.execute("StreamSqlWordCount"); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # np-flink 2 | # flink详细学习实践 3 | 4 | 5 | ## 在 MacOS 上安装 Kafka 6 | 记录一下在 Mac 上安装和测试 kafka 的步骤。 7 | 8 | MacOS 上可以方便的使用 brew 进行安装。 9 | 10 | 安装 11 | 12 | 如果还没有安装Java, 
可以先安装Java: 13 | `brew cask install java` 14 | 15 | 16 | 然后安装zookeeper和kafka。 17 | 18 | `brew install kafka` 19 | 20 | `brew install zookeeper` 21 | 22 | 修改 **/usr/local/etc/kafka/server.properties**, 找到 **listeners=PLAINTEXT://:9092** 那一行,把注释取消掉。 23 | 然后修改为: 24 | 25 | `############################# Socket Server Settings ############################# 26 | `# The address the socket server listens on. It will get the value returned from 27 | `# java.net.InetAddress.getCanonicalHostName() if not configured. 28 | `# FORMAT: 29 | `# listeners = listener_name://host_name:port 30 | `# EXAMPLE: 31 | `# listeners = PLAINTEXT://your.host.name:9092 32 | `listeners=PLAINTEXT://localhost:9092` 33 | 34 | 启动 35 | 36 | 如果想以服务的方式启动,那么可以: 37 | 38 | `$ brew services start zookeeper` 39 | 40 | `$ brew services start kafka` 41 | 42 | 如果只是临时启动,可以: 43 | `$ zkServer start` 44 | 45 | `$ kafka-server-start /usr/local/etc/kafka/server.properties` 46 | 47 | 创建Topic 48 | 49 | `$ kafka-topics --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic flink000` 50 | 51 | 查看所有topic 52 | 53 | ` 54 | kafka-topics --list --zookeeper localhost:2181 55 | ` 56 | 57 | 产生消息 58 | 59 | `$ kafka-console-producer --broker-list localhost:9092 --topic flink000` 60 | 61 | `>HELLO Kafka` 62 | 63 | 消费 64 | 65 | 简单方式: 66 | 67 | `$ kafka-console-consumer --bootstrap-server localhost:9092 --topic flink000 --from-beginning 68 | ` 69 | 70 | 如果使用消费组: 71 | 72 | `kafka-console-consumer --bootstrap-server localhost:9092 --topic flink000 --group test-consumer1 --from-beginning 73 | ` 74 | 75 | **_Producer_**:消息生产者。 76 | 77 | **_Broker_**:kafka集群中的服务器。 78 | 79 | **_Topic_**:消息的主题,可以理解为消息的分类,kafka的数据就保存在topic。在每个broker上都可以创建多个topic。 80 | 81 | **_Partition_**:Topic的分区,每个topic可以有多个分区,分区的作用是做负载,提高kafka的吞吐量。 82 | 83 | **_Replication_**:每一个分区都有多个副本,副本的作用是做备胎。当主分区(Leader)故障的时候会选择一个备胎(Follower)上位,成为Leader。在kafka中默认副本的最大数量是10个,且副本的数量不能大于Broker的数量,follower和leader绝对是在不同的机器,同一机器对同一个分区也只可能存放一个副本(包括自己)。 84 | 85 | **_Consumer_**:消息消费者。 86 | 87 | **_Consumer Group_**:我们可以将多个消费组组成一个消费者组,在kafka的设计中同一个分区的数据只能被消费者组中的某一个消费者消费。同一个消费者组的消费者可以消费同一个topic的不同分区的数据,这也是为了提高kafka的吞吐量! 
88 | 89 | **_Zookeeper_**:kafka集群依赖zookeeper来保存集群的的元信息,来保证系统的可用性。 90 | 91 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/StreamTableWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.table.api.Table; 11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 12 | import org.apache.flink.types.Row; 13 | import org.apache.flink.util.Collector; 14 | 15 | import java.util.Arrays; 16 | 17 | public class StreamTableWordCount { 18 | public static void main(String[] args) throws Exception { 19 | 20 | //实时dataStream api 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | //实时Table执行上下文 24 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 25 | 26 | //word count java scala 27 | DataStreamSource lines = env.socketTextStream("localhost", 4000); 28 | 29 | SingleOutputStreamOperator words = lines.flatMap(new FlatMapFunction() { 30 | @Override 31 | public void flatMap(String value, Collector out) throws Exception { 32 | Arrays.stream(value.split(" ")).forEach(out::collect); 33 | } 34 | }); 35 | 36 | //将dataStream注册成表 37 | Table table = tableEnv.fromDataStream(words, "word"); 38 | 39 | System.out.printf("schema---", table.getSchema()); 40 | 41 | Table table2 = table.groupBy("word").select("word, count(1) as counts"); 42 | 43 | DataStream> tuple2DataStream = tableEnv.toRetractStream(table2, Row.class); 44 | 45 | // tuple2DataStream.print(); 46 | 47 | 48 | tuple2DataStream.filter(new FilterFunction>() { 49 | @Override 50 | public boolean filter(Tuple2 value) throws Exception { 51 | 52 | return value.f0; 53 | } 54 | }).print(); 55 | 56 | env.execute("StreamTableWordCount"); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/spark/scoreApp/sinkScore.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.spark.scoreApp; 2 | 3 | 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | import java.util.Arrays; 8 | import java.util.List; 9 | import java.util.Objects; 10 | 11 | /** 12 | * @author bruce 13 | * @date 2022年06月15日 15:19:41 14 | * 编写Java程序,利用IO流向d:\\hadoop\score.txt写入5个同位3科成绩 15 | */ 16 | public class sinkScore { 17 | private static final String sinkDir = "c:///Users/Bruce/Desktop/5/score.txt"; 18 | 19 | 20 | public static void main(String[] args) { 21 | List list = Arrays.asList( 22 | String.join(",", "1", "马克", "3403", "家园的治理:环境科学概论", "92", "2022年6月15日"), 23 | String.join(",", "1", "马克", "B0021001", "军事理论", "88", "2022年6月15日"), 24 | String.join(",", "1", "马克", "3509", "创业创新领导力", "76", "2022年6月14日"), 25 | String.join(",", "2", "刘晓莉", "3403", "家园的治理:环境科学概论", "89", "2022年6月15日"), 26 | String.join(",", "2", "刘晓莉", "B0021001", "军事理论", "82", "2022年6月15日"), 27 | String.join(",", 
"2", "刘晓莉", "3509", "创业创新领导力", "93", "2022年6月14日"), 28 | String.join(",", "3", "王博罗", "3403", "家园的治理:环境科学概论", "66", "2022年6月15日"), 29 | String.join(",", "3", "王博罗", "B0021001", "军事理论", "99", "2022年6月15日"), 30 | String.join(",", "3", "王博罗", "3509", "创业创新领导力", "95", "2022年6月14日") 31 | 32 | ); 33 | FileWriter writer = null; 34 | try { 35 | 36 | File file = new File(sinkDir); 37 | if(!file.getParentFile().exists()){ 38 | boolean result = file.getParentFile().mkdirs(); 39 | if(!result){ 40 | throw new RuntimeException("创建文件路径失败"); 41 | } 42 | } 43 | writer = new FileWriter(file); 44 | 45 | for (String str : list) { 46 | writer.write(str); 47 | writer.write("\n"); 48 | } 49 | }catch (Exception e) { 50 | e.printStackTrace(); 51 | }finally { 52 | if(Objects.nonNull(writer)){ 53 | try { 54 | writer.flush(); 55 | writer.close(); 56 | } catch (IOException e) { 57 | e.printStackTrace(); 58 | } 59 | 60 | } 61 | } 62 | 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/spark/scoreApp/sinkScoreAppended.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.spark.scoreApp; 2 | 3 | 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | import java.util.Arrays; 8 | import java.util.List; 9 | import java.util.Objects; 10 | 11 | /** 12 | * @author bruce 13 | * @date 2022年06月15日 15:19:41 14 | * 编写Java程序,利用IO流向d:\\hadoop\score.txt写入5个同位3科成绩 15 | * 追加模式写入 16 | */ 17 | public class sinkScoreAppended { 18 | private static final String sinkDir = "c:///Users/Bruce/Desktop/5/score.txt"; 19 | 20 | 21 | public static void main(String[] args) { 22 | List list = Arrays.asList( 23 | String.join(",", "4", "马云", "3403", "家园的治理:环境科学概论", "92", "2022年6月15日"), 24 | String.join(",", "4", "马云", "B0021001", "军事理论", "88", "2022年6月15日"), 25 | String.join(",", "4", "马云", "3509", "创业创新领导力", "76", "2022年6月14日"), 26 | String.join(",", "5", "赵散散", "3403", "家园的治理:环境科学概论", "89", "2022年6月15日"), 27 | String.join(",", "5", "赵散散", "B0021001", "军事理论", "82", "2022年6月15日"), 28 | String.join(",", "5", "赵散散", "3509", "创业创新领导力", "93", "2022年6月14日"), 29 | String.join(",", "6", "李科及", "3403", "家园的治理:环境科学概论", "66", "2022年6月15日"), 30 | String.join(",", "6", "李科及", "B0021001", "军事理论", "97", "2022年6月15日"), 31 | String.join(",", "6", "李科及", "3509", "创业创新领导力", "92", "2022年6月14日") 32 | 33 | ); 34 | FileWriter writer = null; 35 | try { 36 | 37 | File file = new File(sinkDir); 38 | if(!file.getParentFile().exists()){ 39 | boolean result = file.getParentFile().mkdirs(); 40 | if(!result){ 41 | throw new RuntimeException("创建文件路径失败"); 42 | } 43 | } 44 | //第二个参数为true则追加 45 | writer = new FileWriter(file,true); 46 | 47 | for (String str : list) { 48 | writer.write(str); 49 | writer.write("\n"); 50 | } 51 | }catch (Exception e) { 52 | e.printStackTrace(); 53 | }finally { 54 | if(Objects.nonNull(writer)){ 55 | try { 56 | writer.flush(); 57 | writer.close(); 58 | } catch (IOException e) { 59 | e.printStackTrace(); 60 | } 61 | 62 | } 63 | } 64 | 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StateBackend1.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 6 | import 
org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | 11 | /** 12 | * @author zhangyang 13 | * 重启策略 14 | */ 15 | public class StateBackend1 { 16 | 17 | public static void main(String[] args) throws Exception { 18 | // step1 :获取执行环境 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | //只有开启了checkpoint 才会有重启策略 默认是不重启 22 | env.enableCheckpointing(5000);//每隔5s进行一次checkpoint 23 | //默认的重启策略是无限重启 Integer.MAX_VALUE 次 24 | 25 | //重启重试次数 26 | env.setRestartStrategy(org.apache.flink.api.common.restartstrategy.RestartStrategies.fixedDelayRestart(3,2000)); 27 | 28 | //设置状态存储的后端,一般写在flink的配置文件中 29 | // env.setStateBackend(new FsStateBackend("file:///Users/bruce/Documents/workspace/np-flink/np-backend")); 30 | env.setStateBackend(new FsStateBackend("hdfs://localhost:9000/np-backend")); 31 | 32 | 33 | //程序异常退出或者人为cancel以后,不删除checkpoint数据 34 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 35 | 36 | // step2:读取数据 37 | DataStreamSource text = env.socketTextStream("localhost", 4000); 38 | 39 | 40 | //把单词和1拼一块 41 | SingleOutputStreamOperator> wordAndOne = text.map(new MapFunction>() { 42 | @Override 43 | public Tuple2 map(String value) throws Exception { 44 | if(value.startsWith("jeyy")){ 45 | throw new RuntimeException("jeyy来了,发生异常!!"); 46 | } 47 | return Tuple2.of(value, 1); 48 | } 49 | }); 50 | 51 | //分组、累加 52 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 53 | 54 | 55 | //sink 56 | sumed.print(); 57 | 58 | //execute 59 | env.execute("StateBackend1"); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/monitorWarning/RTCarAnaly.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.monitorWarning 2 | 3 | import java.util.Properties 4 | 5 | import cn.northpark.flink.scala.transformApp.util._ 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema 7 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 8 | import org.apache.flink.streaming.api.scala.function.WindowFunction 9 | import org.apache.flink.streaming.api.windowing.time.Time 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 11 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer 12 | import org.apache.flink.util.Collector 13 | import org.apache.kafka.common.serialization.StringDeserializer 14 | 15 | /** 16 | * 实时车辆区域分布统计 17 | */ 18 | object RTCarAnaly { 19 | def main(args: Array[String]): Unit = { 20 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 21 | import org.apache.flink.streaming.api.scala._ 22 | 23 | //kafka 配置 24 | 25 | val props = new Properties() 26 | props.setProperty("bootstrap.servers","node1:9092,node2:9092,node3:9092") 27 | props.setProperty("key.deserializer",classOf[StringDeserializer].getName) 28 | props.setProperty("value.deserializer",classOf[StringDeserializer].getName) 29 | props.setProperty("group.id","group112601xx") 30 | 31 | val ds: DataStream[String] = env.addSource(new FlinkKafkaConsumer[String]("monitortopic1125",new 
SimpleStringSchema(),props).setStartFromEarliest()) 32 | 33 | val carDS: DataStream[MonitorCarInfo] = ds.map(line => { 34 | val arr: Array[String] = line.split("\t") 35 | MonitorCarInfo(arr(0), arr(1), arr(2), arr(3), arr(4).toLong, arr(5), arr(6).toDouble) 36 | }) 37 | 38 | //每个一分钟统计每个区域中的车辆总数 39 | carDS.keyBy(_.areaId) 40 | .timeWindow(Time.minutes(1)) 41 | .apply(new WindowFunction[MonitorCarInfo,String,String,TimeWindow] { 42 | //key : 当前区域,window:当前窗口对象,input : 当前窗口内的数据,out : 回收数据对象 43 | override def apply(key: String, window: TimeWindow, input: Iterable[MonitorCarInfo], out: Collector[String]): Unit = { 44 | val carSet = scala.collection.mutable.Set[String]() 45 | 46 | val iter: Iterator[MonitorCarInfo] = input.iterator 47 | while(iter.hasNext){ 48 | val mci: MonitorCarInfo = iter.next() 49 | carSet.add(mci.car) 50 | } 51 | out.collect(s"窗口起始时间:${window.getStart} - ${window.getEnd},当前区域:${key} ,车辆总数为:${carSet.size}") 52 | } 53 | }).print() 54 | 55 | env.execute() 56 | 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCountChain.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.util.Collector; 11 | 12 | /** 13 | * @author zhangyang 14 | * 按照步骤来一步步拆分Task是如何划分的 15 | * wc统计的数据我们源自于socket 16 | */ 17 | public class StreamingWordCountChain { 18 | 19 | public static void main(String[] args) throws Exception { 20 | // step1 :获取执行环境 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | // step2:读取数据 24 | DataStreamSource text = env.socketTextStream("localhost", 4000); 25 | 26 | 27 | // 拆词 28 | SingleOutputStreamOperator words = text.flatMap(new FlatMapFunction() { 29 | @Override 30 | public void flatMap(String value, Collector out) throws Exception { 31 | String[] words = value.split(" "); 32 | for (String word : words) { 33 | out.collect(word); 34 | } 35 | } 36 | }); 37 | 38 | //过滤 39 | SingleOutputStreamOperator filtered = words.filter(new FilterFunction() { 40 | @Override 41 | public boolean filter(String value) throws Exception { 42 | return value.startsWith("a"); 43 | } 44 | }).disableChaining();//将这个算子单独划分,生成一个Task,和其他的算子不再有operator chain 45 | //.startNewChain();//将这个开始划分,生成一个新的Task 46 | 47 | //把单词和1拼一块 48 | SingleOutputStreamOperator> wordAndOne = filtered.map(new MapFunction>() { 49 | @Override 50 | public Tuple2 map(String value) throws Exception { 51 | return Tuple2.of(value, 1); 52 | } 53 | }); 54 | 55 | //分组、累加 56 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 57 | 58 | 59 | //sink 60 | sumed.print().setParallelism(2); 61 | 62 | //execute 63 | env.execute("StreamingWordCount"); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/StreamingWordCountSharingGroup.java: -------------------------------------------------------------------------------- 1 | 
package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.util.Collector; 11 | 12 | /** 13 | * @author zhangyang 14 | * Flink资源槽,默认的名字 都是default 15 | */ 16 | public class StreamingWordCountSharingGroup { 17 | 18 | public static void main(String[] args) throws Exception { 19 | // step1 :获取执行环境 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | 22 | // step2:读取数据 23 | DataStreamSource text = env.socketTextStream("localhost", 4000); 24 | 25 | 26 | // 拆词 27 | SingleOutputStreamOperator words = text.flatMap(new FlatMapFunction() { 28 | @Override 29 | public void flatMap(String value, Collector out) throws Exception { 30 | String[] words = value.split(" "); 31 | for (String word : words) { 32 | out.collect(word); 33 | } 34 | } 35 | }).slotSharingGroup("ddd"); 36 | 37 | //过滤 38 | SingleOutputStreamOperator filtered = words.filter(new FilterFunction() { 39 | @Override 40 | public boolean filter(String value) throws Exception { 41 | return value.startsWith("a"); 42 | } 43 | });//.disableChaining();//将这个算子单独划分,生成一个Task,和其他的算子不再有operator chain 44 | //.startNewChain();//将这个开始划分,生成一个新的Task 45 | 46 | //把单词和1拼一块 47 | SingleOutputStreamOperator> wordAndOne = filtered.map(new MapFunction>() { 48 | @Override 49 | public Tuple2 map(String value) throws Exception { 50 | return Tuple2.of(value, 1); 51 | } 52 | }); 53 | 54 | //分组、累加 55 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 56 | 57 | 58 | //sink 59 | sumed.print(); 60 | 61 | //execute 62 | env.execute("StreamingWordCountSharingGroup"); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/exactly/overrideway/FlinkKafkaToRedis.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.exactly.overrideway; 2 | 3 | import cn.northpark.flink.util.FlinkUtils; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | import org.apache.flink.api.java.tuple.Tuple3; 9 | import org.apache.flink.api.java.utils.ParameterTool; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.util.Collector; 13 | 14 | /*** 15 | * Flink从kafka读取数据写入redis 并且实现exactly once 16 | */ 17 | public class FlinkKafkaToRedis { 18 | 19 | public static void main(String[] args) throws Exception{ 20 | 21 | // ParameterTool parameters = ParameterTool.fromArgs(args); 22 | 23 | ParameterTool parameters = ParameterTool.fromPropertiesFile("/Users/bruce/Documents/workspace/np-flink/src/main/resources/config.properties"); 24 | 25 | DataStream kafkaStream = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 26 | 27 | 28 | // 拆词 29 | SingleOutputStreamOperator words = 
kafkaStream.flatMap(new FlatMapFunction() { 30 | @Override 31 | public void flatMap(String value, Collector out) throws Exception { 32 | String[] words = value.split(" "); 33 | for (String word : words) { 34 | out.collect(word); 35 | } 36 | } 37 | }); 38 | 39 | //把单词和1拼一块 40 | SingleOutputStreamOperator> wordAndOne = words.map(new MapFunction>() { 41 | @Override 42 | public Tuple2 map(String value) throws Exception { 43 | return Tuple2.of(value, 1); 44 | } 45 | }); 46 | 47 | //分组、累加 48 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 49 | 50 | sumed.map(new MapFunction, Tuple3>() { 51 | @Override 52 | public Tuple3 map(Tuple2 value) throws Exception { 53 | return Tuple3.of("NP-wordcount-sink-redis",value.f0,value.f1.toString() ); 54 | } 55 | }).addSink(new MyRedisSink()); 56 | 57 | FlinkUtils.getEnv().execute("FlinkKafkaToRedis"); 58 | 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/ConvertCSV2KafkaBean.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api; 2 | 3 | import cn.northpark.flink.bean.Covid; 4 | import cn.northpark.flink.util.KafkaString; 5 | import com.alibaba.fastjson.JSON; 6 | 7 | import java.io.BufferedReader; 8 | import java.io.FileNotFoundException; 9 | import java.io.FileReader; 10 | import java.io.IOException; 11 | 12 | /** 13 | * @author bruce 14 | * @date 2022年05月08日 22:52:34 15 | */ 16 | public class ConvertCSV2KafkaBean { 17 | public static void main(String[] args) { 18 | String csvFile = "C:\\Users\\Bruce\\Desktop\\today_province_2022_04_15.csv"; 19 | BufferedReader br = null; 20 | String line = ""; 21 | String cvsSplitBy = ","; 22 | 23 | try { 24 | br = new BufferedReader(new FileReader(csvFile)); 25 | br.readLine(); // 提前读一下就跳过了 26 | while ((line = br.readLine()) != null) { 27 | String[] split = line.split(","); 28 | String var0 = split[0]; 29 | String var1 = split[1]; 30 | String var2 = split[2]; 31 | String var3 = split[3] ; 32 | String var4 = split[4] ; 33 | String var5 = split[5] ; 34 | String var6 = split[6] ; 35 | String var7 = split[7] ; 36 | String var8 = split[8] ; 37 | String var9 = split[9] ; 38 | String var10 = split[10]; 39 | String var11 = split[11]; 40 | String var12 = split[12]; 41 | String var13 = split[13]; 42 | String var14 = split[14]; 43 | String var15 = split[15]; 44 | String var16 = split[16]; 45 | String var17 = split[17]; 46 | 47 | Covid bean = new Covid(var0,var1,var2,var3,var4,var5, 48 | var6,var7,var8,var9,var10 49 | ,var11,var12,var13,var14,var15,var16,var17); 50 | 51 | System.err.println(bean.toString()); 52 | 53 | 54 | 55 | KafkaString.sendKafkaString(KafkaString.buildBasicKafkaProperty(),"covid19", JSON.toJSONString(bean)); 56 | } 57 | 58 | } catch (FileNotFoundException e) { 59 | e.printStackTrace(); 60 | } catch (IOException e) { 61 | e.printStackTrace(); 62 | } finally { 63 | if (br != null) { 64 | try { 65 | br.close(); 66 | } catch (IOException e) { 67 | e.printStackTrace(); 68 | } 69 | } 70 | } 71 | 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/hadoop/MR/CarBean.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.hadoop.MR; 2 | 3 | 4 | import org.apache.commons.lang3.StringUtils; 5 | import org.apache.hadoop.io.Writable; 6 | 7 | import java.io.DataInput; 8 | import 
java.io.DataOutput; 9 | import java.io.IOException; 10 | 11 | /** 12 | * @author bruce 13 | * @date 2022年04月19日 21:29:12 14 | */ 15 | public class CarBean implements Writable { 16 | 17 | private String date; 18 | private String upSpeed; 19 | private String speed; 20 | private String no; 21 | 22 | 23 | public String getDate() { 24 | return date; 25 | } 26 | 27 | public void setDate(String date) { 28 | this.date = date; 29 | } 30 | 31 | public String getUpSpeed() { 32 | return upSpeed; 33 | } 34 | 35 | public void setUpSpeed(String upSpeed) { 36 | this.upSpeed = upSpeed; 37 | } 38 | 39 | public String getSpeed() { 40 | return speed; 41 | } 42 | 43 | public void setSpeed(String speed) { 44 | this.speed = speed; 45 | } 46 | 47 | public String getNo() { 48 | return no; 49 | } 50 | 51 | public void setNo(String no) { 52 | this.no = no; 53 | } 54 | 55 | @Override 56 | public String toString() { 57 | StringBuilder sb = new StringBuilder(); 58 | if (StringUtils.isNotEmpty(no)) { 59 | sb.append(no).append(" "); 60 | } 61 | 62 | if (StringUtils.isNotEmpty(date)) { 63 | sb.append(date).append(" "); 64 | } 65 | if (StringUtils.isNotEmpty(upSpeed)) { 66 | sb.append(upSpeed).append(" "); 67 | } 68 | if (StringUtils.isNotEmpty(speed)) { 69 | sb.append(speed).append(" "); 70 | } 71 | 72 | return sb.toString(); 73 | } 74 | @Override 75 | public void write(DataOutput dataOutput) throws IOException { 76 | dataOutput.writeUTF(date ); 77 | dataOutput.writeUTF(upSpeed ); 78 | dataOutput.writeUTF(speed ); 79 | dataOutput.writeUTF(no ); 80 | } 81 | 82 | @Override 83 | public void readFields(DataInput dataInput) throws IOException { 84 | this.date =dataInput.readUTF(); 85 | this.upSpeed =dataInput.readUTF(); 86 | this.speed =dataInput.readUTF(); 87 | this.no =dataInput.readUTF(); 88 | } 89 | 90 | public static void main(String[] args) { 91 | CarBean bean = new CarBean(); 92 | bean.setDate("2022-4-20"); 93 | bean.setSpeed("222"); 94 | bean.setUpSpeed("0.618"); 95 | bean.setNo("6188"); 96 | System.err.println(bean.toString()); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/table_sql_api/stream/sql/IpLocation.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.table_sql_api.stream.sql; 2 | 3 | import com.google.common.collect.Lists; 4 | import org.apache.flink.api.java.tuple.Tuple4; 5 | import org.apache.flink.table.functions.FunctionContext; 6 | import org.apache.flink.table.functions.ScalarFunction; 7 | import org.apache.flink.types.Row; 8 | 9 | import java.io.BufferedReader; 10 | import java.io.File; 11 | import java.io.FileInputStream; 12 | import java.io.InputStreamReader; 13 | import java.util.List; 14 | 15 | public class IpLocation extends ScalarFunction { 16 | private List> lines = Lists.newArrayList(); 17 | 18 | @Override 19 | public void open(FunctionContext context) throws Exception { 20 | File cachedFile = context.getCachedFile("ip-rules"); 21 | 22 | BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(cachedFile))); 23 | 24 | String line = null; 25 | 26 | while ((line = bufferedReader.readLine()) != null) { 27 | String[] fields = line.split("[|]"); 28 | Long startNum = Long.parseLong(fields[2]); 29 | Long endNum = Long.parseLong(fields[3]); 30 | String province = fields[6]; 31 | String city = fields[7]; 32 | lines.add(Tuple4.of(startNum, endNum, province, city)); 33 | } 34 | 35 | } 36 | 37 | //必须 38 | public Row 
eval(String ip) { 39 | Long ipNum = ip2Long(ip); 40 | return binarySearch(ipNum); 41 | } 42 | 43 | public static Long ip2Long(String dottedIP) { 44 | String[] addrArray = dottedIP.split("\\."); 45 | long num = 0; 46 | for (int i=0;i=lines.get(middle).f0 && ipNum<=lines.get(middle).f1){ 63 | index = middle; 64 | } 65 | if(ipNum tp4 = lines.get(index); 73 | result = Row.of(tp4.f2,tp4.f3); 74 | } 75 | return result; 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/starrocks/bean/EventMsg.java: -------------------------------------------------------------------------------- 1 | 2 | package cn.northpark.flink.starrocks.bean; 3 | 4 | import com.google.gson.annotations.Expose; 5 | import com.google.gson.annotations.SerializedName; 6 | 7 | import javax.annotation.Generated; 8 | 9 | @Generated("net.hexar.json2pojo") 10 | @SuppressWarnings("unused") 11 | public class EventMsg { 12 | 13 | @SerializedName("_flush_time") 14 | private Long _flushTime; 15 | @SerializedName("_track_id") 16 | private Long _trackId; 17 | @SerializedName("anonymous_id") 18 | private String anonymousId; 19 | @SerializedName("distinct_id") 20 | private String distinctId; 21 | @Expose 22 | private String event; 23 | @Expose 24 | private Identities identities; 25 | @Expose 26 | private Lib lib; 27 | @Expose 28 | private Properties properties; 29 | @Expose 30 | private Long time; 31 | @Expose 32 | private String type; 33 | 34 | public Long get_flushTime() { 35 | return _flushTime; 36 | } 37 | 38 | public void set_flushTime(Long _flushTime) { 39 | this._flushTime = _flushTime; 40 | } 41 | 42 | public Long get_trackId() { 43 | return _trackId; 44 | } 45 | 46 | public void set_trackId(Long _trackId) { 47 | this._trackId = _trackId; 48 | } 49 | 50 | public String getAnonymousId() { 51 | return anonymousId; 52 | } 53 | 54 | public void setAnonymousId(String anonymousId) { 55 | this.anonymousId = anonymousId; 56 | } 57 | 58 | public String getDistinctId() { 59 | return distinctId; 60 | } 61 | 62 | public void setDistinctId(String distinctId) { 63 | this.distinctId = distinctId; 64 | } 65 | 66 | public String getEvent() { 67 | return event; 68 | } 69 | 70 | public void setEvent(String event) { 71 | this.event = event; 72 | } 73 | 74 | public Identities getIdentities() { 75 | return identities; 76 | } 77 | 78 | public void setIdentities(Identities identities) { 79 | this.identities = identities; 80 | } 81 | 82 | public Lib getLib() { 83 | return lib; 84 | } 85 | 86 | public void setLib(Lib lib) { 87 | this.lib = lib; 88 | } 89 | 90 | public Properties getProperties() { 91 | return properties; 92 | } 93 | 94 | public void setProperties(Properties properties) { 95 | this.properties = properties; 96 | } 97 | 98 | public Long getTime() { 99 | return time; 100 | } 101 | 102 | public void setTime(Long time) { 103 | this.time = time; 104 | } 105 | 106 | public String getType() { 107 | return type; 108 | } 109 | 110 | public void setType(String type) { 111 | this.type = type; 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/EventTimeSessionWindow.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import 
org.apache.flink.streaming.api.TimeCharacteristic; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.assigners.EventTimeSessionWindows; 13 | import org.apache.flink.streaming.api.windowing.time.Time; 14 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 15 | 16 | /** 17 | * Session窗口 :以2条数据的时间差来划分窗口,时间差>n,则触发窗口 18 | * @author bruce 19 | */ 20 | public class EventTimeSessionWindow { 21 | public static void main(String[] args) throws Exception { 22 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 23 | 24 | //设置使用EventTime作为时间标准 25 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 26 | 27 | SingleOutputStreamOperator source = env.socketTextStream("localhost", 4000) 28 | 29 | //提取时间字段 30 | .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor(Time.seconds(0)) { 31 | @Override 32 | public long extractTimestamp(String line) { 33 | String[] fields = line.split(" "); 34 | return Long.parseLong(fields[0]); 35 | } 36 | }); 37 | 38 | 39 | //1000 spark 1 40 | //1999 spark 2 41 | //4999 java 3 42 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 43 | @Override 44 | public Tuple2 map(String value) throws Exception { 45 | String[] lines = value.split(" "); 46 | return Tuple2.of(lines[1],Integer.parseInt(lines[2])); 47 | } 48 | }); 49 | 50 | //先分组 51 | KeyedStream, Tuple> keyed = map.keyBy(0); 52 | 53 | //按照分组后分窗口 54 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 55 | WindowedStream, Tuple, TimeWindow> window = keyed.window(EventTimeSessionWindows.withGap( Time.seconds(5))); 56 | 57 | 58 | SingleOutputStreamOperator> summed = window.sum(1); 59 | 60 | summed.print(); 61 | 62 | env.execute("EventTimeSessionWindow"); 63 | 64 | 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/MakeData.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.io.PrintWriter 4 | import java.text.SimpleDateFormat 5 | import java.util.Properties 6 | 7 | import org.apache.commons.math3.random.{GaussianRandomGenerator, JDKRandomGenerator} 8 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 9 | import org.apache.kafka.common.serialization.StringSerializer 10 | 11 | import scala.util.Random 12 | 13 | /** 14 | * 模拟生成数据 15 | * 1.将模拟的数据生成到文件中 16 | * 2.将模拟的数据生成到Kafka中 17 | * 区域id,道路id,卡扣id,摄像头id,拍摄时间,车辆信息,车辆速度 18 | */ 19 | object MakeData { 20 | def main(args: Array[String]): Unit = { 21 | //创建写入数据的文件 22 | val pw = new PrintWriter("C:\\Users\\Bruce\\Desktop\\3\\trafficdata") 23 | //创建kafka配置 24 | val props = new Properties() 25 | props.setProperty("bootstrap.servers","node1:9092,node2:9092,node3:9092") 26 | props.setProperty("key.serializer",classOf[StringSerializer].getName) 27 | props.setProperty("value.serializer",classOf[StringSerializer].getName) 28 | props.setProperty("auto.offset.reset","latest") 29 | //创建Kafka Producer 30 | val producer = new 
KafkaProducer[String,String](props) 31 | 32 | //模拟3000辆车 京Axxxxx 33 | val locations = Array[String]("京","津","冀","京","鲁","京","京","京","京","京") 34 | val random = new Random() 35 | val generator = new GaussianRandomGenerator(new JDKRandomGenerator()) 36 | 37 | for(i <- 1 to 30000){ 38 | //模拟车辆 39 | val car = locations(random.nextInt(10))+(65+random.nextInt(26)).toChar+random.nextInt(99999).formatted("%05d") 40 | //模拟每辆车通过的卡扣数 ,一辆车每天通过卡扣数可能是大部分都不超过100个卡扣 41 | val throuldMonitorCount = (generator.nextNormalizedDouble() * 100).abs.toInt 42 | for(j <- 0 until throuldMonitorCount){ 43 | //通过的区域 44 | val areaId = random.nextInt(8).formatted("%02d") 45 | //通过的道路 46 | val roadId = random.nextInt(50).formatted("%02d") 47 | //通过的卡扣 48 | val monitorId = random.nextInt(9999).formatted("%04d") 49 | //通过的摄像头 50 | val cameraId = random.nextInt(99999).formatted("%05d") 51 | //摄像头拍摄时间,转换成时间戳 52 | val yyyyMMddHHmmss =DateUtils.getCurrentDate() + " "+DateUtils.getRandomHour()+":"+DateUtils.getRandomMinutesOrSeconds+":"+DateUtils.getRandomMinutesOrSeconds 53 | val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") 54 | val actionTime = format.parse(yyyyMMddHHmmss).getTime 55 | 56 | //拍摄车辆速度 ,大部分车辆速度位于60左右 57 | val speed :Double = (generator.nextNormalizedDouble()*60).abs.formatted("%.2f").toDouble 58 | 59 | val info = s"${areaId}\t${roadId}\t${monitorId}\t${cameraId}\t${actionTime}\t${car}\t${speed}" 60 | println(info) 61 | //向文件中写入 62 | pw.println(info) 63 | //向kafka中写入 64 | producer.send( new ProducerRecord[String,String]("flink_traffic5",info)) 65 | } 66 | } 67 | pw.close() 68 | producer.close() 69 | 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/main/scala/cn/northpark/spark/scoreStt.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.spark 2 | 3 | import java.util.Properties 4 | 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.rdd.RDD 7 | import org.apache.spark.sql.types.{DataTypes, StructField, StructType} 8 | import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession} 9 | 10 | /** 11 | * @author bruce 12 | * @date 2022年06月15日 16:18:43 13 | */ 14 | object scoreStt { 15 | 16 | def main(args: Array[String]): Unit = { 17 | 18 | 19 | val sparkSession: SparkSession = SparkSession.builder() 20 | .appName("scoreStt") 21 | .master("local") 22 | .getOrCreate() 23 | 24 | val sparkContext: SparkContext = sparkSession.sparkContext 25 | 26 | var path = "C:\\Users\\Bruce\\Desktop\\5\\score.txt" 27 | 28 | val rdd1: RDD[String] = sparkContext.textFile(path) 29 | 30 | // map处理返回bean 31 | val mapRDD: RDD[Row] = rdd1.map(line => { 32 | val strings: Array[String] = line.split(",") 33 | 34 | var row: Row = null 35 | 36 | try { 37 | val stuID: String = strings(0) 38 | val stuName: String = strings(1) 39 | val clzID: String = strings(2) 40 | val clzName: String = strings(3) 41 | val score: Double = strings(4).toDouble 42 | val time: String = strings(5) 43 | 44 | // 注意,这里的类型,以及后续的structtype类型需要一一匹配,否则就会出错 45 | row = Row(stuID, stuName, clzID, clzName,score,time ) 46 | } catch { 47 | case e:Exception =>{ 48 | e.printStackTrace() 49 | } 50 | } 51 | 52 | row 53 | }).filter(ele=> ele != null) 54 | 55 | // 创建结构化schema信息,注意这里要求是Seq,也就是有序集合, 56 | // 因为需要按照顺序去解析每个列的字段信息 57 | val structType: StructType = StructType(List( 58 | StructField("stuID", DataTypes.StringType), 59 | StructField("stuName", DataTypes.StringType, false), 60 | StructField("clzID", DataTypes.StringType, false), 61 | 
StructField("clzName", DataTypes.StringType, false), 62 | StructField("score", DataTypes.DoubleType, false), 63 | StructField("time", DataTypes.StringType, false) 64 | )) 65 | 66 | // 通过RDD以及对应的schema信息,创建dataFrame对象 67 | val dataFrame: DataFrame = sparkSession.createDataFrame(mapRDD, structType) 68 | 69 | // 打印schema信息 70 | dataFrame.printSchema() 71 | 72 | //注册临时表 73 | dataFrame.createTempView("tt_score") 74 | 75 | //查询所有数据并打印到控制台 76 | sparkSession.sql("select * from tt_score").show() 77 | 78 | //统计平均分并打印 79 | val avgScore = sparkSession.sql("select stuID,stuName,avg(score) from tt_score group by stuID,stuName ") 80 | avgScore.show() 81 | 82 | 83 | //把统计的平均分写入mysql 84 | val prop = new Properties() 85 | prop.put("user","root") 86 | prop.put("password","123456") 87 | avgScore.write.mode(SaveMode.Overwrite).jdbc("jdbc:mysql://localhost:3306/spark","tt_score",prop) 88 | 89 | sparkSession.close() 90 | 91 | //====================================================== 92 | } 93 | 94 | 95 | } 96 | -------------------------------------------------------------------------------- /src/main/scala/transformApp/util/MakeData1.scala: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.scala.transformApp.util 2 | 3 | import java.io.PrintWriter 4 | import java.text.SimpleDateFormat 5 | import java.util.Properties 6 | 7 | import org.apache.commons.math3.random.{GaussianRandomGenerator, JDKRandomGenerator} 8 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 9 | import org.apache.kafka.common.serialization.StringSerializer 10 | 11 | import scala.util.Random 12 | 13 | /** 14 | * 模拟生成数据 15 | * 1.将模拟的数据生成到文件中 16 | * 2.将模拟的数据生成到Kafka中 17 | * 区域id,道路id,卡扣id,摄像头id,拍摄时间,车辆信息,车辆速度 18 | */ 19 | object MakeData1 { 20 | def main(args: Array[String]): Unit = { 21 | //创建写入数据的文件 22 | val pw = new PrintWriter("C:\\Users\\Bruce\\Desktop\\3\\trafficdata") 23 | //创建kafka配置 24 | val props = new Properties() 25 | props.setProperty("bootstrap.servers","node1:9092,node2:9092,node3:9092") 26 | props.setProperty("key.serializer",classOf[StringSerializer].getName) 27 | props.setProperty("value.serializer",classOf[StringSerializer].getName) 28 | 29 | //创建Kafka Producer 30 | val producer = new KafkaProducer[String,String](props) 31 | 32 | //模拟3000辆车 京Axxxxx 33 | val locations = Array[String]("京","津","冀","京","鲁","京","京","京","京","京") 34 | val monitorIds = Array[String]("01","02") 35 | val random = new Random() 36 | val generator = new GaussianRandomGenerator(new JDKRandomGenerator()) 37 | 38 | //往固定区域、道路、通道发送车辆消息 39 | for(i <- 1 to 3000){ 40 | //模拟车辆 41 | val car = locations(random.nextInt(10))+(65+random.nextInt(26)).toChar+random.nextInt(99999).formatted("%05d") 42 | //模拟每辆车通过的卡扣数 ,一辆车每天通过卡扣数可能是大部分都不超过100个卡扣 43 | val throuldMonitorCount = (generator.nextNormalizedDouble() * 100).abs.toInt 44 | for(j <- 0 until throuldMonitorCount){ 45 | //通过的区域 46 | val areaId = "02" 47 | //通过的道路 48 | val roadId = "02" 49 | //通过的通道 50 | val monitorId = monitorIds(random.nextInt(2)) 51 | //通过的摄像头 52 | val cameraId = random.nextInt(99999).formatted("%05d") 53 | //摄像头拍摄时间,转换成时间戳 54 | val yyyyMMddHHmmss =DateUtils.getCurrentDate() + " "+DateUtils.getRandomHour()+":"+DateUtils.getRandomMinutesOrSeconds+":"+DateUtils.getRandomMinutesOrSeconds 55 | val format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") 56 | val actionTime = format.parse(yyyyMMddHHmmss).getTime 57 | 58 | //拍摄车辆速度 ,大部分车辆速度位于60左右 59 | val speed :Double = 
(generator.nextNormalizedDouble()*60).abs.formatted("%.2f").toDouble 60 | 61 | val info = s"${areaId}\t${roadId}\t${monitorId}\t${cameraId}\t${actionTime}\t${car}\t${speed}" 62 | println(info) 63 | //向文件中写入 64 | pw.println(info) 65 | //向kafka中写入 66 | producer.send( new ProducerRecord[String,String]("flink_traffic5",info)) 67 | 68 | //2s发一条 69 | // Thread.sleep(2000) 70 | } 71 | } 72 | pw.close() 73 | producer.close() 74 | 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/EventTimeSlidingWindowWithWaterMark.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.TimeCharacteristic; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows; 13 | import org.apache.flink.streaming.api.windowing.time.Time; 14 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 15 | 16 | /** 17 | * eventTime 滑动窗口 18 | * 带 watermark水位线 19 | * 20 | * @author bruce 21 | */ 22 | public class EventTimeSlidingWindowWithWaterMark { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | //设置使用EventTime作为时间标准 27 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 28 | 29 | SingleOutputStreamOperator source = env.socketTextStream("localhost", 4000) 30 | 31 | //提取时间字段 32 | .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor(Time.seconds(0)) { 33 | @Override 34 | public long extractTimestamp(String line) { 35 | String[] fields = line.split(" "); 36 | return Long.parseLong(fields[0]); 37 | } 38 | }); 39 | 40 | 41 | //1000 spark 1 42 | //1999 spark 2 43 | //4999 java 3 44 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 45 | @Override 46 | public Tuple2 map(String value) throws Exception { 47 | String[] lines = value.split(" "); 48 | return Tuple2.of(lines[1],Integer.parseInt(lines[2])); 49 | } 50 | }); 51 | 52 | //先分组 53 | KeyedStream, Tuple> keyed = map.keyBy(0); 54 | 55 | //按照分组后分窗口 56 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 57 | WindowedStream, Tuple, TimeWindow> window = keyed.window(SlidingEventTimeWindows.of(Time.seconds(6), Time.seconds(2))); 58 | 59 | 60 | SingleOutputStreamOperator> summed = window.sum(1); 61 | 62 | summed.print(); 63 | 64 | env.execute("EventTimeTumblingWindow"); 65 | 66 | 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/window/EventTimeTumblingWindow.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import 
org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.TimeCharacteristic; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; 13 | import org.apache.flink.streaming.api.windowing.time.Time; 14 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 15 | 16 | /** 17 | * eventTime 滚动窗口 18 | * 如果使用的是并行的source,例如kafkaSource,创建kafka的Topic时有多个分区 19 | * 每一个Source的分区都要满足触发的条件,整个窗口才会被触发 20 | * @author bruce 21 | */ 22 | public class EventTimeTumblingWindow { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | //设置使用EventTime作为时间标准 27 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 28 | 29 | SingleOutputStreamOperator source = env.socketTextStream("localhost", 4000) 30 | 31 | //提取时间字段 32 | .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor(Time.seconds(0)) { 33 | @Override 34 | public long extractTimestamp(String line) { 35 | String[] fields = line.split(" "); 36 | return Long.parseLong(fields[0]); 37 | } 38 | }); 39 | 40 | 41 | //1000 spark 1 42 | //1999 spark 2 43 | //4999 java 3 44 | SingleOutputStreamOperator> map = source.map(new MapFunction>() { 45 | @Override 46 | public Tuple2 map(String value) throws Exception { 47 | String[] lines = value.split(" "); 48 | return Tuple2.of(lines[1],Integer.parseInt(lines[2])); 49 | } 50 | }); 51 | 52 | //先分组 53 | KeyedStream, Tuple> keyed = map.keyBy(0); 54 | 55 | //按照分组后分窗口 56 | // WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 57 | WindowedStream, Tuple, TimeWindow> window = keyed.window(TumblingEventTimeWindows.of(Time.seconds(5))); 58 | 59 | 60 | SingleOutputStreamOperator> summed = window.sum(1); 61 | 62 | summed.print(); 63 | 64 | env.execute("EventTimeTumblingWindow"); 65 | 66 | 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/OperatorStateAndKeyedState.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 12 | import org.apache.flink.util.Collector; 13 | 14 | import java.util.Properties; 15 | 16 | /** 17 | * @author zhangyang 18 | * 按照步骤来一步步拆分Task是如何划分的 19 | * wc统计的数据我们源自于socket 20 | */ 21 | public class OperatorStateAndKeyedState { 22 | 23 | public static void main(String[] args) throws Exception { 24 | 25 | //1.环境 26 | 
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | 28 | env.enableCheckpointing(5000); 29 | 30 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); 31 | 32 | Properties props = new Properties(); 33 | 34 | //指定Kafka的Broker地址 35 | props.setProperty( "bootstrap.servers", "localhost:9092"); 36 | //指定组ID 37 | props.setProperty("group.id", "bruce"); 38 | //如果没有记录偏移量,第一次从最开始消费 39 | props.setProperty("auto.offset.reset", "earliest") ; 40 | //kafka的消费者不自动提交偏移量 41 | props.setProperty("enable. auto. commit", "false"); 42 | 43 | //2.read 44 | FlinkKafkaConsumer kafkaSource = new FlinkKafkaConsumer<>("flink000", new SimpleStringSchema(), props); 45 | 46 | 47 | DataStream lines = env.addSource(kafkaSource); 48 | 49 | // 拆词 50 | SingleOutputStreamOperator words = lines.flatMap(new FlatMapFunction() { 51 | @Override 52 | public void flatMap(String value, Collector out) throws Exception { 53 | String[] words = value.split(" "); 54 | for (String word : words) { 55 | out.collect(word); 56 | } 57 | } 58 | }); 59 | 60 | //把单词和1拼一块 61 | SingleOutputStreamOperator> wordAndOne = words.map(new MapFunction>() { 62 | @Override 63 | public Tuple2 map(String value) throws Exception { 64 | return Tuple2.of(value, 1); 65 | } 66 | }); 67 | 68 | //分组、累加 69 | SingleOutputStreamOperator> sumed = wordAndOne.keyBy(0).sum(1);//.setParallelism(1); 70 | 71 | 72 | //sink 73 | sumed.print(); 74 | 75 | //execute 76 | env.execute("OperatorStateAndKeyedState"); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/cn/northpark/flink/project3/MyExactlyOnceParaFileSource.java: -------------------------------------------------------------------------------- 1 | package cn.northpark.flink.project3; 2 | 3 | import org.apache.flink.api.common.state.ListState; 4 | import org.apache.flink.api.common.state.ListStateDescriptor; 5 | import org.apache.flink.api.common.typeinfo.TypeHint; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | import org.apache.flink.runtime.state.FunctionInitializationContext; 9 | import org.apache.flink.runtime.state.FunctionSnapshotContext; 10 | import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; 11 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 12 | 13 | import java.io.RandomAccessFile; 14 | 15 | public class MyExactlyOnceParaFileSource extends RichParallelSourceFunction> implements CheckpointedFunction { 16 | private String path; 17 | 18 | private boolean flag = true; 19 | 20 | private transient ListState offsetState; 21 | 22 | private Long offset = 0L; 23 | 24 | public MyExactlyOnceParaFileSource(String path) { 25 | this.path = path; 26 | } 27 | 28 | public MyExactlyOnceParaFileSource() { 29 | } 30 | 31 | @Override 32 | public void run(SourceContext ctx) throws Exception { 33 | Iterable iterable = offsetState.get(); 34 | while(iterable.iterator().hasNext()){ 35 | offset = offsetState.get().iterator().next(); 36 | } 37 | 38 | int index = getRuntimeContext().getIndexOfThisSubtask(); 39 | 40 | RandomAccessFile randomAccessFile = new RandomAccessFile(path +"/" +index +".txt","r"); 41 | 42 | randomAccessFile.seek(offset); 43 | 44 | final Object lock = ctx.getCheckpointLock(); 45 | 46 | while (flag){ 47 | String line = randomAccessFile.readLine(); 48 | if(line!=null){ 49 | line = new 
String(line.getBytes("ISO-8859-1"),"utf-8"); 50 | synchronized (lock){ 51 | offset = randomAccessFile.getFilePointer(); 52 | ctx.collect(Tuple2.of(index+"",line)); 53 | } 54 | }else{ 55 | Thread.sleep(2000); 56 | } 57 | } 58 | } 59 | 60 | @Override 61 | public void cancel() { 62 | flag = false; //必须置为false,否则run()中的读取循环永远不会退出 63 | } 64 | 65 | @Override 66 | public void snapshotState(FunctionSnapshotContext context) throws Exception { 67 | //clear 68 | offsetState.clear(); 69 | 70 | //set offset 71 | offsetState.add(offset); 72 | } 73 | 74 | @Override 75 | public void initializeState(FunctionInitializationContext context) throws Exception { 76 | ListStateDescriptor<Long> stateDescriptor = new ListStateDescriptor<>("np-operator-state", 77 | TypeInformation.of(new TypeHint<Long>() { 78 | }) 79 | // Types.LONG 80 | // Long.class 81 | ); 82 | offsetState = context.getOperatorStateStore().getListState(stateDescriptor); 83 | } 84 | } 85 | --------------------------------------------------------------------------------
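Note on MyExactlyOnceParaFileSource above: restoring the offset with `while(iterable.iterator().hasNext())` asks a fresh iterator for hasNext() on every pass, so once the state is non-empty the loop never advances, and `cancel()` originally set `flag = true`, which never stops the read loop in `run()`. The sketch below shows the usual restore/snapshot pattern for an offset kept in operator state; the class and state names follow the file above, while the generic types and the `isRestored()` guard are reconstructions, not a verbatim copy of the repo's code.

import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;

// 仅为示意(sketch):与上面 MyExactlyOnceParaFileSource 相同的状态恢复/快照套路
public abstract class OffsetCheckpointedSourceSketch extends RichParallelSourceFunction<String>
        implements CheckpointedFunction {

    private transient ListState<Long> offsetState; // operator state,保存读取偏移量
    private volatile boolean flag = true;          // volatile:run() 与 cancel() 在不同线程
    protected long offset = 0L;

    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        ListStateDescriptor<Long> descriptor =
                new ListStateDescriptor<>("np-operator-state", Types.LONG);
        offsetState = context.getOperatorStateStore().getListState(descriptor);
        // 恢复:用一次 for-each 取回已保存的偏移量,而不是 while(iterable.iterator().hasNext())
        if (context.isRestored()) {
            for (Long restored : offsetState.get()) {
                offset = restored;
            }
        }
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        offsetState.clear();
        offsetState.add(offset); // checkpoint 时记录当前读到的位置
    }

    @Override
    public void cancel() {
        flag = false; // 置为 false,让 run() 中的读取循环退出
    }

    protected boolean isRunning() {
        return flag;
    }
}

A concrete source would extend this sketch and keep reading the file inside `run()` under `ctx.getCheckpointLock()`, exactly as the original does.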
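FlinkKafkaToRedis above maps the word counts to `Tuple3.of("NP-wordcount-sink-redis", word, count)` and hands them to a MyRedisSink whose source is not shown in this part of the dump. The fixed hash key and the "overrideway" package name suggest the exactly-once effect is achieved by idempotent overwrites: replaying the same word after a failure simply rewrites the same hash field. Purely as an illustration of that idea, a hypothetical sink is sketched below; the class name, the Jedis usage and the localhost address are assumptions, not the repo's actual MyRedisSink.

import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import redis.clients.jedis.Jedis;

// 幂等写 Redis:Tuple3<hashKey, field, value>,重放时重复写同一 field 只是覆盖,结果不变
public class IdempotentRedisSinkSketch extends RichSinkFunction<Tuple3<String, String, String>> {

    private transient Jedis jedis;

    @Override
    public void open(Configuration parameters) throws Exception {
        jedis = new Jedis("localhost", 6379); // 连接地址仅为示意
    }

    @Override
    public void invoke(Tuple3<String, String, String> value, Context context) throws Exception {
        // value.f0 = redis hash 的 key,f1 = 单词,f2 = 累计次数
        jedis.hset(value.f0, value.f1, value.f2);
    }

    @Override
    public void close() throws Exception {
        if (jedis != null) {
            jedis.close();
        }
    }
}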
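In IpLocation the comparison operators inside `ip2Long` and `binarySearch` were swallowed by the HTML rendering of the dump (`for (int i=0;i=lines.get(middle).f0 ...`), so those loop bodies cannot be read back verbatim. Below is a plain-Java reconstruction sketch of the usual dotted-IP-to-long conversion and the binary search over `[startNum, endNum]` ranges; it keeps the field meaning parsed in `open()` (start, end, province, city), but the exact loop details are assumptions rather than a restore of the original.

import java.util.ArrayList;
import java.util.List;

public class IpRangeLookup {

    /** 一条 ip 规则:起始/结束的十进制 ip、省、市;规则需按 start 升序排好 */
    public static class Rule {
        final long start; final long end; final String province; final String city;
        public Rule(long start, long end, String province, String city) {
            this.start = start; this.end = end; this.province = province; this.city = city;
        }
    }

    private final List<Rule> rules = new ArrayList<>();

    public void addRule(Rule r) { rules.add(r); }

    /** 点分十进制 -> 无符号 32 位整数,例如 1.0.1.0 -> 16777472 */
    public static long ip2Long(String dottedIP) {
        String[] parts = dottedIP.split("\\.");
        long num = 0;
        for (int i = 0; i < parts.length; i++) {
            num = (num << 8) | Long.parseLong(parts[i]);
        }
        return num;
    }

    /** 二分查找 ipNum 落在哪个 [start, end] 区间,找不到返回 null */
    public Rule binarySearch(long ipNum) {
        int low = 0, high = rules.size() - 1;
        while (low <= high) {
            int middle = (low + high) / 2;
            Rule r = rules.get(middle);
            if (ipNum >= r.start && ipNum <= r.end) {
                return r;
            }
            if (ipNum < r.start) {
                high = middle - 1;
            } else {
                low = middle + 1;
            }
        }
        return null;
    }
}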
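The event-time jobs above (EventTimeTumblingWindow, EventTimeSlidingWindowWithWaterMark, EventTimeSessionWindow) all assign watermarks with `BoundedOutOfOrdernessTimestampExtractor(Time.seconds(0))`, i.e. a zero out-of-orderness bound: any record whose timestamp is smaller than the largest timestamp already seen counts as late and is dropped by default. If the socket input can arrive out of order, the bound should be set to the maximum expected disorder. A small sketch with a 2-second bound follows; the 2-second value is an example, not taken from the repo.

import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;

// 允许最多 2 秒乱序:watermark = 当前最大事件时间 - 2s,迟到不超过 2s 的数据仍能进入窗口
public class LineTimestampExtractor extends BoundedOutOfOrdernessTimestampExtractor<String> {

    public LineTimestampExtractor() {
        super(Time.seconds(2));
    }

    @Override
    public long extractTimestamp(String line) {
        // 输入形如 "1000 spark 1",第一个字段是毫秒级事件时间
        return Long.parseLong(line.split(" ")[0]);
    }
}

It would be used as `env.socketTextStream(...).assignTimestampsAndWatermarks(new LineTimestampExtractor())` in place of the inline anonymous extractor.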