For a tutorial on how to write a Flink batch application, check the
27 | * tutorials and examples on the Flink Website.
28 | *
29 | *
To package your application into a JAR file for execution,
30 | * change the main class in the pom.xml file to this class (simply search for 'mainClass')
31 | * and run 'mvn clean package' on the command line.
32 | */
33 | public class BatchJob {
34 |
35 | public static void main(String[] args) throws Exception {
36 | // set up the batch execution environment
37 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
38 |
39 | /*
40 | * Here, you can start creating your execution plan for Flink.
41 | *
42 | * Start with getting some data from the environment, like
43 | * env.readTextFile(textPath);
44 | *
45 | * then, transform the resulting DataSet using operations
46 | * like
47 | * .filter()
48 | * .flatMap()
49 | * .join()
50 | * .coGroup()
51 | *
52 | * and many more.
53 | * Have a look at the programming guide for the Java API:
54 | *
55 | * http://flink.apache.org/docs/latest/apis/batch/index.html
56 | *
57 | * and the examples
58 | *
59 | * http://flink.apache.org/docs/latest/apis/batch/examples.html
60 | *
61 | */
62 |
63 | // execute program
64 | env.execute("Flink Batch Java API Skeleton");
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/src/main/java/myflink/HotItems.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package myflink;
19 |
20 | import org.apache.flink.api.common.functions.AggregateFunction;
21 | import org.apache.flink.api.common.functions.FilterFunction;
22 | import org.apache.flink.api.common.state.ListState;
23 | import org.apache.flink.api.common.state.ListStateDescriptor;
24 | import org.apache.flink.api.java.io.PojoCsvInputFormat;
25 | import org.apache.flink.api.java.tuple.Tuple;
26 | import org.apache.flink.api.java.tuple.Tuple1;
27 | import org.apache.flink.api.java.typeutils.PojoTypeInfo;
28 | import org.apache.flink.api.java.typeutils.TypeExtractor;
29 | import org.apache.flink.configuration.Configuration;
30 | import org.apache.flink.core.fs.Path;
31 | import org.apache.flink.streaming.api.TimeCharacteristic;
32 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
33 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
34 | import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
35 | import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
36 | import org.apache.flink.streaming.api.windowing.time.Time;
37 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
38 | import org.apache.flink.util.Collector;
39 |
40 | import java.io.File;
41 | import java.net.URL;
42 | import java.sql.Timestamp;
43 | import java.util.ArrayList;
44 | import java.util.Comparator;
45 | import java.util.List;
46 |
47 | public class HotItems {
48 |
49 | public static void main(String[] args) throws Exception {
50 |
51 | // 创建 execution environment
52 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
53 | // 告诉系统按照 EventTime 处理
54 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
55 | // 为了打印到控制台的结果不乱序,我们配置全局的并发为1,改变并发对结果正确性没有影响
56 | env.setParallelism(1);
57 |
58 | // UserBehavior.csv 的本地文件路径, 在 resources 目录下
59 | URL fileUrl = HotItems.class.getClassLoader().getResource("UserBehavior.csv");
60 | Path filePath = Path.fromLocalFile(new File(fileUrl.toURI()));
61 | // 抽取 UserBehavior 的 TypeInformation,是一个 PojoTypeInfo
62 | PojoTypeInfo pojoType = (PojoTypeInfo) TypeExtractor.createTypeInfo(UserBehavior.class);
63 | // 由于 Java 反射抽取出的字段顺序是不确定的,需要显式指定下文件中字段的顺序
64 | String[] fieldOrder = new String[]{"userId", "itemId", "categoryId", "behavior", "timestamp"};
65 | // 创建 PojoCsvInputFormat
66 | PojoCsvInputFormat csvInput = new PojoCsvInputFormat<>(filePath, pojoType, fieldOrder);
67 |
68 |
69 | env
70 | // 创建数据源,得到 UserBehavior 类型的 DataStream
71 | .createInput(csvInput, pojoType)
72 | // 抽取出时间和生成 watermark
73 | .assignTimestampsAndWatermarks(new AscendingTimestampExtractor() {
74 | @Override
75 | public long extractAscendingTimestamp(UserBehavior userBehavior) {
76 | // 原始数据单位秒,将其转成毫秒
77 | return userBehavior.timestamp * 1000;
78 | }
79 | })
80 | // 过滤出只有点击的数据
81 | .filter(new FilterFunction() {
82 | @Override
83 | public boolean filter(UserBehavior userBehavior) throws Exception {
84 | // 过滤出只有点击的数据
85 | return userBehavior.behavior.equals("pv");
86 | }
87 | })
88 | .keyBy("itemId")
89 | .timeWindow(Time.minutes(60), Time.minutes(5))
90 | .aggregate(new CountAgg(), new WindowResultFunction())
91 | .keyBy("windowEnd")
92 | .process(new TopNHotItems(3))
93 | .print();
94 |
95 | env.execute("Hot Items Job");
96 | }
97 |
98 | /** 求某个窗口中前 N 名的热门点击商品,key 为窗口时间戳,输出为 TopN 的结果字符串 */
99 | public static class TopNHotItems extends KeyedProcessFunction {
100 |
101 | private final int topSize;
102 |
103 | public TopNHotItems(int topSize) {
104 | this.topSize = topSize;
105 | }
106 |
107 | // 用于存储商品与点击数的状态,待收齐同一个窗口的数据后,再触发 TopN 计算
108 | private ListState itemState;
109 |
110 | @Override
111 | public void open(Configuration parameters) throws Exception {
112 | super.open(parameters);
113 | ListStateDescriptor itemsStateDesc = new ListStateDescriptor<>(
114 | "itemState-state",
115 | ItemViewCount.class);
116 | itemState = getRuntimeContext().getListState(itemsStateDesc);
117 | }
118 |
119 | @Override
120 | public void processElement(
121 | ItemViewCount input,
122 | Context context,
123 | Collector collector) throws Exception {
124 |
125 | // 每条数据都保存到状态中
126 | itemState.add(input);
127 | // 注册 windowEnd+1 的 EventTime Timer, 当触发时,说明收齐了属于windowEnd窗口的所有商品数据
128 | context.timerService().registerEventTimeTimer(input.windowEnd + 1);
129 | }
130 |
131 | @Override
132 | public void onTimer(
133 | long timestamp, OnTimerContext ctx, Collector out) throws Exception {
134 | // 获取收到的所有商品点击量
135 | List allItems = new ArrayList<>();
136 | for (ItemViewCount item : itemState.get()) {
137 | allItems.add(item);
138 | }
139 | // 提前清除状态中的数据,释放空间
140 | itemState.clear();
141 | // 按照点击量从大到小排序
142 | allItems.sort(new Comparator() {
143 | @Override
144 | public int compare(ItemViewCount o1, ItemViewCount o2) {
145 | return (int) (o2.viewCount - o1.viewCount);
146 | }
147 | });
148 | // 将排名信息格式化成 String, 便于打印
149 | StringBuilder result = new StringBuilder();
150 | result.append("====================================\n");
151 | result.append("时间: ").append(new Timestamp(timestamp-1)).append("\n");
152 | for (int i=0; i {
171 |
172 | @Override
173 | public void apply(
174 | Tuple key, // 窗口的主键,即 itemId
175 | TimeWindow window, // 窗口
176 | Iterable aggregateResult, // 聚合函数的结果,即 count 值
177 | Collector collector // 输出类型为 ItemViewCount
178 | ) throws Exception {
179 | Long itemId = ((Tuple1) key).f0;
180 | Long count = aggregateResult.iterator().next();
181 | collector.collect(ItemViewCount.of(itemId, window.getEnd(), count));
182 | }
183 | }
184 |
185 | /** COUNT 统计的聚合函数实现,每出现一条记录加一 */
186 | public static class CountAgg implements AggregateFunction {
187 |
188 | @Override
189 | public Long createAccumulator() {
190 | return 0L;
191 | }
192 |
193 | @Override
194 | public Long add(UserBehavior userBehavior, Long acc) {
195 | return acc + 1;
196 | }
197 |
198 | @Override
199 | public Long getResult(Long acc) {
200 | return acc;
201 | }
202 |
203 | @Override
204 | public Long merge(Long acc1, Long acc2) {
205 | return acc1 + acc2;
206 | }
207 | }
208 |
209 | /** 商品点击量(窗口操作的输出类型) */
210 | public static class ItemViewCount {
211 | public long itemId; // 商品ID
212 | public long windowEnd; // 窗口结束时间戳
213 | public long viewCount; // 商品的点击量
214 |
215 | public static ItemViewCount of(long itemId, long windowEnd, long viewCount) {
216 | ItemViewCount result = new ItemViewCount();
217 | result.itemId = itemId;
218 | result.windowEnd = windowEnd;
219 | result.viewCount = viewCount;
220 | return result;
221 | }
222 | }
223 |
224 | /** 用户行为数据结构 **/
225 | public static class UserBehavior {
226 | public long userId; // 用户ID
227 | public long itemId; // 商品ID
228 | public int categoryId; // 商品类目ID
229 | public String behavior; // 用户行为, 包括("pv", "buy", "cart", "fav")
230 | public long timestamp; // 行为发生的时间戳,单位秒
231 | }
232 | }
233 |
--------------------------------------------------------------------------------
/src/main/java/myflink/SocketWindowWordCount.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package myflink;
19 |
20 | import org.apache.flink.api.common.functions.FlatMapFunction;
21 | import org.apache.flink.api.java.tuple.Tuple2;
22 | import org.apache.flink.streaming.api.datastream.DataStream;
23 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
24 | import org.apache.flink.streaming.api.windowing.time.Time;
25 | import org.apache.flink.util.Collector;
26 |
27 | public class SocketWindowWordCount {
28 |
29 | public static void main(String[] args) throws Exception {
30 |
31 | // 创建 execution environment
32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
33 |
34 | // 通过连接 socket 获取输入数据,这里连接到本地9000端口,如果9000端口已被占用,请换一个端口
35 | DataStream text = env.socketTextStream("localhost", 9000, "\n");
36 |
37 | // 解析数据,按 word 分组,开窗,聚合
38 | DataStream> windowCounts = text
39 | .flatMap(new FlatMapFunction>() {
40 | @Override
41 | public void flatMap(String value, Collector> out) {
42 | for (String word : value.split("\\s")) {
43 | out.collect(Tuple2.of(word, 1));
44 | }
45 | }
46 | })
47 | .keyBy(0)
48 | .timeWindow(Time.seconds(5))
49 | .sum(1);
50 |
51 | // 将结果打印到控制台,注意这里使用的是单线程打印,而非多线程
52 | windowCounts.print().setParallelism(1);
53 |
54 | env.execute("Socket Window WordCount");
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/main/java/myflink/StreamingJob.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package myflink;
20 |
21 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
22 |
23 | /**
24 | * Skeleton for a Flink Streaming Job.
25 | *
26 | *
For a tutorial how to write a Flink streaming application, check the
27 | * tutorials and examples on the Flink Website.
28 | *
29 | *
To package your application into a JAR file for execution, run
30 | * 'mvn clean package' on the command line.
31 | *
32 | *
If you change the name of the main class (with the public static void main(String[] args))
33 | * method, change the respective entry in the POM.xml file (simply search for 'mainClass').
34 | */
35 | public class StreamingJob {
36 |
37 | public static void main(String[] args) throws Exception {
38 | // set up the streaming execution environment
39 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
40 |
41 | /*
42 | * Here, you can start creating your execution plan for Flink.
43 | *
44 | * Start with getting some data from the environment, like
45 | * env.readTextFile(textPath);
46 | *
47 | * then, transform the resulting DataStream using operations
48 | * like
49 | * .filter()
50 | * .flatMap()
51 | * .join()
52 | * .coGroup()
53 | *
54 | * and many more.
55 | * Have a look at the programming guide for the Java API:
56 | *
57 | * http://flink.apache.org/docs/latest/apis/streaming/index.html
58 | *
59 | */
60 |
61 | // execute program
62 | env.execute("Flink Streaming Java API Skeleton");
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------