├── README.md
├── bin
├── runAll.sh
├── runFlinkBenchmark.sh
└── runSparkBenchmark.sh
├── common
├── pom.xml
└── src
│ └── main
│ ├── java
│ └── com
│ │ └── intel
│ │ └── streaming_benchmark
│ │ └── common
│ │ ├── BenchLogUtil.java
│ │ ├── ConfigLoader.java
│ │ ├── DateUtils.java
│ │ └── StreamBenchConfig.java
│ └── scala
│ └── com
│ └── intel
│ └── streaming_benchmark
│ └── common
│ ├── QueryConfig.scala
│ ├── Schema.scala
│ └── TableSchemaProvider.scala
├── conf
├── benchmarkConf.yaml
├── dataGenHosts
├── env
└── queriesToRun
├── dataGen
├── pom.xml
└── src
│ └── main
│ ├── java
│ └── com
│ │ └── intel
│ │ └── streaming_benchmark
│ │ ├── Datagen.java
│ │ └── utils
│ │ ├── ConfigLoader.java
│ │ ├── Constants.java
│ │ └── GetProducer.java
│ └── scala
│ └── com
│ └── intel
│ └── streaming_benchmark
│ ├── ClickProducer.scala
│ └── click.scala
├── flink
├── conf
│ └── benchmarkConf.yaml
├── log
│ ├── q1.sql.log
│ ├── q10.sql.log
│ ├── q11.sql.log
│ ├── q12.sql.log
│ ├── q2.sql.log
│ ├── q3.sql.log
│ ├── q4.sql.log
│ ├── q5.sql.log
│ ├── q6.sql.log
│ ├── q7.sql.log
│ ├── q8.sql.log
│ └── q9.sql.log
├── pom.xml
├── query
│ ├── q1.sql
│ ├── q10.sql
│ ├── q11.sql
│ ├── q12.sql
│ ├── q2.sql
│ ├── q3.sql
│ ├── q4.sql
│ ├── q5.sql
│ ├── q6.sql
│ ├── q7.sql
│ ├── q8.sql
│ └── q9.sql
├── result
│ └── result.log
└── src
│ └── main
│ └── java
│ └── com
│ └── intel
│ └── streaming_benchmark
│ ├── flink
│ └── Benchmark.java
│ └── utils
│ └── FlinkBenchConfig.java
├── pom.xml
├── spark
├── conf
│ └── benchmarkConf.yaml
├── log
│ ├── q1.sql.log
│ ├── q2.sql.log
│ ├── q3.sql.log
│ ├── q4.sql.log
│ ├── q5.sql.log
│ ├── q6.sql.log
│ ├── q7.sql.log
│ ├── q8.sql.log
│ └── q9.sql.log
├── pom.xml
├── query
│ ├── q1.sql
│ ├── q2.sql
│ ├── q3.sql
│ ├── q4.sql
│ ├── q5.sql
│ ├── q6.sql
│ ├── q7.sql
│ ├── q8.sql
│ └── q9.sql
├── result
│ └── result.log
└── src
│ └── main
│ └── java
│ └── com
│ └── intel
│ └── streaming_benchmark
│ ├── spark
│ └── Benchmark.java
│ └── utils
│ ├── SchemaProvider.java
│ └── SparkBenchConfig.java
└── utils
└── dataGenerator.sh
/README.md:
--------------------------------------------------------------------------------
1 | # Streaming_benchmark
2 | Streaming Benchmark is designed to measure the performance of stream processing systems such as Flink and Spark. Three use cases are simulated (User Visit Session Analysis, Evaluation of Real-time Advertising, and Shopping Record Analysis). Raw data is generated and stored in Kafka; each stream is mapped to a streaming table, and the benchmark queries act on these tables.
3 |
4 | ## Building
5 | ```
6 | mvn clean package
7 | ```
8 | ## Prerequisites
9 | You should have Apache Kafka, Apache ZooKeeper, Apache Spark, and Flink 1.9 installed on your cluster.
10 |
11 | ## Setup
12 | 1. Clone the project onto your master node.
13 | 2. Update conf/benchmarkConf.yaml (the properties of Kafka, ZooKeeper, the benchmark, ...)
14 | ```
15 | streambench.zkHost ip1:2181,ip2:2181,ip3:2181...
16 | streambench.kafka.brokerList ip1:port1,ip1:port2...
17 | streambench.kafka.consumerGroup benchmark (default)
18 | ```
19 | 3. Update flink/conf/benchmarkConf.yaml (the properties of Flink)
20 | ```
21 | streambench.flink.checkpointDuration 5000
22 | streambench.flink.timeType EventTime (use EventTime or ProcessTime)
23 | ```
24 | 4. Update conf/dataGenHosts (the hosts where data will be generated; it is recommended to generate data on the Kafka nodes)
25 | ```
26 | ip1
27 | ip2
28 | ...
29 | ```
30 | 5. Update conf/queriesToRun (the queries to run)
31 | ```
32 | q1.sql
33 | q2.sql
34 | q3.sql
35 | ...
36 | ```
37 | 6. Update conf/env
38 | ```
39 | export DATAGEN_TIME=100 (running time, in seconds, for each query)
40 | export THREAD_PER_NODE=10 (the number of data-generating threads per node)
41 | export FLINK_HOME={FLINK_HOME}
42 | export SPARK_HOME={SPARK_HOME}
43 | ```
44 | 7. Copy the project to every node that will generate data (the hosts listed in conf/dataGenHosts) and ensure that the master node can log in to these hosts without a password.
45 |
46 | ## Run Benchmark
47 | Start ZooKeeper, Kafka, Spark, and Flink first.
48 | Run the Flink benchmark: `sh bin/runFlinkBenchmark.sh`.
49 | Run the Spark benchmark: `sh bin/runSparkBenchmark.sh`.
50 | Run both the Flink and Spark benchmarks: `sh bin/runAll.sh`.
51 |
52 | ## Result
53 | The results are saved to flink/result/result.log and spark/result/result.log in the following format:
54 | ```
55 | Finished time: 2019-10-30 19:07:26; q1.sql Runtime: 58s TPS:10709265
56 | Finished time: 2019-10-30 19:08:37; q2.sql Runtime: 57s TPS:8061793
57 | Finished time: 2019-10-30 19:09:51; q5.sql Runtime: 57s TPS:4979921
58 | ```
59 |
--------------------------------------------------------------------------------
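For orientation (based on the data generators in dataGen/ shown further below; the concrete values here are made up for illustration): the shopping topic receives comma-separated records, while the advertising topics imp, click and dau receive JSON. Roughly:

```
shopping:  node1_42_Thread-3,milk,1572433646000
imp:       {"imp_time":1572433646000,"strategy":"t2","site":"1","pos_id":"a","poi_id":"1001","cost":"0.02","device_id":"ccccc"}
click:     {"click_time":1572433646100,"strategy":"t2","site":"1","pos_id":"a","poi_id":"1001","device_id":"ccccc"}
dau:       {"dau_time":1572433646200,"device_id":"ccccc"}
```

The userVisit topic is fed by dataGen/src/main/scala/com/intel/streaming_benchmark/ClickProducer.scala with the fields listed in the UserVisit schema (common/src/main/scala/com/intel/streaming_benchmark/common/TableSchemaProvider.scala).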
/bin/runAll.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | curDir=$(cd `dirname $0`;pwd)
4 | rootDir=$(dirname $curDir)
5 |
6 | if [ -e $rootDir/conf/env ]; then
7 | source $rootDir/conf/env
8 | fi
9 |
10 | mainClass1=com.intel.streaming_benchmark.flink.Benchmark
11 | mainClass2=com.intel.streaming_benchmark.spark.Benchmark
12 | dataGenClass=com.intel.streaming_benchmark.Datagen
13 | HOSTNAME=`hostname`
14 |
15 | echo "Run Flink benchmark!"
16 | for sql in `cat $rootDir/conf/queriesToRun`;
17 | do
18 | echo "Data generator start!"
19 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql flink"; done
20 | echo "RUNNING $sql"
21 | nohup $FLINK_HOME/bin/flink run -c $mainClass1 $rootDir/flink/target/flink-1.0-SNAPSHOT.jar $CONF $sql >> $rootDir/flink/log/${sql}.log 2>&1 &
22 | sleep $DATAGEN_TIME
23 | FLINK_ID=`"$FLINK_HOME/bin/flink" list | grep "$sql" | awk '{print $4}'; true`
24 | $FLINK_HOME/bin/flink cancel $FLINK_ID
25 | echo $FLINK_ID
26 | sleep 10
27 | done
28 |
29 | sleep 30
30 |
31 | echo "Run Spark benchmark!"
32 | for sql in `cat $rootDir/conf/queriesToRun`;
33 | do
34 | echo "Data generator start!"
35 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql spark"; done
36 | echo "RUNNING $sql"
37 | nohup $SPARK_HOME/bin/spark-submit --master spark://${HOSTNAME}:7077 --class $mainClass2 --deploy-mode client $rootDir/spark/target/spark-1.0-SNAPSHOT.jar $CONF $sql $DATAGEN_TIME >> $rootDir/spark/log/${sql}.log 2>&1 &
38 | sleep $DATAGEN_TIME
39 | done
--------------------------------------------------------------------------------
/bin/runFlinkBenchmark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | curDir=$(cd `dirname $0`;pwd)
4 | rootDir=$(dirname $curDir)
5 |
6 | if [ -e $rootDir/conf/env ]; then
7 | source $rootDir/conf/env
8 | fi
9 |
10 | mainClass=com.intel.streaming_benchmark.flink.Benchmark
11 | dataGenClass=com.intel.streaming_benchmark.Datagen
12 |
13 | for sql in `cat $rootDir/conf/queriesToRun`;
14 | do
15 | echo "Data generator start!"
16 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql flink"; done
17 | echo "RUNING $sql"
18 | nohup $FLINK_HOME/bin/flink run -c $mainClass $rootDir/flink/target/flink-1.0-SNAPSHOT.jar $CONF $sql >> $rootDir/flink/log/${sql}.log 2>&1 &
19 | sleep $DATAGEN_TIME
20 | FLINK_ID=`"$FLINK_HOME/bin/flink" list | grep "$sql" | awk '{print $4}'; true`
21 | $FLINK_HOME/bin/flink cancel $FLINK_ID
22 | echo $FLINK_ID
23 | sleep 10
24 | done
25 |
26 |
--------------------------------------------------------------------------------
/bin/runSparkBenchmark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | curDir=$(cd `dirname $0`;pwd)
4 | rootDir=$(dirname $curDir)
5 |
6 | if [ -e $rootDir/conf/env ]; then
7 | source $rootDir/conf/env
8 | fi
9 |
10 | mainClass=com.intel.streaming_benchmark.spark.Benchmark
11 | dataGenClass=com.intel.streaming_benchmark.Datagen
12 | HOSTNAME=`hostname`
13 |
14 | for sql in `cat $rootDir/conf/queriesToRun`;
15 | do
16 | echo "Data generator start!"
17 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql spark"; done
18 | echo "RUNING $sql"
19 | nohup $SPARK_HOME/bin/spark-submit --master spark://${HOSTNAME}:7077 --class $mainClass --deploy-mode client $rootDir/spark/target/spark-1.0-SNAPSHOT.jar $CONF $sql $DATAGEN_TIME >> $rootDir/spark/log/${sql}.log 2>&1 &
20 | # $SPARK_HOME/bin/spark-submit --master spark://${HOSTNAME}:7077 --class $mainClass --deploy-mode client $rootDir/spark/target/spark-1.0-SNAPSHOT.jar $CONF $sql $DATAGEN_TIME
21 | sleep $DATAGEN_TIME
22 | done
23 |
--------------------------------------------------------------------------------
/common/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | streaming_benchmark
7 | com.intel.streaming_benchmark
8 | 1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | common
13 |
14 |
15 |
16 |
17 | org.codehaus.mojo
18 | build-helper-maven-plugin
19 | 1.4
20 |
21 |
22 | add-source
23 | generate-sources
24 |
25 | add-source
26 |
27 |
28 |
29 | src/main/scala
30 | src/main/java
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 | net.alchim31.maven
39 | scala-maven-plugin
40 | 3.2.2
41 |
42 |
43 | scala-compile-first
44 | process-resources
45 |
46 | add-source
47 | compile
48 |
49 |
50 |
51 | scala-test-compile
52 | process-test-resources
53 |
54 | testCompile
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/common/src/main/java/com/intel/streaming_benchmark/common/BenchLogUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.intel.streaming_benchmark.common;
19 |
20 | import java.io.File;
21 | import java.io.PrintWriter;
22 |
23 | public class BenchLogUtil {
24 | private static PrintWriter out;
25 |
26 | public static void init() throws Exception {
27 | File file = new File("/tmp/benchlog-flink.txt");
28 | out = new PrintWriter(file);
29 | }
30 |
31 | public static void logMsg(String msg) {
32 | try {
33 | if (out == null) {
34 | init();
35 | }
36 | } catch (Exception e) {
37 | e.printStackTrace();
38 | }
39 | out.println(msg);
40 | out.flush();
41 | System.out.println(msg);
42 | }
43 |
44 | public static void close() {
45 | if (out != null) {
46 | out.close();
47 | }
48 | }
49 |
50 | public static void handleError(String msg) {
51 | System.err.println(msg);
52 | System.exit(1);
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/common/src/main/java/com/intel/streaming_benchmark/common/ConfigLoader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.intel.streaming_benchmark.common;
19 |
20 | import java.io.BufferedReader;
21 | import java.io.FileNotFoundException;
22 | import java.io.FileReader;
23 | import java.io.IOException;
24 | import java.util.HashMap;
25 | import java.util.Map;
26 |
27 | public class ConfigLoader {
28 | private String ConfigFileName = null;
29 | private Map store;
30 |
31 | public ConfigLoader(String filename){
32 | ConfigFileName = filename;
33 | store = new HashMap();
34 | // Load and parse config
35 | try {
36 | BufferedReader br = new BufferedReader(new FileReader(filename));
37 | String line = br.readLine();
38 | while(line != null){
39 | if ((line.length()>0) && (line.charAt(0)!='#')) {
40 | String[] words = line.split("\\s+");
41 | if (words.length == 2) {
42 | String key = words[0];
43 | String value = words[1];
44 | store.put(key, value);
45 | } else if (words.length == 1) {
46 | String key = words[0];
47 | store.put(key, "");
48 | } else {
49 | if (!line.startsWith("streambench"))
50 | System.out.println("Warning: unknown config parsed, skip:" + line);
51 | }
52 | }
53 | line = br.readLine();
54 | }
55 | } catch (FileNotFoundException e) {
56 | System.out.println("ERROR: Config file not found! Should not happen. Caused by:");
57 | } catch (IOException e) {
58 | System.out.println("ERROR: IO exception during read file. Should not happen. Caused by:");
59 | e.printStackTrace();
60 | }
61 | }
62 |
63 | public String getProperty(String key){
64 | if (store.containsKey(key))
65 | return (String) store.get(key);
66 | else {
67 | System.out.println("ERROR: Unknown config key:" + key);
68 | return null;
69 | }
70 | }
71 |
72 | public void merge(String fileName){
73 |
74 | try{
75 | BufferedReader br = new BufferedReader(new FileReader(fileName));
76 | String line = br.readLine();
77 | while(line != null) {
78 | if ((line.length() > 0) && (line.charAt(0) != '#')) {
79 | String[] words = line.split("\\s+");
80 | String key = words[0];
81 | String value = words[1];
82 | if(store.containsKey(key)){
83 | store.replace(key,value);
84 | }else {
85 | store.put(key, value);
86 | }
87 | }
88 | line = br.readLine();
89 | }
90 | }catch (FileNotFoundException e) {
91 | System.out.println("ERROR: Config file not found! Should not happen. Caused by:");
92 | } catch (IOException e) {
93 | System.out.println("ERROR: IO exception during read file. Should not happen. Caused by:");
94 | e.printStackTrace();
95 | }
96 |
97 |
98 | }
99 | }
100 |
--------------------------------------------------------------------------------
/common/src/main/java/com/intel/streaming_benchmark/common/DateUtils.java:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.common;
2 |
3 | import java.io.ByteArrayOutputStream;
4 | import java.io.File;
5 | import java.io.FileInputStream;
6 | import java.text.SimpleDateFormat;
7 | import java.util.Calendar;
8 | import java.util.Date;
9 | import java.util.Random;
10 |
11 | /**
12 | * Time data format converter
13 | */
14 | public class DateUtils {
15 | public static final int dayOfMillis = 86400000;
16 | public static final String TIME_FORMAT = "yyyy-MM-dd HH:mm:ss";
17 | public static final String DATE_FORMAT = "yyyy-MM-dd";
18 | public static final String DATEKEY_FORMAT = "yyyyMMdd";
19 |
20 | /**
21 | * Convert millisecond timestamps into: yyyy-MM-dd HH:mm:ss
22 | *
23 | * @param time
24 | * @return
25 | */
26 | public static String parseLong2String(long time) {
27 | return parseLong2String(time, TIME_FORMAT);
28 | }
29 |
30 | /**
31 | * Convert millisecond timestamps into defined date format
32 | *
33 | * @param time
34 | * @param pattern
35 | * @return
36 | */
37 | public static String parseLong2String(long time, String pattern) {
38 | return parseLong2String(time, new SimpleDateFormat(pattern));
39 | }
40 |
41 | /**
42 | * Convert millisecond timestamps into date according to formatter
43 | *
44 | * @param time
45 | * @param sdf
46 | * @return
47 | */
48 | public static String parseLong2String(long time, SimpleDateFormat sdf) {
49 | Calendar cal = Calendar.getInstance();
50 | cal.setTimeInMillis(time);
51 | return sdf.format(cal.getTime());
52 | }
53 |
54 | /**
55 | * Convert string time into long timestamps
56 | *
57 | * @param date time type,format:yyyy-MM-dd HH:mm:ss
58 | * @return
59 | */
60 | public static long parseString2Long(String date) {
61 | return parseString2Long(date, TIME_FORMAT);
62 | }
63 |
64 | /**
65 | * Convert string time into long timestamps according to the time format string
66 | *
67 | * @param date
68 | * @param pattern
69 | * @return
70 | */
71 | public static long parseString2Long(String date, String pattern) {
72 | return parseString2Long(date, new SimpleDateFormat(pattern));
73 | }
74 |
75 | /**
76 | * Convert string time into long timestamps according to the time format string
77 | *
78 | * @param date
79 | * @param sdf
80 | * @return
81 | */
82 | public static long parseString2Long(String date, SimpleDateFormat sdf) {
83 | try {
84 | return sdf.parse(date).getTime();
85 | } catch (Exception e) {
86 | throw new RuntimeException(e);
87 | }
88 | }
89 |
90 | /**
91 | * Convert long timestamps into the value according to the time type
92 | *
93 | * @param millis milliseconds timestamp
94 | * @param type time type
95 | * @return
96 | */
97 | public static int getSpecificDateValueOfDateTypeEnum(long millis, DateTypeEnum type) {
98 | Calendar cal = Calendar.getInstance();
99 | cal.setTimeInMillis(millis);
100 | switch (type) {
101 | case YEAR:
102 | return cal.get(Calendar.YEAR);
103 | case MONTH:
104 | return cal.get(Calendar.MONTH) + 1;
105 | case DAY:
106 | return cal.get(Calendar.DAY_OF_MONTH);
107 | case HOUR:
108 | return cal.get(Calendar.HOUR_OF_DAY);
109 | case MINUTE:
110 | return cal.get(Calendar.MINUTE);
111 | case SECOND:
112 | return cal.get(Calendar.SECOND);
113 | case MILLISECOND:
114 | return cal.get(Calendar.MILLISECOND);
115 | }
116 |
117 | throw new IllegalArgumentException("Parameter exception");
118 | }
119 |
120 | /**
121 | * get the date of the day,format:yyyy-MM-dd
122 | *
123 | * @return Date of the day
124 | */
125 | public static String getTodayDate() {
126 | return new SimpleDateFormat(DATE_FORMAT).format(new Date());
127 | }
128 |
129 | /**
130 | * Get a random milliseconds timestamps of today
131 | *
132 | * @param random
133 | * @return
134 | */
135 | public static long getRandomTodayTimeOfMillis(Random random) {
136 | Calendar cal = Calendar.getInstance();
137 | cal.set(Calendar.HOUR_OF_DAY, 0);
138 | cal.set(Calendar.MINUTE, 0);
139 | cal.set(Calendar.SECOND, 0);
140 | cal.set(Calendar.MILLISECOND, 0);
141 | if (random.nextDouble() <= 0.7) {
142 | // [0-21] => 70%
143 | int millis = dayOfMillis / 8 * 7;
144 | cal.add(Calendar.MILLISECOND, 1 + random.nextInt(millis));
145 | } else {
146 | // [1-23] => 30%
147 | int millis = dayOfMillis / 24;
148 | cal.add(Calendar.MILLISECOND, millis + random.nextInt(millis * 23));
149 | }
150 | return cal.getTimeInMillis();
151 | }
152 |
153 | /**
154 | * Time type
155 | */
156 | public static enum DateTypeEnum {
157 | YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND
158 | }
159 |
160 | /**
161 | * Judge if time1 is before time2
162 | *
163 | * @param time1
164 | * @param time2
165 | * @return Judgement result
166 | */
167 | public static boolean before(String time1, String time2) {
168 | try {
169 | SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT);
170 | Date dateTime1 = sdf.parse(time1);
171 | Date dateTime2 = sdf.parse(time2);
172 |
173 | if (dateTime1.before(dateTime2)) {
174 | return true;
175 | }
176 | } catch (Exception e) {
177 | e.printStackTrace();
178 | }
179 | return false;
180 | }
181 |
182 | /**
183 | * Judge if time1 is after time2
184 | *
185 | * @param time1
186 | * @param time2
187 | * @return Judgement result
188 | */
189 | public static boolean after(String time1, String time2) {
190 | try {
191 | SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT);
192 | Date dateTime1 = sdf.parse(time1);
193 | Date dateTime2 = sdf.parse(time2);
194 |
195 | if (dateTime1.after(dateTime2)) {
196 | return true;
197 | }
198 | } catch (Exception e) {
199 | e.printStackTrace();
200 | }
201 | return false;
202 | }
203 |
204 | /**
205 | * Calculate time difference(Unit: second)
206 | *
207 | * @param time1
208 | * @param time2
209 | * @return difference
210 | */
211 | public static int minus(String time1, String time2) {
212 | try {
213 | SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT);
214 | Date datetime1 = sdf.parse(time1);
215 | Date datetime2 = sdf.parse(time2);
216 |
217 | long millisecond = datetime1.getTime() - datetime2.getTime();
218 |
219 | return (int) (millisecond / 1000);
220 | } catch (Exception e) {
221 | e.printStackTrace();
222 | }
223 | return 0;
224 | }
225 |
226 | /**
227 | * Get the year, month, day and hour
228 | *
229 | * @param datetime time(yyyy-MM-dd HH:mm:ss)
230 | * @return result(yyyy-MM-dd_HH)
231 | */
232 | public static String getDateHour(String datetime) {
233 | String date = datetime.split(" ")[0];
234 | String hourMinuteSecond = datetime.split(" ")[1];
235 | String hour = hourMinuteSecond.split(":")[0];
236 | return date + "_" + hour;
237 | }
238 |
239 | /**
240 | * get the date of yesterday(yyyy-MM-dd)
241 | *
242 | * @return the date of yesterday
243 | */
244 | public static String getYesterdayDate() {
245 | Calendar cal = Calendar.getInstance();
246 | cal.setTime(new Date());
247 | cal.add(Calendar.DAY_OF_YEAR, -1);
248 |
249 | Date date = cal.getTime();
250 |
251 | SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
252 | return sdf.format(date);
253 | }
254 |
255 | /**
256 | * Format a date, keeping minute precision
257 | * yyyyMMddHHmm
258 | *
259 | * @param date
260 | * @return
261 | */
262 | public static String formatTimeMinute(Date date) {
263 | SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmm");
264 | return sdf.format(date);
265 | }
266 |
267 | public static String fileToString(File file) throws Exception{
268 | FileInputStream inStream = new FileInputStream(file);
269 | ByteArrayOutputStream outStream = new ByteArrayOutputStream();
270 | try {
271 |
272 | Boolean reading = true;
273 | while (reading) {
274 | int c = inStream.read();
275 | if(c == -1){
276 | reading = false;
277 | }else{
278 | outStream.write(c);
279 | }
280 | }
281 | outStream.flush();
282 | }catch (Exception e){
283 | System.err.println(e.getMessage());
284 | }finally {
285 | inStream.close();
286 | }
287 | return new String(outStream.toByteArray(), "UTF-8");
288 | }
289 |
290 | }
291 |
--------------------------------------------------------------------------------
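A minimal usage sketch for the helpers above (the demo object is hypothetical and not part of the project):

```scala
import com.intel.streaming_benchmark.common.DateUtils

// Hypothetical demo: exercises a few DateUtils helpers from the class above.
object DateUtilsDemo {
  def main(args: Array[String]): Unit = {
    val now = System.currentTimeMillis()
    // millisecond timestamp -> "yyyy-MM-dd HH:mm:ss"
    println(DateUtils.parseLong2String(now))
    // "yyyy-MM-dd HH:mm:ss" -> "yyyy-MM-dd_HH"
    println(DateUtils.getDateHour("2019-10-30 19:07:26"))                  // 2019-10-30_19
    // difference between two timestamps, in seconds
    println(DateUtils.minus("2019-10-30 19:08:00", "2019-10-30 19:07:00")) // 60
  }
}
```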
/common/src/main/java/com/intel/streaming_benchmark/common/StreamBenchConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.intel.streaming_benchmark.common;
19 |
20 | /**
21 |  * All configuration property names used in StreamBench are defined here. The property
22 |  * names may be refactored later; with this mapping layer, the underlying Java/Scala code
23 |  * does not need to change.
24 | */
25 | public class StreamBenchConfig {
26 | // =====================================
27 | // General StreamBench Conf
28 | // =====================================
29 | public static String ZK_HOST = "streambench.zkHost";
30 |
31 | public static String CONSUMER_GROUP = "streambench.kafka.consumerGroup";
32 |
33 | public static String KAFKA_BROKER_LIST = "streambench.kafka.brokerList";
34 |
35 | public static String DATAGEN_TIME = "streambench.dataGen.time";
36 |
37 | public static String DATAGEN_THROUGHPUT = "streambench.dataGen.throughput";
38 |
39 |
40 |
41 | // =====================================
42 | // Data Generator Related Conf
43 | // =====================================
44 | // public static String DATAGEN_RECORDS_PRE_INTERVAL = "hibench.streambench.datagen.recordsPerInterval";
45 | //
46 | // public static String DATAGEN_INTERVAL_SPAN = "hibench.streambench.datagen.intervalSpan";
47 | //
48 | // public static String DATAGEN_TOTAL_RECORDS = "hibench.streambench.datagen.totalRecords";
49 | //
50 | // public static String DATAGEN_TOTAL_ROUNDS = "hibench.streambench.datagen.totalRounds";
51 | //
52 | // public static String DATAGEN_RECORD_LENGTH = "hibench.streambench.datagen.recordLength";
53 | //
54 | // public static String DATAGEN_PRODUCER_NUMBER = "hibench.streambench.datagen.producerNumber";
55 | // =====================================
56 | // Spark Streaming Related Conf
57 | // =====================================
58 | // public static String SPARK_BATCH_INTERVAL = "hibench.streambench.spark.batchInterval";
59 | //
60 | // public static String SPARK_CHECKPOINT_PATH = "hibench.streambench.spark.checkpointPath";
61 | //
62 | // public static String SPARK_ENABLE_WAL = "hibench.streambench.spark.enableWAL";
63 | //
64 | // public static String SPARK_USE_DIRECT_MODE = "hibench.streambench.spark.useDirectMode";
65 | //
66 | // public static String SPARK_STORAGE_LEVEL = "hibench.streambench.spark.storageLevel";
67 | //
68 | // public static String SPARK_RECEIVER_NUMBER = "hibench.streambench.spark.receiverNumber";
69 |
70 | // ======================================
71 | // Flink Related Conf
72 | // ======================================
73 |
74 |
75 | public static String FLINK_CHECKPOINTDURATION = "streambench.flink.checkpointDuration";
76 |
77 | public static String FLINK_RESULT_DIR = "streambench.flink.result.dir";
78 |
79 | public static String FLINK_TIMETYPE = "streambench.flink.timeType";
80 |
81 |
82 | public static String SQL_LOCATION= "streambench.flink.sqlLocation";
83 |
84 |
85 |
86 |
87 | }
88 |
--------------------------------------------------------------------------------
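A rough sketch of how these keys combine with the ConfigLoader above. The real drivers live in the flink/ and spark/ Benchmark classes (not reproduced in this listing), so treat this only as an illustration:

```scala
import com.intel.streaming_benchmark.common.{ConfigLoader, StreamBenchConfig}

// Hypothetical snippet: load the common config, overlay the Flink-specific file,
// then read the whitespace-separated "key value" properties by their constant names.
object ConfigDemo {
  def main(args: Array[String]): Unit = {
    val conf = new ConfigLoader("conf/benchmarkConf.yaml")
    conf.merge("flink/conf/benchmarkConf.yaml")

    val brokers    = conf.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST)
    val group      = conf.getProperty(StreamBenchConfig.CONSUMER_GROUP)
    val checkpoint = conf.getProperty(StreamBenchConfig.FLINK_CHECKPOINTDURATION)
    println(s"brokers=$brokers consumerGroup=$group checkpointDuration=${checkpoint}ms")
  }
}
```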
/common/src/main/scala/com/intel/streaming_benchmark/common/QueryConfig.scala:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.common
2 |
3 | object QueryConfig {
4 | val queryScene: Map[String, String] = Map(
5 | "q1.sql" -> "Shopping_record",
6 | "q2.sql" -> "Real_time_Advertising",
7 | "q3.sql" -> "Real_time_Advertising",
8 | "q4.sql" -> "Real_time_Advertising",
9 | "q5.sql" -> "User_visit_session_record",
10 | "q6.sql" -> "User_visit_session_record",
11 | "q7.sql" -> "User_visit_session_record",
12 | "q8.sql" -> "User_visit_session_record",
13 | "q9.sql" -> "Real_time_Advertising",
14 | "q10.sql" -> "User_visit_session_record",
15 | "q11.sql" -> "User_visit_session_record",
16 | "q12.sql" -> "User_visit_session_record"
17 | )
18 |
19 | val queryTables: Map[String, String] = Map(
20 | "q1.sql" -> "shopping",
21 | "q2.sql" -> "click",
22 | "q3.sql" -> "imp",
23 | "q4.sql" -> "dau,click",
24 | "q5.sql" -> "userVisit",
25 | "q6.sql" -> "userVisit",
26 | "q7.sql" -> "userVisit",
27 | "q8.sql" -> "userVisit",
28 | "q9.sql" -> "dau,click",
29 | "q10.sql" -> "userVisit",
30 | "q11.sql" -> "userVisit",
31 | "q12.sql" -> "userVisit"
32 | )
33 |
34 | def getScene(query: String): String ={
35 | if (queryScene.contains(query)) {
36 | queryScene(query)
37 | } else {
38 | throw new IllegalArgumentException(s"$query does not exist!")
39 | }
40 | }
41 |
42 | def getTables(query: String): String ={
43 | if (queryTables.contains(query)) {
44 | queryTables(query)
45 | } else {
46 | throw new IllegalArgumentException(s"$query does not exist!")
47 | }
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/common/src/main/scala/com/intel/streaming_benchmark/common/Schema.scala:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.common
2 |
3 | trait Schema {
4 |
5 | def getFieldNames: Array[String]
6 |
7 | def getFieldTypes: Array[String]
8 |
9 |
10 | }
11 |
12 |
--------------------------------------------------------------------------------
/common/src/main/scala/com/intel/streaming_benchmark/common/TableSchemaProvider.scala:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.common
2 |
3 | case class Column(
4 | name: String,
5 | index: Int,
6 | types: String
7 |
8 | )
9 |
10 | trait TableSchema extends Schema {
11 |
12 | val columns: Array[Column]
13 |
14 | def getFieldNames: Array[String] = columns.map(_.name)
15 |
16 | def getFieldTypes: Array[String] =
17 | columns.map(column => column.types)
18 |
19 | }
20 |
21 | object Shopping extends TableSchema {
22 |
23 | override val columns = Array[Column](
24 | Column("userId", 0, "String"),
25 | Column("commodity", 1, "String"),
26 | Column("times", 2, "LONG")
27 | )
28 | }
29 |
30 | object Click extends TableSchema {
31 |
32 | override val columns = Array[Column](
33 | Column("click_time", 0, "Long"),
34 | Column("strategy", 1, "String"),
35 | Column("site", 2, "String"),
36 | Column("pos_id", 3, "String"),
37 | Column("poi_id", 4, "String"),
38 | Column("device_id", 5, "String")
39 | )
40 | }
41 |
42 | object Imp extends TableSchema {
43 |
44 | override val columns = Array[Column](
45 | Column("imp_time", 0, "Long"),
46 | Column("strategy", 1, "String"),
47 | Column("site", 2, "String"),
48 | Column("pos_id", 3, "String"),
49 | Column("poi_id", 4, "String"),
50 | Column("cost", 5, "Double"),
51 | Column("device_id", 6, "String")
52 | )
53 | }
54 |
55 | object Dau extends TableSchema {
56 |
57 | override val columns = Array[Column](
58 | Column("dau_time", 0, "Long"),
59 | Column("device_id", 1, "String")
60 | )
61 | }
62 |
63 | object UserVisit extends TableSchema {
64 |
65 | override val columns = Array[Column](
66 | Column("date", 0, "String"),
67 | Column("userId", 1, "Long"),
68 | Column("sessionId", 2, "String"),
69 | Column("pageId", 3, "Long"),
70 | Column("actionTime", 4, "String"),
71 | Column("searchKeyword", 5, "String"),
72 | Column("clickCategoryId", 6, "String"),
73 | Column("clickProductId", 7, "String"),
74 | Column("orderCategoryIds", 8, "String"),
75 | Column("orderProductIds", 9, "String"),
76 | Column("payCategoryIds", 10, "String"),
77 | Column("payProductIds", 11, "String"),
78 | Column("cityId", 12, "String")
79 | )
80 | }
81 |
82 | object TableSchemaProvider {
83 | val schemaMap: Map[String, Schema] = Map(
84 | "shopping" -> Shopping,
85 | "click" -> Click,
86 | "imp" -> Imp,
87 | "dau" -> Dau,
88 | "userVisit" -> UserVisit
89 | )
90 |
91 | def getSchema(tableName: String): Schema = {
92 | if (schemaMap.contains(tableName)) {
93 | schemaMap(tableName)
94 | } else {
95 | throw new IllegalArgumentException(s"$tableName does not exist!")
96 | }
97 | }
98 |
99 | }
100 |
--------------------------------------------------------------------------------
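A small sketch combining the query-to-table and table-to-schema mappings above (the demo object is illustrative only and not part of the project):

```scala
import com.intel.streaming_benchmark.common.{QueryConfig, TableSchemaProvider}

// Hypothetical demo: list the fields of every table a query reads from.
object SchemaDemo {
  def main(args: Array[String]): Unit = {
    // q4.sql joins the dau and click streams, so getTables returns "dau,click".
    for (table <- QueryConfig.getTables("q4.sql").split(",")) {
      val schema = TableSchemaProvider.getSchema(table)
      val fields = schema.getFieldNames.zip(schema.getFieldTypes)
        .map { case (n, t) => s"$n:$t" }
        .mkString(", ")
      println(s"$table -> $fields")
    }
  }
}
```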
/conf/benchmarkConf.yaml:
--------------------------------------------------------------------------------
1 | streambench.zkHost 10.1.2.166:2181
2 | streambench.kafka.brokerList 10.1.2.143:9093,10.1.2.143:9094,10.1.2.143:9095,10.1.2.143:9096,10.1.2.143:9097,10.1.2.143:9098,10.1.2.143:9099,10.1.2.143:9100,10.1.2.143:9101,10.1.2.143:9102,10.1.2.159:9093,10.1.2.159:9094,10.1.2.159:9095,10.1.2.159:9096,10.1.2.159:9097,10.1.2.159:9098,10.1.2.159:9099,10.1.2.159:9100,10.1.2.159:9101,10.1.2.159:9102,10.1.2.166:9093,10.1.2.166:9094,10.1.2.166:9095,10.1.2.166:9096,10.1.2.166:9097,10.1.2.166:9098,10.1.2.166:9099,10.1.2.166:9100,10.1.2.166:9101,10.1.2.166:9102,10.1.2.164:9093,10.1.2.164:9094,10.1.2.164:9095,10.1.2.164:9096,10.1.2.164:9097,10.1.2.164:9098,10.1.2.164:9099,10.1.2.164:9100,10.1.2.164:9101,10.1.2.164:9102
3 | streambench.kafka.consumerGroup kafka_to_hdfs2
4 |
--------------------------------------------------------------------------------
/conf/dataGenHosts:
--------------------------------------------------------------------------------
1 | 10.1.2.143
2 | 10.1.2.159
3 | 10.1.2.164
4 | 10.1.2.166
--------------------------------------------------------------------------------
/conf/env:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | BASE_HOME=$(cd `dirname $0`;pwd)
4 | BENCH_HOME=$(dirname $BASE_HOME)
5 |
6 | export DATAGEN_TIME=200
7 | export THREAD_PER_NODE=10
8 | export CONF=$BENCH_HOME/conf/benchmarkConf.yaml
9 | export FLINK_HOME=/opt/Beaver/flink
10 | export SPARK_HOME=/opt/Beaver/spark
11 |
--------------------------------------------------------------------------------
/conf/queriesToRun:
--------------------------------------------------------------------------------
1 | q9.sql
2 |
--------------------------------------------------------------------------------
/dataGen/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | streaming_benchmark
7 | com.intel.streaming_benchmark
8 | 1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | dataGen
13 |
14 |
15 | com.alibaba
16 | fastjson
17 | 1.2.58
18 |
19 |
20 |
21 | org.apache.kafka
22 | kafka_2.11
23 | 0.10.2.1
24 |
25 |
26 |
27 |
28 | com.intel.streaming_benchmark
29 | common
30 | 1.0-SNAPSHOT
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 | org.apache.maven.plugins
39 | maven-shade-plugin
40 | 2.4.3
41 |
42 |
43 |
44 |
45 | *:*
46 |
47 | META-INF/*.SF
48 | META-INF/*.DSA
49 | META-INF/*.RSA
50 |
51 |
52 |
53 |
54 |
55 | junit:junit
56 | org.slf4j:slf4j-simple
57 | org.slf4j:slf4j-log4j12
58 | com.101tec:zkclient
59 | com.github.sgroschupf:zkclient
60 | org.apache.httpcomponents:httpclient
61 |
62 |
63 |
64 |
65 |
66 | package
67 |
68 | shade
69 |
70 |
71 |
72 |
73 |
74 | net.alchim31.maven
75 | scala-maven-plugin
76 | 3.2.0
77 |
78 |
79 | compile-scala
80 | compile
81 |
82 | add-source
83 | compile
84 |
85 |
86 |
87 | test-compile-scala
88 | test-compile
89 |
90 | add-source
91 | testCompile
92 |
93 |
94 |
95 |
96 | 2.11.8
97 |
98 |
99 |
100 | org.codehaus.mojo
101 | build-helper-maven-plugin
102 | 1.4
103 |
104 |
105 | add-source
106 | generate-sources
107 |
108 | add-source
109 |
110 |
111 |
112 | ../common/src/main/scala
113 | ../common/src/main/java
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 | org.apache.maven.plugins
122 | maven-compiler-plugin
123 |
124 | 1.8
125 | 1.8
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 | maven-clean-plugin
134 | 3.1.0
135 |
136 |
137 |
138 | maven-resources-plugin
139 | 3.0.2
140 |
141 |
142 | maven-compiler-plugin
143 | 3.8.0
144 |
145 |
146 | maven-surefire-plugin
147 | 2.22.1
148 |
149 |
150 | maven-jar-plugin
151 | 3.0.2
152 |
153 |
154 | maven-install-plugin
155 | 2.5.2
156 |
157 |
158 | maven-deploy-plugin
159 | 2.8.2
160 |
161 |
162 |
163 | maven-site-plugin
164 | 3.7.1
165 |
166 |
167 | net.alchim31.maven
168 | scala-maven-plugin
169 | 3.2.2
170 |
171 |
172 | scala-compile-first
173 | process-resources
174 |
175 | add-source
176 | compile
177 |
178 |
179 |
180 | scala-test-compile
181 | process-test-resources
182 |
183 | testCompile
184 |
185 |
186 |
187 |
188 |
189 | org.apache.maven.plugins
190 | maven-compiler-plugin
191 | 3.2
192 |
193 | 1.8
194 | 1.8
195 | UTF-8
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
--------------------------------------------------------------------------------
/dataGen/src/main/java/com/intel/streaming_benchmark/Datagen.java:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark;
2 |
3 | import com.intel.streaming_benchmark.common.ConfigLoader;
4 | import com.intel.streaming_benchmark.common.QueryConfig;
5 | import com.intel.streaming_benchmark.utils.GetProducer;
6 |
7 | import java.util.concurrent.ExecutorService;
8 | import java.util.concurrent.Executors;
9 |
10 | public class Datagen {
11 | public static void main(String[] args) {
12 |
13 | System.out.println("------------------Already input args[]------------------");
14 | //the time to generate data
15 | Long time = Long.valueOf(args[0]);
16 | System.out.println("------------------time: " + time + "s-------------------");
17 | //the query name (used to derive the scene and Kafka topics)
18 | String sqlName = args[2];
19 | System.out.println("------------------sql: " + sqlName + "------------------");
20 | String scene = QueryConfig.getScene(sqlName);
21 |
22 | ConfigLoader configLoader = new ConfigLoader(args[3]);
23 | System.out.println("------------------config: " + args[3] + "---------------");
24 | //the number of threads for datagen
25 | int producerNumber = Integer.valueOf(args[1]);
26 | System.out.println("----------Thread_per_node:" + producerNumber + "--------");
27 | ExecutorService pool = Executors.newFixedThreadPool(producerNumber);
28 | for(int i = 0; i < producerNumber; i++){
29 | pool.execute(new GetProducer(scene, time, configLoader));
30 | }
31 | System.out.println("============ StreamBench Data Generator ============");
32 | pool.shutdown();
33 | System.out.println("======== StreamBench Data Generator Finished ========");
34 |
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/dataGen/src/main/java/com/intel/streaming_benchmark/utils/ConfigLoader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.intel.streaming_benchmark.utils;
19 |
20 | import java.io.BufferedReader;
21 | import java.io.FileNotFoundException;
22 | import java.io.FileReader;
23 | import java.io.IOException;
24 | import java.util.HashMap;
25 | import java.util.Map;
26 |
27 | public class ConfigLoader {
28 | private String ConfigFileName = null;
29 | private Map store;
30 |
31 | public ConfigLoader(String filename){
32 | ConfigFileName = filename;
33 | store = new HashMap();
34 | // Load and parse config
35 | try {
36 | BufferedReader br = new BufferedReader(new FileReader(filename));
37 | String line = br.readLine();
38 | while(line != null){
39 | if ((line.length()>0) && (line.charAt(0)!='#')) {
40 | String[] words = line.split("\\s+");
41 | if (words.length == 2) {
42 | String key = words[0];
43 | String value = words[1];
44 | store.put(key, value);
45 | } else if (words.length == 1) {
46 | String key = words[0];
47 | store.put(key, "");
48 | } else {
49 | if (!line.startsWith("streambench"))
50 | System.out.println("Warning: unknown config parsed, skip:" + line);
51 | }
52 | }
53 | line = br.readLine();
54 | }
55 | } catch (FileNotFoundException e) {
56 | System.out.println("ERROR: Config file not found! Should not happen. Caused by:");
57 | } catch (IOException e) {
58 | System.out.println("ERROR: IO exception during read file. Should not happen. Caused by:");
59 | e.printStackTrace();
60 | }
61 | }
62 |
63 | public String getProperty(String key){
64 | if (store.containsKey(key))
65 | return (String) store.get(key);
66 | else {
67 | System.out.println("ERROR: Unknown config key:" + key);
68 | return null;
69 | }
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/dataGen/src/main/java/com/intel/streaming_benchmark/utils/Constants.java:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.utils;
2 |
3 | /**
4 | * Project Basic dataUtil.Constants
5 | */
6 | public interface Constants {
7 |
8 | String SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR = "|";
9 | String SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR_ESCAOE = "\\|";
10 |
11 | /**
12 | * Project Configuration dataUtil.Constants
13 | */
14 | String JDBC_DRIVER = "jdbc.driver";
15 | String JDBC_DATASOURCE_SIZE = "jdbc.datasource.size";
16 | String JDBC_URL = "jdbc.url";
17 | String JDBC_USER = "jdbc.user";
18 | String JDBC_PASSWORD = "jdbc.password";
19 |
20 | String SPARK_SQL_JDBC_URL = "spark.sql.jdbc.url";
21 | String SPARK_SQL_JDBC_URL_PROD = "spark.sql.jdbc.url.prod";
22 |
23 | String SPARK_LOCAL = "spark.local";
24 |
25 | String KAFKA_METADATA_BROKER_LIST = "metadata.broker.list";
26 | String KAFKA_TOPICS = "kafka.topics";
27 | String KAFKA_ZOOKEEPER_URL = "zookeeper.connect.url";
28 |
29 |
30 | /**
31 | * Spark Application dataUtil.Constants
32 | */
33 | String SPARK_APP_NAME_SESSION = "UserVisitSessionAnalyzeSpark_";
34 | String SPARK_APP_NAME_PRODUCT = "AreaTop3ProductSpark_";
35 | String SPARK_APP_NAME_AD = "AdClickRealTimeStateSpark";
36 |
37 | String FIELD_ACTION_TIME = "action_time";
38 | String FIELD_SESSION_ID = "session_id";
39 | String FIELD_SEARCH_KEYWORDS = "search_keyword";
40 | String FIELD_CLICK_CATEGORY_ID = "click_category_id";
41 | String FIELD_AGE = "age";
42 | String FIELD_PROFESSIONAL = "professional";
43 | String FIELD_CITY = "city";
44 | String FIELD_SEX = "sex";
45 |
46 |
47 | String FIELD_CATEGORY_ID = "categoryId";
48 | String FIELD_CLICK_COUNT = "clickCount";
49 | String FIELD_ORDER_COUNT = "orderCount";
50 | String FIELD_PAY_COUNT = "payCount";
51 |
52 | String SESSION_COUNT = "session_count";
53 |
54 | String TIME_PERIOD_1s_4s = "1s_4s";
55 | String TIME_PERIOD_4s_7s = "4s_7s";
56 | String TIME_PERIOD_7s_10s = "7s_10s";
57 | String TIME_PERIOD_10s_30s = "10s_30s";
58 | String TIME_PERIOD_30s_60s = "30s_60s";
59 | String TIME_PERIOD_1m_3m = "1m_3m";
60 | String TIME_PERIOD_3m_10m = "3m_10m";
61 | String TIME_PERIOD_10m_30m = "10m_30m";
62 | String TIME_PERIOD_30m = "30m";
63 |
64 | String STEP_PERIOD_1_3 = "1_3";
65 | String STEP_PERIOD_4_6 = "4_6";
66 | String STEP_PERIOD_7_9 = "7_9";
67 | String STEP_PERIOD_10_29 = "10_29";
68 | String STEP_PERIOD_30_59 = "30_59";
69 | String STEP_PERIOD_60 = "60";
70 |
71 | /**
72 | * Source Table Column Names
73 | */
74 | String UVA_FIELD_USER_ID = "user_id";
75 | String UVA_FIELD_DATE = "date";
76 | String UVA_FIELD_SESSION_ID = "session_id";
77 | String UVA_FIELD_ACTION_TIME = "action_time";
78 |
79 | /**
80 | * Task dataUtil.Constants
81 | */
82 | String PARAM_SAMPLE_TYPE = "sampleType";
83 | String PARAM_SESSION_RATIO = "sessionRatio";
84 | String PARAM_START_DATE = "startDate";
85 | String PARAM_END_DATE = "endDate";
86 | String PARAM_START_AGE = "startAge";
87 | String PARAM_END_AGE = "endAge";
88 | String PARAM_PROFESSIONALS = "professionals";
89 | String PARAM_CITIES = "cities";
90 | String PARAM_SEX = "sex";
91 | String PARAM_KEYWORDS = "keywords";
92 | String PARAM_CATEGORY_IDS = "categoryIds";
93 | String FIELD_VISIT_LENGTH = "visitLength";
94 | String FIELD_STEP_LENGTH = "stepLength";
95 | String FIELD_START_TIME = "startTime";
96 | }
97 |
--------------------------------------------------------------------------------
/dataGen/src/main/java/com/intel/streaming_benchmark/utils/GetProducer.java:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.utils;
2 |
3 | import com.alibaba.fastjson.JSONObject;
4 | import com.intel.streaming_benchmark.ClickProducer;
5 | import com.intel.streaming_benchmark.common.ConfigLoader;
6 | import com.intel.streaming_benchmark.common.StreamBenchConfig;
7 | import org.apache.kafka.clients.producer.KafkaProducer;
8 | import org.apache.kafka.clients.producer.ProducerConfig;
9 | import org.apache.kafka.clients.producer.ProducerRecord;
10 | import java.net.InetAddress;
11 | import java.text.SimpleDateFormat;
12 | import java.util.Properties;
13 | import java.util.Random;
14 |
15 | public class GetProducer extends Thread{
16 |
17 | private String topic;
18 | private Long time;
19 | private ConfigLoader cl;
20 | public GetProducer(String topic, Long time , ConfigLoader cl){
21 |
22 | super();
23 | this.topic = topic;
24 | this.time = time;
25 | this.cl = cl;
26 | }
27 |
28 | @Override
29 | public void run() {
30 |
31 | System.out.println(Thread.currentThread().getName() + "=======");
32 |
33 | if (topic.equals("Shopping_record")){
34 | datagenTopic1(cl);
35 | }
36 | else if(topic.equals("Real_time_Advertising")){
37 | datagenTopic2(cl);
38 | }
39 | else if(topic.equals("User_visit_session_record")){
40 | new ClickProducer(time, cl).run();
41 | }else{
42 | System.out.println("No such scene!");
43 | }
44 |
45 | }
46 |
47 | private KafkaProducer createProducer(ConfigLoader cl) {
48 |
49 | Properties properties = new Properties();
50 | properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer");
51 | properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer");
52 | properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST));
53 | return new KafkaProducer<>(properties);
54 | }
55 |
56 | private void datagenTopic1(ConfigLoader cl) {
57 |
58 | String[] commodities = {"milk", "bag", "book","desk","sweet", "food", "disk","pen", "shoe", "animal","phone", "paper", "cup", "light", "glass", "power", "GameBoy", "chopsticks"};
59 | Random random = new Random();
60 | KafkaProducer producer = createProducer(cl);
61 | SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
62 | long start = System.currentTimeMillis();
63 | Boolean flag = true;
64 | Long count = 0L;
65 | Long totalLength = 0L;
66 | String threadName = Thread.currentThread().getName();
67 |
68 | try {
69 |
70 | InetAddress address = InetAddress.getLocalHost();
71 | String hostName = address.getHostName().toString();
72 | while(flag){
73 | byte[] message = (hostName + "_" + count + "_" + threadName + "," + commodities[random.nextInt(commodities.length)] +"," + System.currentTimeMillis()).getBytes();
74 | producer.send(new ProducerRecord("shopping", message));
75 | count = count + 1;
76 | totalLength = totalLength + message.length;
77 | if((System.currentTimeMillis() - start) > time*1000){
78 | flag = false;
79 | }
80 | }
81 | }catch (Exception e){
82 | e.printStackTrace();
83 | }
84 |
85 | producer.close();
86 | }
87 |
88 | private void datagenTopic2(ConfigLoader cl){
89 | Long count = 0L;
90 | Long totalLength = 0L;
91 |
92 | KafkaProducer producer = createProducer(cl);
93 | long start = System.currentTimeMillis();
94 | Boolean flag = true;
95 |
96 | Random random = new Random();
97 | String strategy_all[] ={"t1","t2","t3","t4","t5","t6"};//t1:strategy1, t2:strategy2, ..., t6:strategy6
98 | String site_all[] ={"1","2","3"};//1:baidu media,2:toutiao media,3: weibo media
99 | String pos_id_all[] ={"a","b","c"};//a:ad space,b:ad space,c:ad space
100 | String poi_id_all[] ={"1001","1002","1003"};//1001:ad material,1002:ad material,1003:ad material
101 | String cost_all[] ={"0.01","0.02","0.03"};//cost
102 | String device_id_all[] ={"aaaaa","bbbbb","ccccc","ddddd","eeeee","fffff","ggggg"};//device
103 | while(flag){
104 |
105 | try{
106 | JSONObject imp = new JSONObject();
107 | imp.put("imp_time",Long.valueOf(System.currentTimeMillis()));
108 | imp.put("strategy",strategy_all[random.nextInt(strategy_all.length-1)]);
109 | imp.put("site",pos_id_all[random.nextInt(site_all.length-1)]);
110 | imp.put("pos_id",strategy_all[random.nextInt(pos_id_all.length-1)]);
111 | imp.put("poi_id",poi_id_all[random.nextInt(poi_id_all.length-1)]);
112 | imp.put("cost",cost_all[random.nextInt(cost_all.length-1)]);
113 | imp.put("device_id",device_id_all[random.nextInt(device_id_all.length-1)]);
114 | //send exposure log
115 | byte[] imp_message = imp.toJSONString().getBytes();
116 | producer.send(new ProducerRecord("imp",imp_message));
117 | count++;
118 | totalLength = totalLength + imp_message.length;
119 |
120 | if (random.nextInt(4) == 1){ //~25% probability of triggering a click
121 | JSONObject click =imp;
122 | click.remove("imp_time");
123 | click.remove("cost");
124 | click.put("click_time",Long.valueOf(System.currentTimeMillis()));
125 | byte[] click_message = click.toJSONString().getBytes();
126 | producer.send(new ProducerRecord("click",click_message));
127 | count++;
128 | totalLength = totalLength + click_message.length;
129 |
130 | if (random.nextInt(2) == 1){ //~50% probability of generating a dau record
131 | JSONObject dau = new JSONObject();
132 | dau.put("dau_time",Long.valueOf(System.currentTimeMillis()));
133 | dau.put("device_id",click.get("device_id").toString());
134 | byte[] dau_message = dau.toJSONString().getBytes();
135 | producer.send(new ProducerRecord("dau",dau_message));
136 | count++;
137 | totalLength = totalLength + dau_message.length;
138 | }
139 | }
140 | if((System.currentTimeMillis() - start) > time*1000){
141 | flag = false;
142 | }
143 |
144 | }catch (Exception e){
145 | e.printStackTrace();
146 | }
147 | }
148 | }
149 |
150 |
151 | }
152 |
--------------------------------------------------------------------------------
/dataGen/src/main/scala/com/intel/streaming_benchmark/ClickProducer.scala:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark
2 |
3 | import java.net.InetAddress
4 | import java.util.Properties
5 | import com.alibaba.fastjson.JSONObject
6 | import com.intel.streaming_benchmark.click.{cityTypeSize, citys, keywordSize, keywords, productNumbers, professionalTypeSize, professionals, random, sexTypeSize, sexs, userNumbers}
7 | import com.intel.streaming_benchmark.common.{ConfigLoader, DateUtils, StreamBenchConfig}
8 | import com.intel.streaming_benchmark.utils.Constants
9 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
10 |
11 | import scala.collection.mutable.ArrayBuffer
12 |
13 | class ClickProducer(val time:Long, val cl: ConfigLoader){
14 | var total = 0L
15 | var length = 0L
16 | var threadName = Thread.currentThread().getName
17 | var hostName = InetAddress.getLocalHost.getHostName
18 | var seed = 0
19 | def run(): Unit = {
20 | // mockUserInfo()
21 | // mockProductInfo
22 | mockUserVisitAction(time)
23 |
24 | }
25 |
26 | private def createProducer = {
27 | val properties = new Properties
28 | properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer")
29 | properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer")
30 | properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST))
31 | new KafkaProducer[Array[Byte], Array[Byte]](properties)
32 | }
33 |
34 |
35 | /**
36 | * Simulation code for generating user information
37 | *
38 | * @param
39 | * @return
40 | */
41 | def mockUserInfo() = {
42 | val producer = createProducer
43 | for (i <- 0 until userNumbers) {
44 | val userId = i.toLong
45 | val age = (total % 60).toInt
46 | val userName = s"user_${i}"
47 | val name = s"name_${i}"
48 | val professional = professionals((total % professionalTypeSize).toInt)
49 | val city = citys((total%cityTypeSize).toInt)._2
50 | val sex = sexs((total % sexTypeSize).toInt)
51 | producer.send(new ProducerRecord("userInfo", UserInfo(
52 | userId, userName, name,
53 | age, professional, city, sex).formatted(",").getBytes()))
54 | }
55 | }
56 |
57 | /**
58 | * Simulation code for generating data of userVisitAction
59 | *
60 | * @param
61 | * @return
62 | */
63 |
64 | def mockUserVisitAction(time: Long) = {
65 | val date: String = DateUtils.getTodayDate()
66 | val producer = createProducer
67 | val start: Long = System.currentTimeMillis()
68 |
69 | // get action time according the time of last action
70 | def getCurrentActionTime(preActionTime: Long): Long = {
71 | preActionTime + total % 60
72 | }
73 |
74 | // generate a produceID and productCategoryNumber
75 | def generateProduceAndCategoryId(): (Long, Long) = {
76 | val produceID = total % productNumbers
77 | (produceID, produceID % click.productCategoryNumbers)
78 | }
79 |
80 | // generate date for pageView
81 | def generatePageView(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = {
82 | if (times < 20) {
83 | // pageView ID:[0,100)
84 | val pageId: Long = total % 100
85 | val actionTime: Long = getCurrentActionTime(preActionTime)
86 | val searchKeyword: String = ""
87 | val clickCategoryId: String = ""
88 | val clickProductId: String = ""
89 | val orderCategoryIds: String = ""
90 | val orderProductIds: String = ""
91 | val payCategoryIds: String = ""
92 | val payProductIds: String = ""
93 |
94 | // Add data
95 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes()
96 | producer.send(new ProducerRecord("userVisit", message))
97 | length = length + message.length
98 | total = total + 1
99 | // Go to next action
100 | val (t1, t2, t3) =
101 | if (times < 3) {
102 | (4, 7, 9)
103 | } else if (times < 10) {
104 | (2, 4, 7)
105 | } else {
106 | (1, 2, 3)
107 | }
108 | val tmp = seed % 10
109 | seed = seed + 1
110 | if (tmp <= t1) {
111 | // Visit
112 | generatePageView(times + 1, userId, sessionId, cityId, actionTime)
113 | } else if (tmp <= t2) {
114 | // Search
115 | generateSearch(times + 1, userId, sessionId, cityId, actionTime)
116 | } else if (tmp <= t3) {
117 | // Click
118 | generateClick(times + 1, userId, sessionId, cityId, actionTime)
119 | } else {
120 | // nothings, finish
121 | }
122 |
123 | }
124 | }
125 |
126 | // Generate data for searching
127 | def generateSearch(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = {
128 | if (times < 20) {
129 | // search ID:[100,150)
130 | val pageId: Long = total % 50 + 100
131 | val actionTime = getCurrentActionTime(preActionTime)
132 | val searchKeyword: String = keywords((total % keywordSize).toInt)
133 | val clickCategoryId: String = ""
134 | val clickProductId: String = ""
135 | val orderCategoryIds: String = ""
136 | val orderProductIds: String = ""
137 | val payCategoryIds: String = ""
138 | val payProductIds: String = ""
139 |
140 | // Add data
141 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes()
142 | producer.send(new ProducerRecord("userVisit",message))
143 | length = length + message.length
144 | total = total + 1
145 | // Go to next action
146 | val (t1, t2, t3) =
147 | if (times < 3) {
148 | (2, 5, 8)
149 | } else if (times < 10) {
150 | (1, 2, 5)
151 | } else {
152 | (1, 2, 3)
153 | }
154 | val tmp = seed % 10
155 | seed = seed + 1
156 | if (tmp <= t1) {
157 | // Visit
158 | generatePageView(times + 1, userId, sessionId, cityId, actionTime)
159 | } else if (tmp <= t2) {
160 | // Search
161 | generateSearch(times + 1, userId, sessionId, cityId, actionTime)
162 | } else if (tmp <= t3) {
163 | // Click
164 | generateClick(times + 1, userId, sessionId, cityId, actionTime)
165 | } else {
166 | // nothings, finish
167 | }
168 | }
169 | }
170 |
171 | // Generate data for clicking
172 | def generateClick(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = {
173 | if (times < 20) {
174 | // click ID:[150,300)
175 | val pageId: Long = total % 150 + 150
176 | val actionTime = getCurrentActionTime(preActionTime)
177 | val searchKeyword: String = ""
178 | val (productID, categoryID) = generateProduceAndCategoryId()
179 | val clickProductId: String = productID.toString
180 | val clickCategoryId: String = categoryID.toString
181 | val orderCategoryIds: String = ""
182 | val orderProductIds: String = ""
183 | val payCategoryIds: String = ""
184 | val payProductIds: String = ""
185 |
186 | // Add data
187 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes()
188 | producer.send(new ProducerRecord("userVisit", message))
189 | // Go to next action
190 | total = total + 1
191 | length = length + message.length
192 |
193 | val (t1, t2, t3, t4) =
194 | if (times < 3) {
195 | (3, 6, 15, 18)
196 | } else if (times < 10) {
197 | (2, 4, 11, 15)
198 | } else {
199 | (1, 2, 6, 8)
200 | }
201 |
202 | val tmp = seed % 20
203 | seed = seed + 1
204 | if (tmp <= t1) {
205 | // Visit
206 | generatePageView(times + 1, userId, sessionId, cityId, actionTime)
207 | } else if (tmp <= t2) {
208 | // Search
209 | generateSearch(times + 1, userId, sessionId, cityId, actionTime)
210 | } else if (tmp <= t3) {
211 | // Order
212 | generateOrder(times + 1, userId, sessionId, cityId, actionTime)
213 | } else if (tmp <= t4) {
214 | // Click
215 | generateClick(times + 1, userId, sessionId, cityId, actionTime)
216 | } else {
217 | // no further action, the session ends here
218 | }
219 |
220 | }
221 | }
222 |
223 | // Generate data for order
224 | def generateOrder(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = {
225 | if (times < 20) {
226 | // order ID:[300,301)
227 | val pageId: Long = 300
228 | val actionTime = getCurrentActionTime(preActionTime)
229 | val searchKeyword: String = ""
230 | val clickProductId: String = ""
231 | val clickCategoryId: String = ""
232 | // Several products may be ordered together; the number of products is in [1,6)
233 | val randomProductNumbers = total % 5 + 1
234 | val bf = ArrayBuffer[(Long, Long)]()
235 | for (j <- 0 until randomProductNumbers.toInt) {
236 | bf += generateProduceAndCategoryId()
237 | }
238 | val nbf = bf.distinct
239 |
240 | val orderCategoryIds: String = nbf.map(_._2).mkString(Constants.SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR)
241 | val orderProductIds: String = nbf.map(_._1).mkString(Constants.SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR)
242 | val payCategoryIds: String = ""
243 | val payProductIds: String = ""
244 |
245 | // Add data
246 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes()
247 | producer.send(new ProducerRecord("userVisit", message))
248 | total = total + 1
249 | length = length + message.length
250 | // Go to next action
251 | val (t1, t2, t3) =
252 | if (times <= 3) {
253 | (1, 2, 9)
254 | } else if (times < 10) {
255 | (1, 2, 8)
256 | } else {
257 | (1, 2, 7)
258 | }
259 |
260 | val tmp = seed % 10
261 | seed = seed + 1
262 |
263 | if (tmp <= t1) {
264 | // Visit
265 | generatePageView(times + 1, userId, sessionId, cityId, actionTime)
266 | } else if (tmp <= t2) {
267 | // Search
268 | generateSearch(times + 1, userId, sessionId, cityId, actionTime)
269 | } else if (tmp <= t3) {
270 | // Pay
271 | generatePay(times + 1, userId, sessionId, cityId, actionTime, productIds = orderProductIds, categoryIds = orderCategoryIds)
272 | } else {
273 | // no further action, the session ends here
274 | }
275 |
276 | }
277 | }
278 |
279 | // Generate data for pay
280 | def generatePay(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long, productIds: String, categoryIds: String): Unit = {
281 | if (times <= 20) {
282 | // pay ID:301
283 | val pageId: Long = 301
284 | val actionTime = getCurrentActionTime(preActionTime)
285 | val searchKeyword: String = ""
286 | val clickProductId: String = ""
287 | val clickCategoryId: String = ""
288 | val orderCategoryIds: String = ""
289 | val orderProductIds: String = ""
290 | val payCategoryIds: String = categoryIds
291 | val payProductIds: String = productIds
292 |
293 | // Add data
294 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes()
295 | producer.send(new ProducerRecord("userVisit", message))
296 |
297 | total = total + 1
298 | length = length + message.length
299 | // Go to next action
300 | val (t1, t2) =
301 | if (times < 10) {
302 | (4, 8)
303 | } else {
304 | (1, 3)
305 | }
306 |
307 | val tmp = seed % 10
308 | seed = seed + 1
309 |
310 | if (tmp <= t1) {
311 | // Visit
312 | generatePageView(times + 1, userId, sessionId, cityId, actionTime)
313 | } else if (tmp <= t2) {
314 | // Search
315 | generateSearch(times + 1, userId, sessionId, cityId, actionTime)
316 | } else {
317 | // no further action, the session ends here
318 | }
319 |
320 | }
321 | }
322 |
323 | var flag: Boolean = true
324 | while (flag) {
325 | val startTime = System.currentTimeMillis()
326 | val userId: Long = random.nextInt(userNumbers)
327 | val sessionId = hostName + "_" + threadName + "_"+ total
328 | val cityId = citys((total % cityTypeSize).toInt)._1
329 | seed = random.nextInt(100)
330 | // Actions fall into five types: visit, search, click, order and pay
331 | /**
332 | * Suppose the access chain has several situations:
333 | * 1. Visit -> Search -> Click -> Order -> Pay
334 | * 2. Search -> Click -> Order -> Pay
335 | * 3. Visit -> Click -> Order -> Pay
336 | * Note: Visit, Search and Click can repeat, while Order and Pay cannot appear successively.
337 | * ======>
338 | * After visiting, the next action may be a visit, a search or a click.
339 | * After searching, the next action may be a visit, a search or a click.
340 | * After clicking, the next action may be a visit, a search, an order or another click.
341 | * After ordering, the next action may be a visit, a search or a pay.
342 | * After paying, the next action may be a visit or a search.
343 | * Note: after any action, the session may also simply finish.
344 | **/
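// Example (generatePageView with times < 3): thresholds (t1, t2, t3) = (4, 7, 9) and tmp = seed % 10,
// so tmp <= 4 continues with a visit, 5..7 with a search, 8..9 with a click; deeper in the session
// the thresholds shrink to (1, 2, 3) and any larger value ends the session.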
345 |
346 | // 80% of sessions start with a visit, 20% with a search
347 | if (total % 5 < 4) {
348 | // generate data for visit
349 | generatePageView(0, userId, sessionId, cityId, startTime)
350 | } else {
351 | // generate data for search
352 | generateSearch(0, userId, sessionId, cityId, startTime)
353 | }
354 |
355 | if ( (System.currentTimeMillis() - start) > time*1000) {
356 | flag = false
357 | }
358 |
359 | }
360 | }
361 |
362 | /**
363 | * Simulation code for generating product info records and sending them to the productInfo topic
364 | *
365 | * @param
366 | * @return
367 | */
368 | def mockProductInfo() = {
369 | val producer = createProducer
370 | val buffer = ArrayBuffer[ProductInfo]()
371 | for (i <- 0 until productNumbers) {
372 | val productID: Long = i.toLong
373 | val productName: String = s"product_${productID}"
374 | // 60% third party products; 40% proprietary products
375 | val extendInfo: String = {
376 | val obj = new JSONObject()
377 | if (random.nextDouble() <= 0.4) {
378 | // proprietary product
379 | obj.put("product_type", "0")
380 | } else {
381 | // third party products
382 | obj.put("product_type", "1")
383 | }
384 | obj.toJSONString
385 | }
386 | producer.send(new ProducerRecord("productInfo", ProductInfo(productID, productName, extendInfo).formatted(",").getBytes()))
387 |
388 | }
389 | }
390 |
391 | }
392 |
--------------------------------------------------------------------------------
/dataGen/src/main/scala/com/intel/streaming_benchmark/click.scala:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark
2 |
3 | import java.util.Random
4 |
5 |
6 | object click {
7 |
8 | val random = new Random
9 | val splitSymbol = ","
10 |
11 | val userNumbers = 1000
12 |
13 | val userVisitSessionNumbers = 10000
14 |
15 | val productNumbers = 10000
16 |
17 | val productCategoryNumbers = 50
18 |
19 | val professionals = Array("Programmer", "Teacher", "Cook", "Driver", "Doctor", "Nurse", "Designer", "Farmer", "Worker", "Assistant")
20 | val professionalTypeSize = professionals.length
21 |
22 | val citys: Array[(Int, String)] = Array("Shanghai", "Beijing", "Shenzhen", "Guangzhou", "Nanjing", "Hangzhou", "Changsha", "Nanchang", "Zhangjiajie", "Hong Kong", "Macao").zipWithIndex.map(_.swap)
23 | val cityTypeSize = citys.length
24 |
25 | val sexs = Array("male", "female", "unknown")
26 | val sexTypeSize = sexs.length
27 | // search keywords
28 | val keywords = Array("Hot Pot", "Cake", "Chongqing spicy chicken", "Chongqing facet",
29 | "Biscuits", "Fish", "International Trade Building or Cetra Building", "Pacific Mall", "Japanese cuisine", "Hot Spring")
30 | val keywordSize = keywords.length
31 |
32 | var count = 0
33 | }
34 |
35 |
36 | case class ProductInfo(
37 | productID: Long,
38 | productName: String,
39 | extendInfo: String
40 | ) {
41 | /**
42 | * Format
43 | *
44 | * @param splitSymbol
45 | * @return
46 | */
47 | def formatted(splitSymbol: String = "^"): String = {
48 | s"${productID}${splitSymbol}${productName}${splitSymbol}${extendInfo}"
49 | }
50 | }
51 |
52 | object ProductInfo {
53 | /**
54 | * column name of the table
55 | */
56 | val columnNames = Array("product_id", "product_name", "extend_info")
57 |
58 | /**
59 | * Parse row data and return the object; if parsing fails return None
60 | *
61 | * @param line
62 | * @param splitSymbol
63 | * @return
64 | */
65 | def parseProductInfo(line: String, splitSymbol: String = "\\^"): Option[ProductInfo] = {
66 | val arr = line.split(splitSymbol)
67 | if (arr.length == 3) {
68 | Some(
69 | new ProductInfo(
70 | arr(0).toLong,
71 | arr(1),
72 | arr(2)
73 | )
74 | )
75 | } else None
76 | }
77 | }
78 |
79 |
80 |
81 | case class UserInfo(
82 | userId: Long,
83 | userName: String,
84 | name: String,
85 | age: Int,
86 | professional: String,
87 | city: String,
88 | sex: String
89 | ) {
90 | /**
91 | * Format the user record as a delimited string
92 | *
93 | * @param splitSymbol
94 | * @return
95 | */
96 | def formatted(splitSymbol: String = ","): String = {
97 | s"${userId}${splitSymbol}${userName}${splitSymbol}${name}${splitSymbol}${age}${splitSymbol}${professional}${splitSymbol}${city}${splitSymbol}${sex}"
98 | }
99 | }
100 |
101 | object UserInfo {
102 | /**
103 | * column name of the table
104 | */
105 | val columnNames = Array("user_id", "user_name", "name", "age", "professional", "city", "sex")
106 |
107 | /**
108 | * Parse row data and return the object; if parsing fails return None
109 | *
110 | * @param line
111 | * @param splitSymbol
112 | * @return
113 | */
114 | def parseUserInfo(line: String, splitSymbol: String = ","): Option[UserInfo] = {
115 | val arr = line.split(splitSymbol)
116 | if (arr.length == 7) {
117 | Some(new UserInfo(
118 | arr(0).toLong,
119 | arr(1),
120 | arr(2),
121 | arr(3).toInt,
122 | arr(4),
123 | arr(5),
124 | arr(6)
125 | ))
126 | } else None
127 | }
128 | }
129 |
130 |
131 | case class UserVisitAction(
132 | date: String,
133 | userId: Long,
134 | sessionId: String,
135 | pageId: Long,
136 | actionTime: Long,
137 | searchKeyword: String,
138 | clickCategoryId: String,
139 | clickProductId: String,
140 | orderCategoryIds: String,
141 | orderProductIds: String,
142 | payCategoryIds: String,
143 | payProductIds: String,
144 | cityId: Int
145 | ) {
146 | /**
147 | * Format the user visit action as a delimited string
148 | *
149 | * @param splitSymbol
150 | * @return
151 | */
152 | def formatted(splitSymbol: String = ","): String = {
153 | s"${date}${splitSymbol}${userId}${splitSymbol}${sessionId}${splitSymbol}${pageId}${splitSymbol}${actionTime}${splitSymbol}${searchKeyword}${splitSymbol}${clickCategoryId}${splitSymbol}${clickProductId}${splitSymbol}${orderCategoryIds}${splitSymbol}${orderProductIds}${splitSymbol}${payCategoryIds}${splitSymbol}${payProductIds}${splitSymbol}${cityId}"
154 | }
155 | }
156 |
157 | object UserVisitAction {
158 | /**
159 | * column name of the table
160 | */
161 | val columnNames = Array("date", "user_id", "session_id", "page_id", "action_time", "search_keyword", "click_category_id", "click_product_id", "order_category_ids", "order_product_ids", "pay_category_ids", "pay_product_ids", "city_id")
162 |
163 | /**
164 | * Parse row data and return the object; if parsing fails return None
165 | *
166 | * @param line
167 | * @param splitSymbol
168 | * @return
169 | */
170 | def parseUserVisitAction(line: String, splitSymbol: String = ","): Option[UserVisitAction] = {
171 | val arr = line.split(splitSymbol)
172 | if (arr.length == 13) {
173 | Some(
174 | new UserVisitAction(
175 | arr(0),
176 | arr(1).toLong,
177 | arr(2),
178 | arr(3).toLong,
179 | arr(4).toLong,
180 | arr(5),
181 | arr(6),
182 | arr(7),
183 | arr(8),
184 | arr(9),
185 | arr(10),
186 | arr(11),
187 | arr(12).toInt
188 | )
189 | )
190 | } else None
191 | }
192 | }
193 |
194 |
195 |
--------------------------------------------------------------------------------
/flink/conf/benchmarkConf.yaml:
--------------------------------------------------------------------------------
1 | streambench.flink.checkpointDuration 5000
2 | streambench.flink.timeType EventTime
--------------------------------------------------------------------------------
/flink/log/q1.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q1.sql.log
--------------------------------------------------------------------------------
/flink/log/q10.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q10.sql.log
--------------------------------------------------------------------------------
/flink/log/q11.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q11.sql.log
--------------------------------------------------------------------------------
/flink/log/q12.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q12.sql.log
--------------------------------------------------------------------------------
/flink/log/q2.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q2.sql.log
--------------------------------------------------------------------------------
/flink/log/q3.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q3.sql.log
--------------------------------------------------------------------------------
/flink/log/q4.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q4.sql.log
--------------------------------------------------------------------------------
/flink/log/q5.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q5.sql.log
--------------------------------------------------------------------------------
/flink/log/q6.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q6.sql.log
--------------------------------------------------------------------------------
/flink/log/q7.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q7.sql.log
--------------------------------------------------------------------------------
/flink/log/q8.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q8.sql.log
--------------------------------------------------------------------------------
/flink/log/q9.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q9.sql.log
--------------------------------------------------------------------------------
/flink/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | streaming_benchmark
7 | com.intel.streaming_benchmark
8 | 1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | flink
13 |
14 |
15 | com.intel.streaming_benchmark
16 | common
17 | 1.0-SNAPSHOT
18 |
19 |
20 | com.alibaba
21 | fastjson
22 | 1.2.58
23 |
24 |
25 |
26 | src/main/java
27 |
28 |
29 | src/main/resources
30 | true
31 |
32 |
33 | src/main/java
34 |
35 | ../*.java
36 |
37 |
38 |
39 |
40 |
41 |
42 | org.codehaus.mojo
43 | build-helper-maven-plugin
44 | 1.4
45 |
46 |
47 | add-source
48 | generate-sources
49 |
50 | add-source
51 |
52 |
53 |
54 | ../common/src/main/scala
55 | ../common/src/main/java
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 | net.alchim31.maven
66 | scala-maven-plugin
67 | 3.2.2
68 |
69 |
70 | scala-compile-first
71 | process-resources
72 |
73 | add-source
74 | compile
75 |
76 |
77 |
78 | scala-test-compile
79 | process-test-resources
80 |
81 | testCompile
82 |
83 |
84 |
85 |
86 |
87 | org.apache.maven.plugins
88 | maven-compiler-plugin
89 | 3.2
90 |
91 | 1.8
92 | 1.8
93 | UTF-8
94 |
95 |
96 |
97 |
98 |
99 | org.apache.maven.plugins
100 | maven-compiler-plugin
101 | 3.8.0
102 |
103 | 1.8
104 | 1.8
105 |
106 |
107 |
108 | org.apache.maven.plugins
109 | maven-resources-plugin
110 | 3.0.2
111 |
112 | UTF-8
113 |
114 |
115 |
116 |
117 | org.apache.maven.plugins
118 | maven-shade-plugin
119 | 2.4.3
120 |
121 |
122 |
123 |
124 | *:*
125 |
126 | META-INF/*.SF
127 | META-INF/*.DSA
128 | META-INF/*.RSA
129 |
130 |
131 |
132 |
133 |
134 | junit:junit
135 | org.slf4j:slf4j-simple
136 | org.slf4j:slf4j-log4j12
137 | com.101tec:zkclient
138 | com.github.sgroschupf:zkclient
139 | org.apache.httpcomponents:httpclient
140 |
141 |
142 |
143 |
144 |
145 | package
146 |
147 | shade
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
--------------------------------------------------------------------------------
/flink/query/q1.sql:
--------------------------------------------------------------------------------
1 | select
2 | commodity, count(userId) num, TUMBLE_START(rowtime, INTERVAL '10' SECOND),TUMBLE_END(rowtime, INTERVAL '10' SECOND), UNIX_TIMESTAMP(TUMBLE_START(rowtime, INTERVAL '10' SECOND)) - UNIX_TIMESTAMP(TO_TIMESTAMP(min(times)))
3 | from
4 | shopping
5 | group by
6 | TUMBLE(rowtime, INTERVAL '10' SECOND), commodity
--------------------------------------------------------------------------------
/flink/query/q10.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | a.dt, a.h, COUNT(sessionId) num
3 | FROM
4 | (SELECT
5 | sessionId, MAX(actionTime)-MIN(actionTime) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h
6 | FROM
7 | userVisit
8 | GROUP BY
9 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP))) a
10 | WHERE
11 | a.len < 100
12 | GROUP BY
13 | a.dt, a.h
--------------------------------------------------------------------------------
/flink/query/q11.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | a.dt, a.h, SUM(a.len) total
3 | FROM
4 | (SELECT
5 | sessionId, MAX(actionTime)-MIN(actionTime) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h
6 | FROM
7 | userVisit
8 | GROUP BY
9 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP))) a
10 | WHERE
11 | a.len < 1
12 | GROUP BY
13 | a.dt, a.h
--------------------------------------------------------------------------------
/flink/query/q12.sql:
--------------------------------------------------------------------------------
1 |
2 | SELECT
3 | *
4 | FROM
5 | (SELECT
6 | *, ROW_NUMBER() OVER (PARTITION BY w.cityId ORDER BY w.num DESC) as rownum
7 | FROM
8 | (SELECT
9 | TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), cityId, payProductIds, count(*) num
10 | FROM
11 | userVisit
12 | WHERE
13 | payProductIds IS NOT NULL
14 | GROUP BY
15 | cityId, payProductIds, TUMBLE(rowtime, INTERVAL '10' SECOND)
16 | ) w
17 | ) v
18 | WHERE
19 | v.rownum <= 10
--------------------------------------------------------------------------------
/flink/query/q2.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | strategy, site, pos_id, TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), count(*) click_count
3 | FROM
4 | click
5 | GROUP BY
6 | strategy, site, pos_id, TUMBLE(rowtime, INTERVAL '10' SECOND)
--------------------------------------------------------------------------------
/flink/query/q3.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | strategy, site, pos_id, TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), SUM(cost)
3 | FROM
4 | imp
5 | GROUP BY
6 | strategy, site, pos_id, TUMBLE(rowtime, INTERVAL '10' SECOND)
--------------------------------------------------------------------------------
/flink/query/q4.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | b.device_id, a.strategy, a.site, a.pos_id, count(b.device_id)
3 | FROM
4 | click a
5 | JOIN
6 | dau b
7 | ON
8 | a.device_id = b.device_id AND a.rowtime BETWEEN b.rowtime - INTERVAL '1' second AND b.rowtime + INTERVAL '1' second
9 | GROUP BY
10 | b.device_id, a.strategy, a.site, a.pos_id, TUMBLE(a.rowtime, INTERVAL '10' SECOND)
11 |
--------------------------------------------------------------------------------
/flink/query/q5.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | sessionId, MAX(actionTime)-MIN(actionTime) as len
3 | FROM
4 | userVisit
5 | GROUP BY
6 | sessionId, TUMBLE(rowtime, INTERVAL '10' SECOND)
7 |
8 |
--------------------------------------------------------------------------------
/flink/query/q6.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | sessionId, (MAX(actionTime)-MIN(actionTime)) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h, COUNT(sessionId) num
3 | FROM
4 | userVisit
5 | GROUP BY
6 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP)), TUMBLE(rowtime, INTERVAL '10' SECOND)
7 | HAVING
8 | (MAX(actionTime)-MIN(actionTime)) < 100
--------------------------------------------------------------------------------
/flink/query/q7.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), cityId, payProductIds, count(*) num
3 | FROM
4 | userVisit
5 | WHERE
6 | payProductIds IS NOT NULL
7 | GROUP BY
8 | cityId, payProductIds, TUMBLE(rowtime, INTERVAL '10' SECOND)
--------------------------------------------------------------------------------
/flink/query/q8.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | TUMBLE_START(rowtime, INTERVAL '10' SECOND) startTime, TUMBLE_END(rowtime, INTERVAL '10' SECOND) finish, cityId, count(clickCategoryId) as sequence
3 | FROM
4 | userVisit
5 | WHERE
6 | clickCategoryId IS NOT NULL
7 | GROUP BY
8 | cityId, TUMBLE(rowtime, INTERVAL '10' SECOND)
--------------------------------------------------------------------------------
/flink/query/q9.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | a.device_id, a.strategy, a.site, a.pos_id, b.var2, b.var1, count(*)
3 | FROM
4 | (SELECT device_id, strategy, site, pos_id FROM click) a
5 | JOIN
6 | (SELECT device_id, FROM_UNIXTIME(CAST(dau_time/1000 AS BIGINT), 'yyyyMMdd') as var1, FROM_UNIXTIME(CAST(dau_time/1000 AS BIGINT), 'HH') as var2 FROM dau) b
7 | ON
8 | a.device_id = b.device_id
9 | GROUP BY
10 | a.device_id, a.strategy, a.site, a.pos_id, b.var2, b.var1
--------------------------------------------------------------------------------
/flink/result/result.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/result/result.log
--------------------------------------------------------------------------------
/flink/src/main/java/com/intel/streaming_benchmark/flink/Benchmark.java:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.flink;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import com.intel.streaming_benchmark.common.*;
5 | import com.intel.streaming_benchmark.utils.FlinkBenchConfig;
6 | import org.apache.flink.api.common.JobExecutionResult;
7 | import org.apache.flink.api.common.accumulators.IntCounter;
8 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
9 | import org.apache.flink.api.common.serialization.SimpleStringSchema;
10 | import org.apache.flink.api.java.tuple.*;
11 | import org.apache.flink.configuration.Configuration;
12 | import org.apache.flink.streaming.api.TimeCharacteristic;
13 | import org.apache.flink.streaming.api.datastream.DataStream;
14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
15 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
16 | import org.apache.flink.streaming.api.watermark.Watermark;
17 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
18 | import org.apache.flink.table.api.EnvironmentSettings;
19 | import org.apache.flink.table.api.Table;
20 | import org.apache.flink.table.api.TableConfig;
21 | import org.apache.flink.table.api.java.StreamTableEnvironment;
22 | import org.apache.flink.types.Row;
23 | import org.apache.flink.util.Collector;
24 | import com.alibaba.fastjson.JSONObject;
25 | import javax.annotation.Nullable;
26 | import java.io.BufferedWriter;
27 | import java.io.File;
28 | import java.io.FileWriter;
29 | import java.text.SimpleDateFormat;
30 | import java.util.Properties;
31 |
32 | public class Benchmark {
33 | public static void main(String[] args) throws Exception {
34 | if (args.length < 2)
35 | BenchLogUtil.handleError("Usage: RunBench ");
36 | //root Config
37 | ConfigLoader cl = new ConfigLoader(args[0]);
38 | String benchmarkConfDir = new File(args[0]).getParent();
39 |
40 | //flink config
41 | String flinkConf = benchmarkConfDir + "/../flink/conf/benchmarkConf.yaml";
42 | cl.merge(flinkConf);
43 |
44 | // Prepare configuration
45 | FlinkBenchConfig conf = new FlinkBenchConfig();
46 | conf.brokerList = cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST);
47 | conf.zkHost = cl.getProperty(StreamBenchConfig.ZK_HOST);
48 | conf.consumerGroup = cl.getProperty(StreamBenchConfig.CONSUMER_GROUP);
49 | conf.checkpointDuration = Long.parseLong(cl.getProperty(StreamBenchConfig.FLINK_CHECKPOINTDURATION));
50 | conf.timeType = cl.getProperty(StreamBenchConfig.FLINK_TIMETYPE);
51 | conf.topic = QueryConfig.getTables(args[1]);
52 | conf.sqlLocation = benchmarkConfDir + "/../flink/query";
53 | conf.resultLocation = benchmarkConfDir + "/../flink/result";
54 | conf.sqlName = args[1];
55 | runQuery(conf);
56 | }
57 |
58 | public static void runQuery(FlinkBenchConfig config) throws Exception{
59 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
60 | env.enableCheckpointing(config.checkpointDuration);
61 | if(config.timeType.equals("EventTime")){
62 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
63 | }else{
64 | env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
65 | }
66 |
67 | TableConfig tc = new TableConfig();
68 | EnvironmentSettings builder = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
69 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env,builder);
70 |
71 | Properties properties = new Properties();
72 | properties.setProperty("zookeeper.connect", config.zkHost);
73 | properties.setProperty("group.id", config.consumerGroup);
74 | properties.setProperty("bootstrap.servers", config.brokerList);
75 |
76 | String[] topics = config.topic.split(",");
77 |
78 | //generate table
79 | for(int i = 0; i < topics.length; i++){
80 | // source stream
81 | FlinkKafkaConsumer010<String> consumer = new FlinkKafkaConsumer010<>(topics[i], new SimpleStringSchema(), properties);
82 | consumer.setStartFromLatest();
83 | // consumer.setStartFromEarliest();
84 | //add stream source for flink
85 | DataStream<String> stream = env.addSource(consumer);
86 | // Parsing the stream requires this topic's table schema (field names)
87 | String[] fieldNames = TableSchemaProvider.getSchema(topics[i]).getFieldNames();
88 | // TypeInformation returnType = TypeExtractor.createTypeInfo();
89 | DataStream streamParsed;
90 |
91 | if(config.timeType.equals("EventTime")){
92 | if(topics[i].equals("shopping")){
93 | streamParsed = stream.flatMap(new DeserializeShopping()).assignTimestampsAndWatermarks(new ShoppingWatermarks());
94 | }else if(topics[i].equals("click")){
95 | streamParsed = stream.flatMap(new DeserializeClick()).assignTimestampsAndWatermarks(new ClickWatermarks());
96 | }else if(topics[i].equals("imp")){
97 | streamParsed = stream.flatMap(new DeserializeImp()).assignTimestampsAndWatermarks(new ImpWatermarks());
98 | }else if(topics[i].equals("dau")){
99 | streamParsed = stream.flatMap(new DeserializeDau()).assignTimestampsAndWatermarks(new DauWatermarks());
100 | }else if(topics[i].equals("userVisit")){
101 | streamParsed = stream.flatMap(new DeserializeUserVisit()).assignTimestampsAndWatermarks(new UserVisitWatermarks());
102 | }else{
103 | System.out.println("No such topic, please check your benchmarkConf.yaml");
104 | return;
105 | }
106 |
107 | }else{
108 | if(topics[i].equals("shopping")){
109 | streamParsed = stream.flatMap(new DeserializeShopping());
110 | }else if(topics[i].equals("click")){
111 | streamParsed = stream.flatMap(new DeserializeClick());
112 | }else if(topics[i].equals("imp")){
113 | streamParsed = stream.flatMap(new DeserializeImp());
114 | }else if(topics[i].equals("dau")){
115 | streamParsed = stream.flatMap(new DeserializeDau());
116 | }else if(topics[i].equals("userVisit")){
117 | streamParsed = stream.flatMap(new DeserializeUserVisit());
118 | }else{
119 | System.out.println("No such topic, please check your benchmarkConf.yaml");
120 | return;
121 | }
122 | }
123 |
124 | tableEnv.registerTable(topics[i], tableEnv.fromDataStream(streamParsed, FieldString(fieldNames, config.timeType)));
125 | }
126 |
127 | //runQuery
128 | File file = new File(config.sqlLocation + "/" + config.sqlName);
129 | if (!file.exists()) {
130 | return;
131 | }
132 | try {
133 | String queryString = DateUtils.fileToString(file);
134 | Table table = tableEnv.sqlQuery(queryString);
135 | table.printSchema();
136 | DataStream<Tuple2<Boolean, Row>> tuple2DataStream = tableEnv.toRetractStream(table, Row.class);
137 | tuple2DataStream.print();
138 | } catch (Exception e) {
139 | e.printStackTrace();
140 | }
141 |
142 | JobExecutionResult execute = env.execute(config.sqlName);
143 | JobExecutionResult jobExecutionResult = execute.getJobExecutionResult();
144 | long netRuntime = jobExecutionResult.getNetRuntime();
145 | System.out.println("----------------runtime---------------- :" + netRuntime);
146 | long count = 0;
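// Sum the per-topic accumulators to get the total number of records consumed; the TPS written below
// is this count divided by the job runtime in seconds.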
147 | for(int i = 0; i < topics.length; i++){
148 | Integer tmp = (Integer)jobExecutionResult.getAccumulatorResult(topics[i]);
149 | count = count + tmp.longValue();
150 | }
151 | File resultFile = new File(config.resultLocation + "/result.log" );
152 | if (!resultFile.exists()) {
153 | resultFile.createNewFile();
154 | }
155 | FileWriter fileWriter = new FileWriter(config.resultLocation + "/result.log", true);
156 | BufferedWriter bufferWriter = new BufferedWriter(fileWriter);
157 | bufferWriter.write("Finished time: "+ DateUtils.parseLong2String(System.currentTimeMillis()) + "; " + config.sqlName + " Runtime: " + netRuntime/1000 + " TPS:" + count/(netRuntime/1000) + "\r\n");
158 | bufferWriter.close();
159 |
160 | }
161 |
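// Builds the field expression handed to fromDataStream: the topic's column names joined by commas,
// plus a trailing "rowtime" attribute declared as event time (.rowtime) or processing time (.proctime).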
162 | private static String FieldString(String[] fieldNames, String timeType){
163 | String fields = "";
164 | for(int i = 0; i < fieldNames.length; i++){
165 | fields = fields + fieldNames[i] + ",";
166 | }
167 | if(timeType.equals("EventTime")){
168 | fields = fields + "rowtime.rowtime";
169 | }else{
170 | fields = fields + "rowtime.proctime";
171 | }
172 | return fields;
173 | }
174 |
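// The periodic watermark assigners below emit watermarks 2 seconds (maxOutOfOrderness) behind the
// largest event timestamp seen so far; each one reads the event time from a topic-specific tuple field.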
175 | public static class ShoppingWatermarks implements AssignerWithPeriodicWatermarks<Tuple3<String, String, Long>> {
176 | Long currentMaxTimestamp = 0L;
177 | final Long maxOutOfOrderness = 2000L;
178 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
179 |
180 | @Nullable
181 | @Override
182 | public Watermark getCurrentWatermark() {
183 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness);
184 | return watermark;
185 | }
186 |
187 | @Override
188 | public long extractTimestamp(Tuple3<String, String, Long> element, long previousElementTimestamp) {
189 | Long timestamp = Long.valueOf(element.f2);
190 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
191 | return timestamp;
192 | }
193 | }
194 |
195 |
196 | public static class ClickWatermarks implements AssignerWithPeriodicWatermarks<Tuple6<Long, String, String, String, String, String>> {
197 | Long currentMaxTimestamp = 0L;
198 | final Long maxOutOfOrderness = 2000L;
199 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
200 |
201 | @Nullable
202 | @Override
203 | public Watermark getCurrentWatermark() {
204 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness);
205 | return watermark;
206 | }
207 |
208 | @Override
209 | public long extractTimestamp(Tuple6<Long, String, String, String, String, String> element, long previousElementTimestamp) {
210 | Long timestamp = Long.valueOf(element.f0);
211 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
212 | return timestamp;
213 | }
214 | }
215 |
216 |
217 | public static class ImpWatermarks implements AssignerWithPeriodicWatermarks<Tuple7<Long, String, String, String, String, Double, String>> {
218 | Long currentMaxTimestamp = 0L;
219 | final Long maxOutOfOrderness = 2000L;
220 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
221 |
222 | @Nullable
223 | @Override
224 | public Watermark getCurrentWatermark() {
225 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness);
226 | return watermark;
227 | }
228 |
229 | @Override
230 | public long extractTimestamp(Tuple7<Long, String, String, String, String, Double, String> element, long previousElementTimestamp) {
231 | Long timestamp = Long.valueOf(element.f0);
232 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
233 | return timestamp;
234 | }
235 | }
236 |
237 |
238 | public static class DauWatermarks implements AssignerWithPeriodicWatermarks<Tuple2<Long, String>> {
239 | Long currentMaxTimestamp = 0L;
240 | final Long maxOutOfOrderness = 2000L;
241 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
242 |
243 | @Nullable
244 | @Override
245 | public Watermark getCurrentWatermark() {
246 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness);
247 | return watermark;
248 | }
249 |
250 | @Override
251 | public long extractTimestamp(Tuple2<Long, String> element, long previousElementTimestamp) {
252 | Long timestamp = Long.valueOf(element.f0);
253 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
254 | return timestamp;
255 | }
256 | }
257 |
258 |
259 | public static class UserVisitWatermarks implements AssignerWithPeriodicWatermarks<Tuple13<String, Long, String, Long, Long, String, String, String, String, String, String, String, Integer>> {
260 | Long currentMaxTimestamp = 0L;
261 | final Long maxOutOfOrderness = 2000L;
262 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
263 |
264 | @Nullable
265 | @Override
266 | public Watermark getCurrentWatermark() {
267 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness);
268 | return watermark;
269 | }
270 |
271 | @Override
272 | public long extractTimestamp(Tuple13<String, Long, String, Long, Long, String, String, String, String, String, String, String, Integer> element, long previousElementTimestamp) {
273 | Long timestamp = Long.valueOf(element.f4);
274 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
275 | return timestamp;
276 | }
277 | }
278 |
279 |
280 |
281 | public static class DeserializeShopping extends RichFlatMapFunction<String, Tuple3<String, String, Long>> {
282 |
283 | // Counter numLines;
284 | private IntCounter shopping = new IntCounter();
285 | @Override
286 | public void open(Configuration parameters) throws Exception {
287 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines");
288 | getRuntimeContext().addAccumulator("shopping", this.shopping);
289 | super.open(parameters);
290 | }
291 |
292 | @Override
293 | public void flatMap(String s, Collector<Tuple3<String, String, Long>> collector) throws Exception {
294 | this.shopping.add(1);
295 | String[] split = s.split(",");
296 | collector.collect(new Tuple3<>(split[0], split[1], Long.valueOf(split[2])));
297 | }
298 | }
299 |
300 | public static class DeserializeClick extends RichFlatMapFunction<String, Tuple6<Long, String, String, String, String, String>> {
301 |
302 | private IntCounter click = new IntCounter();
303 | @Override
304 | public void open(Configuration parameters) throws Exception {
305 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines");
306 | getRuntimeContext().addAccumulator("click", this.click);
307 | super.open(parameters);
308 | }
309 |
310 | @Override
311 | public void flatMap(String input, Collector<Tuple6<Long, String, String, String, String, String>> collector) throws Exception {
312 | this.click.add(1);
313 | JSONObject obj = JSON.parseObject(input);
314 | // JSONObject obj = new JSONObject(input);
315 | Tuple6<Long, String, String, String, String, String> tuple = new Tuple6<>(
316 | obj.getLong("click_time"),
317 | obj.getString("strategy"),
318 | obj.getString("site"),
319 | obj.getString("pos_id"),
320 | obj.getString("poi_id"),
321 | obj.getString("device_id")
322 | );
323 | collector.collect(tuple);
324 | }
325 | }
326 |
327 | public static class DeserializeImp extends RichFlatMapFunction<String, Tuple7<Long, String, String, String, String, Double, String>> {
328 |
329 | private IntCounter imp = new IntCounter();
330 | @Override
331 | public void open(Configuration parameters) throws Exception {
332 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines");
333 | getRuntimeContext().addAccumulator("imp", this.imp);
334 | super.open(parameters);
335 | }
336 |
337 | @Override
338 | public void flatMap(String input, Collector<Tuple7<Long, String, String, String, String, Double, String>> collector) throws Exception {
339 | this.imp.add(1);
340 | JSONObject obj = JSON.parseObject(input);
341 | // JSONObject obj = new JSONObject(input);
342 | Tuple7<Long, String, String, String, String, Double, String> tuple = new Tuple7<>(
343 | obj.getLong("imp_time"),
344 | obj.getString("strategy"),
345 | obj.getString("site"),
346 | obj.getString("pos_id"),
347 | obj.getString("poi_id"),
348 | obj.getDouble("cost"),
349 | obj.getString("device_id")
350 | );
351 | collector.collect(tuple);
352 | }
353 | }
354 |
355 | public static class DeserializeDau extends RichFlatMapFunction<String, Tuple2<Long, String>> {
356 |
357 | private IntCounter dau = new IntCounter();
358 | @Override
359 | public void open(Configuration parameters) throws Exception {
360 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines");
361 | getRuntimeContext().addAccumulator("dau", this.dau);
362 | super.open(parameters);
363 | }
364 |
365 | @Override
366 | public void flatMap(String input, Collector<Tuple2<Long, String>> collector) throws Exception {
367 | this.dau.add(1);
368 | JSONObject obj = JSON.parseObject(input);
369 | // JSONObject obj = new JSONObject(input);
370 | Tuple2<Long, String> tuple = new Tuple2<>(
371 | obj.getLong("dau_time"),
372 | obj.getString("device_id")
373 | );
374 | collector.collect(tuple);
375 | }
376 | }
377 |
378 |
379 | public static class DeserializeUserVisit extends RichFlatMapFunction<String, Tuple13<String, Long, String, Long, Long, String, String, String, String, String, String, String, Integer>> {
380 |
381 | private IntCounter userVisit = new IntCounter();
382 | @Override
383 | public void open(Configuration parameters) throws Exception {
384 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines");
385 | getRuntimeContext().addAccumulator("userVisit", this.userVisit);
386 | super.open(parameters);
387 | }
388 |
389 | @Override
390 | public void flatMap(String s, Collector<Tuple13<String, Long, String, Long, Long, String, String, String, String, String, String, String, Integer>> collector) throws Exception {
391 | this.userVisit.add(1);
392 | String[] split = s.split(",");
393 | Tuple13<String, Long, String, Long, Long, String, String, String, String, String, String, String, Integer> tuple = new Tuple13<>(
394 | split[0],
395 | Long.valueOf(split[1]),
396 | split[2],
397 | Long.valueOf(split[3]),
398 | Long.valueOf(split[4]),
399 | split[5],
400 | split[6],
401 | split[7],
402 | split[8],
403 | split[9],
404 | split[10],
405 | split[11],
406 | Integer.valueOf(split[12])
407 | );
408 | collector.collect(tuple);
409 | }
410 | }
411 |
412 | }
413 |
--------------------------------------------------------------------------------
/flink/src/main/java/com/intel/streaming_benchmark/utils/FlinkBenchConfig.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.intel.streaming_benchmark.utils;
19 |
20 | import java.io.Serializable;
21 |
22 | public class FlinkBenchConfig implements Serializable {
23 | // public String testCase;
24 |
25 | // Kafka related
26 | public String zkHost;
27 | public String brokerList;
28 | public String topic;
29 | public String consumerGroup;
30 | // public String offsetReset;
31 | // public String reportTopic;
32 |
33 | // Flink related
34 | public long checkpointDuration;
35 | public String resultLocation;
36 | public String sqlLocation;
37 | public String sqlName;
38 | public String timeType;
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | com.intel.streaming_benchmark
8 | streaming_benchmark
9 | pom
10 | 1.0-SNAPSHOT
11 |
12 | common
13 | spark
14 | flink
15 | dataGen
16 |
17 |
18 |
19 |
20 | org.scala-lang
21 | scala-library
22 | 2.11.8
23 |
24 |
25 | org.scala-lang
26 | scala-compiler
27 | 2.11.8
28 |
29 |
30 |
31 | org.eclipse.tycho
32 | tycho-compiler-jdt
33 | 0.21.0
34 |
35 |
36 |
37 | org.eclipse.tycho
38 | tycho-compiler-jdt
39 | 0.21.0
40 |
41 |
42 |
43 |
44 | org.apache.flink
45 | flink-table-api-java
46 | 1.9.0
47 |
48 |
49 |
50 | org.apache.flink
51 | flink-table-planner-blink_2.11
52 | 1.9.0
53 |
54 |
55 |
56 |
57 | org.apache.flink
58 | flink-streaming-java_2.11
59 | 1.9.0
60 |
61 |
62 |
63 |
64 | com.alibaba
65 | fastjson
66 | 1.2.58
67 |
68 |
69 |
70 |
71 |
72 | org.apache.flink
73 | flink-streaming-scala_2.11
74 | 1.9.0
75 |
76 |
77 |
78 |
79 | org.apache.flink
80 | flink-connector-kafka-0.10_2.11
81 | 1.9.0
82 |
83 |
84 |
85 |
86 | org.apache.kafka
87 | kafka_2.11
88 | 0.10.2.1
89 |
90 |
91 |
92 |
93 |
94 |
95 | maven-compiler-plugin
96 | 3.8.0
97 |
98 | 1.8
99 | 1.8
100 |
101 |
102 |
103 |
104 |
105 |
106 |
--------------------------------------------------------------------------------
/spark/conf/benchmarkConf.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/conf/benchmarkConf.yaml
--------------------------------------------------------------------------------
/spark/log/q1.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q1.sql.log
--------------------------------------------------------------------------------
/spark/log/q2.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q2.sql.log
--------------------------------------------------------------------------------
/spark/log/q3.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q3.sql.log
--------------------------------------------------------------------------------
/spark/log/q4.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q4.sql.log
--------------------------------------------------------------------------------
/spark/log/q5.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q5.sql.log
--------------------------------------------------------------------------------
/spark/log/q6.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q6.sql.log
--------------------------------------------------------------------------------
/spark/log/q7.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q7.sql.log
--------------------------------------------------------------------------------
/spark/log/q8.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q8.sql.log
--------------------------------------------------------------------------------
/spark/log/q9.sql.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q9.sql.log
--------------------------------------------------------------------------------
/spark/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | streaming_benchmark
7 | com.intel.streaming_benchmark
8 | 1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | spark
13 |
14 |
15 |
16 | com.intel.streaming_benchmark
17 | common
18 | 1.0-SNAPSHOT
19 |
20 |
21 |
22 | org.apache.spark
23 | spark-sql-kafka-0-10_2.11
24 | 2.3.1
25 | compile
26 |
27 |
28 | kafka-clients
29 | org.apache.kafka
30 |
31 |
32 |
33 |
34 |
35 | org.apache.kafka
36 | kafka-clients
37 | 0.10.2.1
38 |
39 |
40 |
41 | org.apache.spark
42 | spark-streaming-kafka-0-10_2.11
43 | 2.3.1
44 |
45 |
46 |
47 | org.apache.spark
48 | spark-streaming_2.11
49 | 2.3.1
50 | compile
51 |
52 |
53 | org.apache.spark
54 | spark-sql_2.11
55 | 2.3.1
56 | compile
57 |
58 |
59 | org.apache.spark
60 | spark-sql-kafka-0-10_2.11
61 | 2.3.1
62 |
63 |
64 |
65 |
66 | com.fasterxml.jackson.core
67 | jackson-databind
68 | 2.6.5
69 |
70 |
71 |
72 | net.jpountz.lz4
73 | lz4
74 | 1.3.0
75 |
76 |
77 |
78 |
79 |
80 | src/main/java
81 |
82 |
83 | src/main/resources
84 | true
85 |
86 |
87 | src/main/java
88 |
89 | ../*.java
90 |
91 |
92 |
93 |
94 |
95 |
96 | org.codehaus.mojo
97 | build-helper-maven-plugin
98 | 1.4
99 |
100 |
101 | add-source
102 | generate-sources
103 |
104 | add-source
105 |
106 |
107 |
108 | ../common/src/main/scala
109 | ../common/src/main/java
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 | net.alchim31.maven
120 | scala-maven-plugin
121 | 3.2.2
122 |
123 |
124 | scala-compile-first
125 | process-resources
126 |
127 | add-source
128 | compile
129 |
130 |
131 |
132 | scala-test-compile
133 | process-test-resources
134 |
135 | testCompile
136 |
137 |
138 |
139 |
140 |
141 | org.apache.maven.plugins
142 | maven-compiler-plugin
143 | 3.2
144 |
145 | 1.8
146 | 1.8
147 | UTF-8
148 |
149 |
150 |
151 |
152 |
153 | org.apache.maven.plugins
154 | maven-compiler-plugin
155 | 3.8.0
156 |
157 | 1.8
158 | 1.8
159 |
160 |
161 |
162 | org.apache.maven.plugins
163 | maven-resources-plugin
164 | 3.0.2
165 |
166 | UTF-8
167 |
168 |
169 |
170 |
171 | org.apache.maven.plugins
172 | maven-shade-plugin
173 | 2.4.3
174 |
175 |
176 | package
177 |
178 | shade
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 | org.apache.maven.plugins
192 | maven-shade-plugin
193 | 2.4.3
194 |
195 |
196 | package
197 |
198 | shade
199 |
200 |
201 |
202 |
203 | *:*
204 |
205 | META-INF/*.SF
206 | META-INF/*.DSA
207 | META-INF/*.RSA
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
--------------------------------------------------------------------------------
/spark/query/q1.sql:
--------------------------------------------------------------------------------
1 | select
2 | commodity, count(userId) num, WINDOW(times, '10 seconds').start, WINDOW(times, '10 seconds').end
3 | from
4 | shopping
5 | group BY
6 | WINDOW(times, '10 seconds'), commodity
--------------------------------------------------------------------------------
/spark/query/q2.sql:
--------------------------------------------------------------------------------
1 | select
2 | strategy, site, pos_id, WINDOW(click_time, '10 seconds').start, pos_id, WINDOW(click_time, '10 seconds').end, count(*) click_count
3 | from
4 | click
5 | GROUP BY
6 | strategy, site, pos_id, WINDOW(click_time, '10 seconds')
--------------------------------------------------------------------------------
/spark/query/q3.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | strategy, site, pos_id, WINDOW(imp_time, '10 seconds').start, pos_id, WINDOW(imp_time, '10 seconds').end, SUM(cost)
3 | FROM
4 | imp
5 | GROUP BY
6 | strategy, site, pos_id, WINDOW(imp_time, '10 seconds')
--------------------------------------------------------------------------------
/spark/query/q4.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | b.device_id, a.strategy, a.site, a.pos_id, count(b.device_id)
3 | FROM
4 | click a
5 | JOIN
6 | dau b
7 | ON
8 | a.device_id = b.device_id AND a.click_time BETWEEN b.dau_time - INTERVAL 1 second AND b.dau_time + INTERVAL 1 second
9 | GROUP BY
10 | b.device_id, a.strategy, a.site, a.pos_id, WINDOW(a.click_time, '10 seconds')
11 |
--------------------------------------------------------------------------------
/spark/query/q5.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | sessionId, MAX(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss')) as timmm , MIN(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss')) as timmm2, count(*)
3 | FROM
4 | userVisit
5 | GROUP BY
6 | sessionId, WINDOW(actionTime, '10 seconds')
--------------------------------------------------------------------------------
/spark/query/q6.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | sessionId, MAX(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss'))-MIN(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss')) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h, COUNT(sessionId) num
3 | FROM
4 | userVisit
5 | GROUP BY
6 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP)), WINDOW(actionTime, '10 seconds')
7 | HAVING
8 | (MAX(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss'))-MIN(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss'))) < 100
--------------------------------------------------------------------------------
/spark/query/q7.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | WINDOW(actionTime, '10 seconds').start starts, WINDOW(actionTime, '10 seconds').end finish , cityId, payProductIds, count(*)
3 | FROM
4 | userVisit
5 | WHERE
6 | payProductIds IS NOT NULL
7 | GROUP BY
8 | cityId, payProductIds, WINDOW(actionTime, '10 seconds')
--------------------------------------------------------------------------------
/spark/query/q8.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | WINDOW(actionTime, '10 seconds').start start, WINDOW(actionTime, '10 seconds').end finish ,count(*) as sequence
3 | FROM
4 | userVisit
5 | WHERE
6 | clickCategoryId IS NOT NULL
7 | GROUP BY
8 | cityId, WINDOW(actionTime, '10 seconds')
--------------------------------------------------------------------------------
/spark/query/q9.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | a.device_id, a.strategy, a.site, a.pos_id, b.var1, count(*)
3 | FROM
4 | (SELECT device_id, strategy, site, pos_id FROM click) a
5 | JOIN
6 | (SELECT device_id, dau_time as var1 FROM dau) b
7 | ON
8 | a.device_id = b.device_id
9 | GROUP BY
10 | a.device_id, a.strategy, a.site, a.pos_id, b.var1
--------------------------------------------------------------------------------
/spark/result/result.log:
--------------------------------------------------------------------------------
1 | Finished time: 2019-11-05 20:56:58; q9.sql Runtime: 62 TPS:5884
2 |
--------------------------------------------------------------------------------
/spark/src/main/java/com/intel/streaming_benchmark/spark/Benchmark.java:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.spark;
2 |
3 | import com.intel.streaming_benchmark.common.*;
4 | import com.intel.streaming_benchmark.utils.SchemaProvider;
5 | import com.intel.streaming_benchmark.utils.SparkBenchConfig;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import org.apache.spark.api.java.function.MapPartitionsFunction;
8 | import org.apache.spark.sql.Dataset;
9 | import org.apache.spark.sql.Row;
10 | import org.apache.spark.sql.RowFactory;
11 | import org.apache.spark.sql.SparkSession;
12 | import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
13 | import org.apache.spark.sql.streaming.StreamingQuery;
14 | import org.apache.spark.sql.streaming.Trigger;
15 | import org.apache.spark.util.LongAccumulator;
16 | import com.alibaba.fastjson.JSONObject;
17 | import java.io.BufferedWriter;
18 | import java.io.File;
19 | import java.io.FileWriter;
20 | import java.sql.Timestamp;
21 | import java.util.*;
22 |
23 | public class Benchmark {
24 | public static void main(String[] args) throws Exception {
25 | if (args.length < 3)
26 | BenchLogUtil.handleError("Usage: RunBench ");
27 |
28 | ConfigLoader cl = new ConfigLoader(args[0]);
29 | String benchmarkConfDir = new File(args[0]).getParent();
30 | //spark config
31 | String sparkConf = benchmarkConfDir + "/../spark/conf/benchmarkConf.yaml";
32 | cl.merge(sparkConf);
33 | // Prepare configuration
34 | SparkBenchConfig conf = new SparkBenchConfig();
35 | conf.brokerList = cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST);
36 | conf.zkHost = cl.getProperty(StreamBenchConfig.ZK_HOST);
37 | conf.consumerGroup = cl.getProperty(StreamBenchConfig.CONSUMER_GROUP);
38 | conf.topic = QueryConfig.getTables(args[1]);
39 | conf.sqlLocation = benchmarkConfDir + "/../spark/query";
40 | conf.resultLocation = benchmarkConfDir + "/../spark/result";
41 | conf.sqlName = args[1];
42 | conf.runTime = Integer.valueOf(args[2]);
43 | runQuery(conf);
44 | }
45 |
46 | public static void runQuery(SparkBenchConfig config) throws Exception {
47 |
48 | //create SparkSession
49 | SparkSession spark = SparkSession
50 | .builder()
51 | .appName(config.sqlName)
52 | // .master("local[2]")
53 | .getOrCreate();
54 | JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
55 |
56 | String[] topics = config.topic.split(",");
57 | Dataset df;
58 | LongAccumulator longAccumulator = jsc.sc().longAccumulator();
59 | Long startTime = System.currentTimeMillis();
60 | // longAccumulator is incremented once per record consumed from Kafka; its final value divided by the run time gives the TPS written to result.log
61 | // build one streaming table per Kafka topic: parse the raw value into typed columns, add a 4-second watermark, and register a temp view
62 | for(int i = 0; i < topics.length; i++){
63 | ExpressionEncoder encoder = SchemaProvider.provideSchema(topics[i]);
64 | if(topics[i].equals("shopping")){
65 | // read raw CSV records from Kafka; the value string must be parsed into multiple typed columns
66 | df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction() {
67 | @Override
68 | public Iterator call(Iterator input) throws Exception {
69 | List rows = new ArrayList<>();
70 | while (input.hasNext()) {
71 | longAccumulator.add(1);
72 | Row next = input.next();
73 | String[] split = next.getString(0).split(",");
74 | rows.add(RowFactory.create(split[0],split[1],Timestamp.valueOf(DateUtils.parseLong2String(Long.valueOf(split[2])))));
75 | }
76 | return rows.iterator();
77 | }
78 | }, encoder).withWatermark("times", "4 seconds");
79 | // .
80 | }else if(topics[i].equals("click")){
81 | df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction() {
82 | @Override
83 | public Iterator call(Iterator input) throws Exception {
84 | List rows = new ArrayList<>();
85 | while (input.hasNext()) {
86 | longAccumulator.add(1);
87 | JSONObject obj = JSONObject.parseObject(input.next().getString(0));
88 | // JSONObject obj = new JSONObject(input.next().getString(0));
89 | rows.add(RowFactory.create(Timestamp.valueOf(DateUtils.parseLong2String(obj.getLong("click_time"))), obj.getString("strategy"), obj.getString("site"), obj.getString("pos_id"), obj.getString("poi_id"), obj.getString("device_id")));
90 | }
91 | return rows.iterator();
92 | }
93 | }, encoder).withWatermark("click_time", "4 seconds");
94 |
95 | }else if(topics[i].equals("imp")){
96 | df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction() {
97 | @Override
98 | public Iterator call(Iterator input) throws Exception {
99 | List rows = new ArrayList<>();
100 | while (input.hasNext()) {
101 | longAccumulator.add(1);
102 | JSONObject obj = JSONObject.parseObject(input.next().getString(0));
103 | // JSONObject obj = new JSONObject(input.next().getString(0));
104 | rows.add(RowFactory.create(Timestamp.valueOf(DateUtils.parseLong2String(obj.getLong("imp_time"))), obj.getString("strategy"), obj.getString("site"), obj.getString("pos_id"), obj.getString("poi_id"), obj.getDouble("cost"), obj.getString("device_id")));
105 | }
106 | return rows.iterator();
107 | }
108 | }, encoder).withWatermark("imp_time", "4 seconds");
109 | }else if(topics[i].equals("dau")){
110 | df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList)
111 | .option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction() {
112 | @Override
113 | public Iterator call(Iterator input) throws Exception {
114 | List rows = new ArrayList<>();
115 | while (input.hasNext()) {
116 | longAccumulator.add(1);
117 | JSONObject obj = JSONObject.parseObject(input.next().getString(0));
118 | // JSONObject obj = new JSONObject(input.next().getString(0));
119 | rows.add(RowFactory.create(Timestamp.valueOf(DateUtils.parseLong2String(obj.getLong("dau_time"))), obj.getString("device_id")));
120 | }
121 | return rows.iterator();
122 | }
123 | }, encoder).withWatermark("dau_time", "4 seconds");
124 | }else if(topics[i].equals("userVisit")){
125 | df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction() {
126 | @Override
127 | public Iterator call(Iterator input) throws Exception {
128 | List rows = new ArrayList<>();
129 | while (input.hasNext()) {
130 | longAccumulator.add(1);
131 | String[] split = input.next().getString(0).split(",");
132 | rows.add(RowFactory.create(split[0], Long.valueOf(split[1]), split[2], Long.valueOf(split[3]), Timestamp.valueOf(DateUtils.parseLong2String(Long.valueOf(split[4]))), split[5], split[6], split[7], split[8], split[9], split[10], split[11], Integer.valueOf(split[12])));
133 | }
134 | return rows.iterator();
135 | }
136 | }, encoder).withWatermark("actionTime", "4 seconds");
137 | }else{
138 | System.out.println("No such topic, please check your benchmarkConf.yaml");
139 | return;
140 | }
141 |
142 | df.createOrReplaceTempView(topics[i]);
143 | }
144 |
145 | // run the query: load the SQL text, write results to the console sink with a 30-second processing-time trigger, and stop after config.runTime seconds
146 | File file = new File(config.sqlLocation + "/" + config.sqlName);
147 | if (!file.exists()) {
148 | return;
149 | }
150 | try {
151 | String queryString = DateUtils.fileToString(file);
152 | Dataset sql = spark.sql(queryString);
153 | StreamingQuery start = sql.writeStream().outputMode("append").format("console").trigger(Trigger.ProcessingTime("30 seconds")).start();
154 | start.awaitTermination(config.runTime * 1000);
155 | System.out.println("Total records consumed: " + longAccumulator.value());
156 |
157 | } catch (Exception e) {
158 | e.printStackTrace();
159 | }
160 | Long finishTime = System.currentTimeMillis();
161 | Long runningTime = (finishTime - startTime) / 1000;
162 | File resultFile = new File(config.resultLocation + "/result.log" );
163 | if (!resultFile.exists()) {
164 | resultFile.createNewFile();
165 | }
166 | FileWriter fileWriter = new FileWriter(config.resultLocation + "/result.log" , true);
167 | BufferedWriter bufferWriter = new BufferedWriter(fileWriter);
168 | bufferWriter.write("Finished time: "+ DateUtils.parseLong2String(finishTime) + "; " + config.sqlName + " Runtime: " + runningTime + " TPS:" + longAccumulator.value()/runningTime + "\r\n");
169 | bufferWriter.close();
170 |
171 | }
172 | }
173 |
--------------------------------------------------------------------------------
/spark/src/main/java/com/intel/streaming_benchmark/utils/SchemaProvider.java:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.utils;
2 |
3 | import org.apache.spark.sql.Row;
4 | import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
5 | import org.apache.spark.sql.catalyst.encoders.RowEncoder;
6 | import org.apache.spark.sql.types.DataTypes;
7 | import org.apache.spark.sql.types.StructType;
8 |
9 | public class SchemaProvider {
10 | // returns a Row encoder whose schema matches the record layout the data generator produces for the given topic
11 | public static ExpressionEncoder provideSchema(String topic){
12 | StructType type = new StructType();
13 | if(topic.equals("shopping")){
14 | type = type.add("userID", DataTypes.StringType)
15 | .add("commodity", DataTypes.StringType)
16 | .add("times", DataTypes.TimestampType);
17 | }else if(topic.equals("click")){
18 | type = type.add("click_time", DataTypes.TimestampType)
19 | .add("strategy", DataTypes.StringType)
20 | .add("site", DataTypes.StringType)
21 | .add("pos_id", DataTypes.StringType)
22 | .add("poi_id", DataTypes.StringType)
23 | .add("device_id", DataTypes.StringType);
24 | }else if(topic.equals("imp")){
25 | type = type.add("imp_time", DataTypes.TimestampType)
26 | .add("strategy", DataTypes.StringType)
27 | .add("site", DataTypes.StringType)
28 | .add("pos_id", DataTypes.StringType)
29 | .add("poi_id", DataTypes.StringType)
30 | .add("cost", DataTypes.DoubleType)
31 | .add("device_id", DataTypes.StringType);
32 | }else if(topic.equals("dau")){
33 | type = type.add("dau_time", DataTypes.TimestampType)
34 | .add("device_id", DataTypes.StringType);
35 | }else if(topic.equals("userVisit")){
36 | type = type.add("date", DataTypes.StringType)
37 | .add("userId", DataTypes.LongType)
38 | .add("sessionId", DataTypes.StringType)
39 | .add("pageId", DataTypes.LongType)
40 | .add("actionTime", DataTypes.TimestampType)
41 | .add("searchKeyword", DataTypes.StringType)
42 | .add("clickCategoryId", DataTypes.StringType)
43 | .add("clickProductId", DataTypes.StringType)
44 | .add("orderCategoryIds", DataTypes.StringType)
45 | .add("orderProductIds", DataTypes.StringType)
46 | .add("payCategoryIds", DataTypes.StringType)
47 | .add("payProductIds", DataTypes.StringType)
48 | .add("cityId", DataTypes.IntegerType);
49 | }else {
50 | System.out.println("No such table schema!!!");
51 | return null;
52 | }
53 |
54 | return RowEncoder.apply(type);
55 |
56 | }
57 |
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/spark/src/main/java/com/intel/streaming_benchmark/utils/SparkBenchConfig.java:
--------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.utils;
2 |
3 | public class SparkBenchConfig {
4 | // Kafka related
5 | public String zkHost;
6 | public String brokerList;
7 | public String topic;
8 | public String consumerGroup;
9 | public String valueDeserializer;
10 | public String keyDeserializer;
11 |
12 |
13 | // public String offsetReset;
14 | // public String reportTopic;
15 |
16 | // Spark related
17 | public long checkpointDuration;
18 | public String resultLocation;
19 | public String sqlLocation;
20 | public String sqlName;
21 | public String timeType;
22 |
23 |
24 | public int runTime;
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/utils/dataGenerator.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | curDir=$(cd `dirname $0`;pwd)
4 | #curDir=`dirname $0`
5 | echo $curDir
6 | rootDir=$(dirname $curDir)
7 | echo $rootDir
8 |
9 | DATAGEN_TIME=$1
10 | THREAD_PER_NODE=$2
11 | SQL=$3
12 | ENGINE=$4
13 |
14 | # run Datagen in the background: <runTimeSeconds> <threadsPerNode> <queryName> plus the shared benchmark config; output is appended to the engine's dataGen log
15 | /opt/Beaver/jdk/bin/java -cp $rootDir/dataGen/target/dataGen-1.0-SNAPSHOT.jar com.intel.streaming_benchmark.Datagen $DATAGEN_TIME $THREAD_PER_NODE $SQL $rootDir/conf/benchmarkConf.yaml >> $rootDir/$ENGINE/log/dataGen_${SQL}.log 2>&1 &
16 |
--------------------------------------------------------------------------------