├── README.md ├── bin ├── runAll.sh ├── runFlinkBenchmark.sh └── runSparkBenchmark.sh ├── common ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── intel │ │ └── streaming_benchmark │ │ └── common │ │ ├── BenchLogUtil.java │ │ ├── ConfigLoader.java │ │ ├── DateUtils.java │ │ └── StreamBenchConfig.java │ └── scala │ └── com │ └── intel │ └── streaming_benchmark │ └── common │ ├── QueryConfig.scala │ ├── Schema.scala │ └── TableSchemaProvider.scala ├── conf ├── benchmarkConf.yaml ├── dataGenHosts ├── env └── queriesToRun ├── dataGen ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── intel │ │ └── streaming_benchmark │ │ ├── Datagen.java │ │ └── utils │ │ ├── ConfigLoader.java │ │ ├── Constants.java │ │ └── GetProducer.java │ └── scala │ └── com │ └── intel │ └── streaming_benchmark │ ├── ClickProducer.scala │ └── click.scala ├── flink ├── conf │ └── benchmarkConf.yaml ├── log │ ├── q1.sql.log │ ├── q10.sql.log │ ├── q11.sql.log │ ├── q12.sql.log │ ├── q2.sql.log │ ├── q3.sql.log │ ├── q4.sql.log │ ├── q5.sql.log │ ├── q6.sql.log │ ├── q7.sql.log │ ├── q8.sql.log │ └── q9.sql.log ├── pom.xml ├── query │ ├── q1.sql │ ├── q10.sql │ ├── q11.sql │ ├── q12.sql │ ├── q2.sql │ ├── q3.sql │ ├── q4.sql │ ├── q5.sql │ ├── q6.sql │ ├── q7.sql │ ├── q8.sql │ └── q9.sql ├── result │ └── result.log └── src │ └── main │ └── java │ └── com │ └── intel │ └── streaming_benchmark │ ├── flink │ └── Benchmark.java │ └── utils │ └── FlinkBenchConfig.java ├── pom.xml ├── spark ├── conf │ └── benchmarkConf.yaml ├── log │ ├── q1.sql.log │ ├── q2.sql.log │ ├── q3.sql.log │ ├── q4.sql.log │ ├── q5.sql.log │ ├── q6.sql.log │ ├── q7.sql.log │ ├── q8.sql.log │ └── q9.sql.log ├── pom.xml ├── query │ ├── q1.sql │ ├── q2.sql │ ├── q3.sql │ ├── q4.sql │ ├── q5.sql │ ├── q6.sql │ ├── q7.sql │ ├── q8.sql │ └── q9.sql ├── result │ └── result.log └── src │ └── main │ └── java │ └── com │ └── intel │ └── streaming_benchmark │ ├── spark │ └── Benchmark.java │ └── utils │ ├── SchemaProvider.java │ └── SparkBenchConfig.java └── utils └── dataGenerator.sh
/README.md: -------------------------------------------------------------------------------- 1 | # Streaming_benchmark 2 | Streaming Benchmark is designed to measure the performance of stream processing systems such as Flink and Spark. Three use cases are simulated (User Visit Session Analysis, Evaluation of Real-time Advertising, and Shopping Record Analysis). Raw data is generated and stored in Kafka; the streams are mapped to streaming tables, and queries act on these tables. 3 | 4 | ## Building 5 | ``` 6 | mvn clean package 7 | ``` 8 | ## Prerequisites 9 | You should have Apache Kafka, Apache ZooKeeper, Apache Spark and Flink-1.9 installed in your cluster. 10 | 11 | ## Setup 12 | 1. Clone the project onto your master node. 13 | 2. Update conf/benchmarkConf.yaml (the properties of Kafka, ZooKeeper, the benchmark, ...) 14 | ``` 15 | streambench.zkHost ip1:2181,ip2:2181,ip3:2181... 16 | streambench.kafka.brokerList ip1:port1,ip1:port2... 17 | streambench.kafka.consumerGroup benchmark(default) 18 | ``` 19 | 3. Update flink/conf/benchmarkConf.yaml (the properties of Flink) 20 | ``` 21 | streambench.flink.checkpointDuration 5000 22 | streambench.flink.timeType EventTime(Use EventTime or ProcessTime) 23 | ``` 24 | 4. Update conf/dataGenHosts (the hosts where data will be generated; generating data on the Kafka nodes is recommended) 25 | ``` 26 | ip1 27 | ip2 28 | ... 29 | ``` 30 | 5. Update conf/queriesToRun (the queries to be run) 31 | ``` 32 | q1.sql 33 | q2.sql 34 | q3.sql 35 | ...
36 | ``` 37 | 6. Update conf/env 38 | ``` 39 | export DATAGEN_TIME=100 (Running time for each query, in seconds) 40 | export THREAD_PER_NODE=10 (The number of data-generating threads per node) 41 | export FLINK_HOME={FLINK_HOME} 42 | export SPARK_HOME={SPARK_HOME} 43 | ``` 44 | 7. Copy the project to every node that will generate data (the same hosts as in conf/dataGenHosts) and ensure that the master node can log in to these hosts without a password. 45 | 46 | ## Run Benchmark 47 | Start ZooKeeper, Kafka, Spark and Flink first. 48 | Run the Flink benchmark: `sh bin/runFlinkBenchmark.sh`. 49 | Run the Spark benchmark: `sh bin/runSparkBenchmark.sh`. 50 | Run both the Flink and Spark benchmarks: `sh bin/runAll.sh`. 51 | 52 | ## Result 53 | The results are saved to flink/result/result.log and spark/result/result.log; the result format looks like the following: 54 | ``` 55 | Finished time: 2019-10-30 19:07:26; q1.sql Runtime: 58s TPS:10709265 56 | Finished time: 2019-10-30 19:08:37; q2.sql Runtime: 57s TPS:8061793 57 | Finished time: 2019-10-30 19:09:51; q5.sql Runtime: 57s TPS:4979921 58 | ``` 59 |
-------------------------------------------------------------------------------- /bin/runAll.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curDir=$(cd `dirname $0`;pwd) 4 | rootDir=$(dirname $curDir) 5 | 6 | if [ -e $rootDir/conf/env ]; then 7 | source $rootDir/conf/env 8 | fi 9 | 10 | mainClass1=com.intel.streaming_benchmark.flink.Benchmark 11 | mainClass2=com.intel.streaming_benchmark.spark.Benchmark 12 | dataGenClass=com.intel.streaming_benchmark.Datagen 13 | HOSTNAME=`hostname` 14 | 15 | echo "Run Flink benchmark!" 16 | for sql in `cat $rootDir/conf/queriesToRun`; 17 | do 18 | echo "Data generator start!" 19 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql flink"; done 20 | echo "RUNNING $sql" 21 | nohup $FLINK_HOME/bin/flink run -c $mainClass1 $rootDir/flink/target/flink-1.0-SNAPSHOT.jar $CONF $sql >> $rootDir/flink/log/${sql}.log 2>&1 & 22 | sleep $DATAGEN_TIME 23 | FLINK_ID=`"$FLINK_HOME/bin/flink" list | grep "$sql" | awk '{print $4}'; true` 24 | $FLINK_HOME/bin/flink cancel $FLINK_ID 25 | echo $FLINK_ID 26 | sleep 10 27 | done 28 | 29 | sleep 30 30 | 31 | echo "Run Spark benchmark!" 32 | for sql in `cat $rootDir/conf/queriesToRun`; 33 | do 34 | echo "Data generator start!" 35 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql spark"; done 36 | echo "RUNNING $sql" 37 | nohup $SPARK_HOME/bin/spark-submit --master spark://${HOSTNAME}:7077 --class $mainClass2 --deploy-mode client $rootDir/spark/target/spark-1.0-SNAPSHOT.jar $CONF $sql $DATAGEN_TIME >> $rootDir/spark/log/${sql}.log 2>&1 & 38 | sleep $DATAGEN_TIME 39 | done
-------------------------------------------------------------------------------- /bin/runFlinkBenchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curDir=$(cd `dirname $0`;pwd) 4 | rootDir=$(dirname $curDir) 5 | 6 | if [ -e $rootDir/conf/env ]; then 7 | source $rootDir/conf/env 8 | fi 9 | 10 | mainClass=com.intel.streaming_benchmark.flink.Benchmark 11 | dataGenClass=com.intel.streaming_benchmark.Datagen 12 | 13 | for sql in `cat $rootDir/conf/queriesToRun`; 14 | do 15 | echo "Data generator start!"
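# For each query: start dataGenerator.sh on every host listed in conf/dataGenHosts via SSH,
# submit the Flink job for that query, let it run for DATAGEN_TIME seconds, then look up the
# job id with `flink list` and cancel it before moving on to the next query.
# DATAGEN_TIME, THREAD_PER_NODE and CONF are exported by conf/env, which is sourced above.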
16 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql flink"; done 17 | echo "RUNING $sql" 18 | nohup $FLINK_HOME/bin/flink run -c $mainClass $rootDir/flink/target/flink-1.0-SNAPSHOT.jar $CONF $sql >> $rootDir/flink/log/${sql}.log 2>&1 & 19 | sleep $DATAGEN_TIME 20 | FLINK_ID=`"$FLINK_HOME/bin/flink" list | grep "$sql" | awk '{print $4}'; true` 21 | $FLINK_HOME/bin/flink cancel $FLINK_ID 22 | echo $FLINK_ID 23 | sleep 10 24 | done 25 | 26 | -------------------------------------------------------------------------------- /bin/runSparkBenchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curDir=$(cd `dirname $0`;pwd) 4 | rootDir=$(dirname $curDir) 5 | 6 | if [ -e $rootDir/conf/env ]; then 7 | source $rootDir/conf/env 8 | fi 9 | 10 | mainClass=com.intel.streaming_benchmark.spark.Benchmark 11 | dataGenClass=com.intel.streaming_benchmark.Datagen 12 | HOSTNAME=`hostname` 13 | 14 | for sql in `cat $rootDir/conf/queriesToRun`; 15 | do 16 | echo "Data generator start!" 17 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql spark"; done 18 | echo "RUNING $sql" 19 | nohup $SPARK_HOME/bin/spark-submit --master spark://${HOSTNAME}:7077 --class $mainClass --deploy-mode client $rootDir/spark/target/spark-1.0-SNAPSHOT.jar $CONF $sql $DATAGEN_TIME >> $rootDir/spark/log/${sql}.log 2>&1 & 20 | # $SPARK_HOME/bin/spark-submit --master spark://${HOSTNAME}:7077 --class $mainClass --deploy-mode client $rootDir/spark/target/spark-1.0-SNAPSHOT.jar $CONF $sql $DATAGEN_TIME 21 | sleep $DATAGEN_TIME 22 | done 23 | -------------------------------------------------------------------------------- /common/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | streaming_benchmark 7 | com.intel.streaming_benchmark 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | common 13 | 14 | 15 | 16 | 17 | org.codehaus.mojo 18 | build-helper-maven-plugin 19 | 1.4 20 | 21 | 22 | add-source 23 | generate-sources 24 | 25 | add-source 26 | 27 | 28 | 29 | src/main/scala 30 | src/main/java 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | net.alchim31.maven 39 | scala-maven-plugin 40 | 3.2.2 41 | 42 | 43 | scala-compile-first 44 | process-resources 45 | 46 | add-source 47 | compile 48 | 49 | 50 | 51 | scala-test-compile 52 | process-test-resources 53 | 54 | testCompile 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /common/src/main/java/com/intel/streaming_benchmark/common/BenchLogUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.intel.streaming_benchmark.common; 19 | 20 | import java.io.File; 21 | import java.io.PrintWriter; 22 | 23 | public class BenchLogUtil { 24 | private static PrintWriter out; 25 | 26 | public static void init() throws Exception { 27 | File file = new File("/tmp/benchlog-flink.txt"); 28 | out = new PrintWriter(file); 29 | } 30 | 31 | public static void logMsg(String msg) { 32 | try { 33 | if (out == null) { 34 | init(); 35 | } 36 | } catch (Exception e) { 37 | e.printStackTrace(); 38 | } 39 | out.println(msg); 40 | out.flush(); 41 | System.out.println(msg); 42 | } 43 | 44 | public static void close() { 45 | if (out != null) { 46 | out.close(); 47 | } 48 | } 49 | 50 | public static void handleError(String msg) { 51 | System.err.println(msg); 52 | System.exit(1); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /common/src/main/java/com/intel/streaming_benchmark/common/ConfigLoader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.intel.streaming_benchmark.common; 19 | 20 | import java.io.BufferedReader; 21 | import java.io.FileNotFoundException; 22 | import java.io.FileReader; 23 | import java.io.IOException; 24 | import java.util.HashMap; 25 | import java.util.Map; 26 | 27 | public class ConfigLoader { 28 | private String ConfigFileName = null; 29 | private Map store; 30 | 31 | public ConfigLoader(String filename){ 32 | ConfigFileName = filename; 33 | store = new HashMap(); 34 | // Load and parse config 35 | try { 36 | BufferedReader br = new BufferedReader(new FileReader(filename)); 37 | String line = br.readLine(); 38 | while(line != null){ 39 | if ((line.length()>0) && (line.charAt(0)!='#')) { 40 | String[] words = line.split("\\s+"); 41 | if (words.length == 2) { 42 | String key = words[0]; 43 | String value = words[1]; 44 | store.put(key, value); 45 | } else if (words.length == 1) { 46 | String key = words[0]; 47 | store.put(key, ""); 48 | } else { 49 | if (!line.startsWith("streambench")) 50 | System.out.println("Warning: unknown config parsed, skip:" + line); 51 | } 52 | } 53 | line = br.readLine(); 54 | } 55 | } catch (FileNotFoundException e) { 56 | System.out.println("ERROR: Config file not found! Should not happen. Caused by:"); 57 | } catch (IOException e) { 58 | System.out.println("ERROR: IO exception during read file. Should not happen. 
Caused by:"); 59 | e.printStackTrace(); 60 | } 61 | } 62 | 63 | public String getProperty(String key){ 64 | if (store.containsKey(key)) 65 | return (String) store.get(key); 66 | else { 67 | System.out.println("ERROR: Unknown config key:" + key); 68 | return null; 69 | } 70 | } 71 | 72 | public void merge(String fileName){ 73 | 74 | try{ 75 | BufferedReader br = new BufferedReader(new FileReader(fileName)); 76 | String line = br.readLine(); 77 | while(line != null) { 78 | if ((line.length() > 0) && (line.charAt(0) != '#')) { 79 | String[] words = line.split("\\s+"); 80 | String key = words[0]; 81 | String value = words[1]; 82 | if(store.containsKey(key)){ 83 | store.replace(key,value); 84 | }else { 85 | store.put(key, value); 86 | } 87 | } 88 | line = br.readLine(); 89 | } 90 | }catch (FileNotFoundException e) { 91 | System.out.println("ERROR: Config file not found! Should not happen. Caused by:"); 92 | } catch (IOException e) { 93 | System.out.println("ERROR: IO exception during read file. Should not happen. Caused by:"); 94 | e.printStackTrace(); 95 | } 96 | 97 | 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /common/src/main/java/com/intel/streaming_benchmark/common/DateUtils.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.common; 2 | 3 | import java.io.ByteArrayOutputStream; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.text.SimpleDateFormat; 7 | import java.util.Calendar; 8 | import java.util.Date; 9 | import java.util.Random; 10 | 11 | /** 12 | * Time data format converter 13 | */ 14 | public class DateUtils { 15 | public static final int dayOfMillis = 86400000; 16 | public static final String TIME_FORMAT = "yyyy-MM-dd HH:mm:ss"; 17 | public static final String DATE_FORMAT = "yyyy-MM-dd"; 18 | public static final String DATEKEY_FORMAT = "yyyyMMdd"; 19 | 20 | /** 21 | * Convert millisecond timestamps into: yyyy-MM-dd HH:mm:ss 22 | * 23 | * @param time 24 | * @return 25 | */ 26 | public static String parseLong2String(long time) { 27 | return parseLong2String(time, TIME_FORMAT); 28 | } 29 | 30 | /** 31 | * Convert millisecond timestamps into defined date format 32 | * 33 | * @param time 34 | * @param pattern 35 | * @return 36 | */ 37 | public static String parseLong2String(long time, String pattern) { 38 | return parseLong2String(time, new SimpleDateFormat(pattern)); 39 | } 40 | 41 | /** 42 | * Convert millisecond timestamps into date according to formatter 43 | * 44 | * @param time 45 | * @param sdf 46 | * @return 47 | */ 48 | public static String parseLong2String(long time, SimpleDateFormat sdf) { 49 | Calendar cal = Calendar.getInstance(); 50 | cal.setTimeInMillis(time); 51 | return sdf.format(cal.getTime()); 52 | } 53 | 54 | /** 55 | * Convert string time into long timestamps 56 | * 57 | * @param date time type,format:yyyy-MM-dd HH:mm:ss 58 | * @return 59 | */ 60 | public static long parseString2Long(String date) { 61 | return parseString2Long(date, TIME_FORMAT); 62 | } 63 | 64 | /** 65 | * Convert string time into long timestamps according to the time format string 66 | * 67 | * @param date 68 | * @param pattern 69 | * @return 70 | */ 71 | public static long parseString2Long(String date, String pattern) { 72 | return parseString2Long(date, new SimpleDateFormat(pattern)); 73 | } 74 | 75 | /** 76 | * Convert string time into long timestamps according to the time format string 77 | * 78 | * @param date 79 | * @param sdf 80 
| * @return 81 | */ 82 | public static long parseString2Long(String date, SimpleDateFormat sdf) { 83 | try { 84 | return sdf.parse(date).getTime(); 85 | } catch (Exception e) { 86 | throw new RuntimeException(e); 87 | } 88 | } 89 | 90 | /** 91 | * Convert long timestamps into the value according to the time type 92 | * 93 | * @param millis milliseconds timestamp 94 | * @param type time type 95 | * @return 96 | */ 97 | public static int getSpecificDateValueOfDateTypeEnum(long millis, DateTypeEnum type) { 98 | Calendar cal = Calendar.getInstance(); 99 | cal.setTimeInMillis(millis); 100 | switch (type) { 101 | case YEAR: 102 | return cal.get(Calendar.YEAR); 103 | case MONTH: 104 | return cal.get(Calendar.MONTH) + 1; 105 | case DAY: 106 | return cal.get(Calendar.DAY_OF_MONTH); 107 | case HOUR: 108 | return cal.get(Calendar.HOUR_OF_DAY); 109 | case MINUTE: 110 | return cal.get(Calendar.MINUTE); 111 | case SECOND: 112 | return cal.get(Calendar.SECOND); 113 | case MILLISECOND: 114 | return cal.get(Calendar.MILLISECOND); 115 | } 116 | 117 | throw new IllegalArgumentException("Parameter exception"); 118 | } 119 | 120 | /** 121 | * get the date of the day,format:yyyy-MM-dd 122 | * 123 | * @return Date of the day 124 | */ 125 | public static String getTodayDate() { 126 | return new SimpleDateFormat(DATE_FORMAT).format(new Date()); 127 | } 128 | 129 | /** 130 | * Get a random milliseconds timestamps of today 131 | * 132 | * @param random 133 | * @return 134 | */ 135 | public static long getRandomTodayTimeOfMillis(Random random) { 136 | Calendar cal = Calendar.getInstance(); 137 | cal.set(Calendar.HOUR_OF_DAY, 0); 138 | cal.set(Calendar.MINUTE, 0); 139 | cal.set(Calendar.SECOND, 0); 140 | cal.set(Calendar.MILLISECOND, 0); 141 | if (random.nextDouble() <= 0.7) { 142 | // [0-21] => 70% 143 | int millis = dayOfMillis / 8 * 7; 144 | cal.add(Calendar.MILLISECOND, 1 + random.nextInt(millis)); 145 | } else { 146 | // [1-23] => 30% 147 | int millis = dayOfMillis / 24; 148 | cal.add(Calendar.MILLISECOND, millis + random.nextInt(millis * 23)); 149 | } 150 | return cal.getTimeInMillis(); 151 | } 152 | 153 | /** 154 | * Time type 155 | */ 156 | public static enum DateTypeEnum { 157 | YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND 158 | } 159 | 160 | /** 161 | * Judge if time1 is before time2 162 | * 163 | * @param time1 164 | * @param time2 165 | * @return Judgement result 166 | */ 167 | public static boolean before(String time1, String time2) { 168 | try { 169 | SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT); 170 | Date dateTime1 = sdf.parse(time1); 171 | Date dateTime2 = sdf.parse(time2); 172 | 173 | if (dateTime1.before(dateTime2)) { 174 | return true; 175 | } 176 | } catch (Exception e) { 177 | e.printStackTrace(); 178 | } 179 | return false; 180 | } 181 | 182 | /** 183 | * Judge if time1 is after time2 184 | * 185 | * @param time1 186 | * @param time2 187 | * @return Judgement result 188 | */ 189 | public static boolean after(String time1, String time2) { 190 | try { 191 | SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT); 192 | Date dateTime1 = sdf.parse(time1); 193 | Date dateTime2 = sdf.parse(time2); 194 | 195 | if (dateTime1.after(dateTime2)) { 196 | return true; 197 | } 198 | } catch (Exception e) { 199 | e.printStackTrace(); 200 | } 201 | return false; 202 | } 203 | 204 | /** 205 | * Calculate time difference(Unit: second) 206 | * 207 | * @param time1 208 | * @param time2 209 | * @return difference 210 | */ 211 | public static int minus(String time1, String time2) { 212 | try { 213 
| SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT); 214 | Date datetime1 = sdf.parse(time1); 215 | Date datetime2 = sdf.parse(time2); 216 | 217 | long millisecond = datetime1.getTime() - datetime2.getTime(); 218 | 219 | return Integer.valueOf(String.valueOf(millisecond / 1000)); 220 | } catch (Exception e) { 221 | e.printStackTrace(); 222 | } 223 | return 0; 224 | } 225 | 226 | /** 227 | *get year, month, day and hour 228 | * 229 | * @param datetime time(yyyy-MM-dd HH:mm:ss) 230 | * @return result(yyyy-MM-dd_HH) 231 | */ 232 | public static String getDateHour(String datetime) { 233 | String date = datetime.split(" ")[0]; 234 | String hourMinuteSecond = datetime.split(" ")[1]; 235 | String hour = hourMinuteSecond.split(":")[0]; 236 | return date + "_" + hour; 237 | } 238 | 239 | /** 240 | * get the date of yesterday(yyyy-MM-dd) 241 | * 242 | * @return the date of yesterday 243 | */ 244 | public static String getYesterdayDate() { 245 | Calendar cal = Calendar.getInstance(); 246 | cal.setTime(new Date()); 247 | cal.add(Calendar.DAY_OF_YEAR, -1); 248 | 249 | Date date = cal.getTime(); 250 | 251 | SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT); 252 | return sdf.format(date); 253 | } 254 | 255 | /** 256 | * format date,reserve minute 257 | * yyyyMMddHHmm 258 | * 259 | * @param date 260 | * @return 261 | */ 262 | public static String formatTimeMinute(Date date) { 263 | SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmm"); 264 | return sdf.format(date); 265 | } 266 | 267 | public static String fileToString(File file) throws Exception{ 268 | FileInputStream inStream = new FileInputStream(file); 269 | ByteArrayOutputStream outStream = new ByteArrayOutputStream(); 270 | try { 271 | 272 | Boolean reading = true; 273 | while (reading) { 274 | int c = inStream.read(); 275 | if(c == -1){ 276 | reading = false; 277 | }else{ 278 | outStream.write(c); 279 | } 280 | } 281 | outStream.flush(); 282 | }catch (Exception e){ 283 | System.err.println(e.getMessage()); 284 | }finally { 285 | inStream.close(); 286 | } 287 | return new String(outStream.toByteArray(), "UTF-8"); 288 | } 289 | 290 | } 291 | -------------------------------------------------------------------------------- /common/src/main/java/com/intel/streaming_benchmark/common/StreamBenchConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.intel.streaming_benchmark.common; 19 | 20 | /** 21 | * All name of configurations used in StreamBench are defined here. Later I plan to refactor 22 | * property name. With this mapping layer, the underlying Java/Scala code don't need to be 23 | * changed. 
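 * Illustrative usage (based on how these constants are used elsewhere in this project):
 * a ConfigLoader built from conf/benchmarkConf.yaml resolves, for example,
 * configLoader.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST) to the value of the
 * "streambench.kafka.brokerList" entry.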
24 | */ 25 | public class StreamBenchConfig { 26 | // ===================================== 27 | // General StreamBench Conf 28 | // ===================================== 29 | public static String ZK_HOST = "streambench.zkHost"; 30 | 31 | public static String CONSUMER_GROUP = "streambench.kafka.consumerGroup"; 32 | 33 | public static String KAFKA_BROKER_LIST = "streambench.kafka.brokerList"; 34 | 35 | public static String DATAGEN_TIME = "streambench.dataGen.time"; 36 | 37 | public static String DATAGEN_THROUGHPUT = "streambench.dataGen.throughput"; 38 | 39 | 40 | 41 | // ===================================== 42 | // Data Generator Related Conf 43 | // ===================================== 44 | // public static String DATAGEN_RECORDS_PRE_INTERVAL = "hibench.streambench.datagen.recordsPerInterval"; 45 | // 46 | // public static String DATAGEN_INTERVAL_SPAN = "hibench.streambench.datagen.intervalSpan"; 47 | // 48 | // public static String DATAGEN_TOTAL_RECORDS = "hibench.streambench.datagen.totalRecords"; 49 | // 50 | // public static String DATAGEN_TOTAL_ROUNDS = "hibench.streambench.datagen.totalRounds"; 51 | // 52 | // public static String DATAGEN_RECORD_LENGTH = "hibench.streambench.datagen.recordLength"; 53 | // 54 | // public static String DATAGEN_PRODUCER_NUMBER = "hibench.streambench.datagen.producerNumber"; 55 | // ===================================== 56 | // Spark Streaming Related Conf 57 | // ===================================== 58 | // public static String SPARK_BATCH_INTERVAL = "hibench.streambench.spark.batchInterval"; 59 | // 60 | // public static String SPARK_CHECKPOINT_PATH = "hibench.streambench.spark.checkpointPath"; 61 | // 62 | // public static String SPARK_ENABLE_WAL = "hibench.streambench.spark.enableWAL"; 63 | // 64 | // public static String SPARK_USE_DIRECT_MODE = "hibench.streambench.spark.useDirectMode"; 65 | // 66 | // public static String SPARK_STORAGE_LEVEL = "hibench.streambench.spark.storageLevel"; 67 | // 68 | // public static String SPARK_RECEIVER_NUMBER = "hibench.streambench.spark.receiverNumber"; 69 | 70 | // ====================================== 71 | // Flink Related Conf 72 | // ====================================== 73 | 74 | 75 | public static String FLINK_CHECKPOINTDURATION = "streambench.flink.checkpointDuration"; 76 | 77 | public static String FLINK_RESULT_DIR = "streambench.flink.result.dir"; 78 | 79 | public static String FLINK_TIMETYPE = "streambench.flink.timeType"; 80 | 81 | 82 | public static String SQL_LOCATION= "streambench.flink.sqlLocation"; 83 | 84 | 85 | 86 | 87 | } 88 | -------------------------------------------------------------------------------- /common/src/main/scala/com/intel/streaming_benchmark/common/QueryConfig.scala: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.common 2 | 3 | object QueryConfig { 4 | val queryScene: Map[String, String] = Map( 5 | "q1.sql" -> "Shopping_record", 6 | "q2.sql" -> "Real_time_Advertising", 7 | "q3.sql" -> "Real_time_Advertising", 8 | "q4.sql" -> "Real_time_Advertising", 9 | "q5.sql" -> "User_visit_session_record", 10 | "q6.sql" -> "User_visit_session_record", 11 | "q7.sql" -> "User_visit_session_record", 12 | "q8.sql" -> "User_visit_session_record", 13 | "q9.sql" -> "Real_time_Advertising", 14 | "q10.sql" -> "User_visit_session_record", 15 | "q11.sql" -> "User_visit_session_record", 16 | "q12.sql" -> "User_visit_session_record" 17 | ) 18 | 19 | val queryTables: Map[String, String] = Map( 20 | "q1.sql" -> "shopping", 21 
| "q2.sql" -> "click", 22 | "q3.sql" -> "imp", 23 | "q4.sql" -> "dau,click", 24 | "q5.sql" -> "userVisit", 25 | "q6.sql" -> "userVisit", 26 | "q7.sql" -> "userVisit", 27 | "q8.sql" -> "userVisit", 28 | "q9.sql" -> "dau,click", 29 | "q10.sql" -> "userVisit", 30 | "q11.sql" -> "userVisit", 31 | "q12.sql" -> "userVisit" 32 | ) 33 | 34 | def getScene(query: String): String ={ 35 | if (queryScene.contains(query)) { 36 | queryScene(query) 37 | } else { 38 | throw new IllegalArgumentException(s"$query does not exist!") 39 | } 40 | } 41 | 42 | def getTables(query: String): String ={ 43 | if (queryTables.contains(query)) { 44 | queryTables(query) 45 | } else { 46 | throw new IllegalArgumentException(s"$query does not exist!") 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /common/src/main/scala/com/intel/streaming_benchmark/common/Schema.scala: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.common 2 | 3 | trait Schema { 4 | 5 | def getFieldNames: Array[String] 6 | 7 | def getFieldTypes: Array[String] 8 | 9 | 10 | } 11 | 12 | -------------------------------------------------------------------------------- /common/src/main/scala/com/intel/streaming_benchmark/common/TableSchemaProvider.scala: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.common 2 | 3 | case class Column( 4 | name: String, 5 | index: Int, 6 | types: String 7 | 8 | ) 9 | 10 | trait TableSchema extends Schema { 11 | 12 | val columns: Array[Column] 13 | 14 | def getFieldNames: Array[String] = columns.map(_.name) 15 | 16 | def getFieldTypes: Array[String] = 17 | columns.map(column => column.types) 18 | 19 | } 20 | 21 | object Shopping extends TableSchema { 22 | 23 | override val columns = Array[Column]( 24 | Column("userId", 0, "String"), 25 | Column("commodity", 1, "String"), 26 | Column("times", 2, "LONG") 27 | ) 28 | } 29 | 30 | object Click extends TableSchema { 31 | 32 | override val columns = Array[Column]( 33 | Column("click_time", 0, "Long"), 34 | Column("strategy", 1, "String"), 35 | Column("site", 2, "String"), 36 | Column("pos_id", 3, "String"), 37 | Column("poi_id", 4, "String"), 38 | Column("device_id", 5, "String") 39 | ) 40 | } 41 | 42 | object Imp extends TableSchema { 43 | 44 | override val columns = Array[Column]( 45 | Column("imp_time", 0, "Long"), 46 | Column("strategy", 1, "String"), 47 | Column("site", 2, "String"), 48 | Column("pos_id", 3, "String"), 49 | Column("poi_id", 4, "String"), 50 | Column("cost", 5, "Double"), 51 | Column("device_id", 6, "String") 52 | ) 53 | } 54 | 55 | object Dau extends TableSchema { 56 | 57 | override val columns = Array[Column]( 58 | Column("dau_time", 0, "Long"), 59 | Column("device_id", 1, "String") 60 | ) 61 | } 62 | 63 | object UserVisit extends TableSchema { 64 | 65 | override val columns = Array[Column]( 66 | Column("date", 0, "String"), 67 | Column("userId", 1, "Long"), 68 | Column("sessionId", 2, "String"), 69 | Column("pageId", 3, "Long"), 70 | Column("actionTime", 4, "String"), 71 | Column("searchKeyword", 5, "String"), 72 | Column("clickCategoryId", 6, "String"), 73 | Column("clickProductId", 7, "String"), 74 | Column("orderCategoryIds", 8, "String"), 75 | Column("orderProductIds", 9, "String"), 76 | Column("payCategoryIds", 10, "String"), 77 | Column("payProductIds", 11, "String"), 78 | Column("cityId", 12, "String") 79 | ) 80 | } 81 | 82 | object 
TableSchemaProvider { 83 | val schemaMap: Map[String, Schema] = Map( 84 | "shopping" -> Shopping, 85 | "click" -> Click, 86 | "imp" -> Imp, 87 | "dau" -> Dau, 88 | "userVisit" -> UserVisit 89 | ) 90 | 91 | def getSchema(tableName: String): Schema = { 92 | if (schemaMap.contains(tableName)) { 93 | schemaMap(tableName) 94 | } else { 95 | throw new IllegalArgumentException(s"$tableName does not exist!") 96 | } 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /conf/benchmarkConf.yaml: -------------------------------------------------------------------------------- 1 | streambench.zkHost 10.1.2.166:2181 2 | streambench.kafka.brokerList 10.1.2.143:9093,10.1.2.143:9094,10.1.2.143:9095,10.1.2.143:9096,10.1.2.143:9097,10.1.2.143:9098,10.1.2.143:9099,10.1.2.143:9100,10.1.2.143:9101,10.1.2.143:9102,10.1.2.159:9093,10.1.2.159:9094,10.1.2.159:9095,10.1.2.159:9096,10.1.2.159:9097,10.1.2.159:9098,10.1.2.159:9099,10.1.2.159:9100,10.1.2.159:9101,10.1.2.159:9102,10.1.2.166:9093,10.1.2.166:9094,10.1.2.166:9095,10.1.2.166:9096,10.1.2.166:9097,10.1.2.166:9098,10.1.2.166:9099,10.1.2.166:9100,10.1.2.166:9101,10.1.2.166:9102,10.1.2.164:9093,10.1.2.164:9094,10.1.2.164:9095,10.1.2.164:9096,10.1.2.164:9097,10.1.2.164:9098,10.1.2.164:9099,10.1.2.164:9100,10.1.2.164:9101,10.1.2.164:9102 3 | streambench.kafka.consumerGroup kafka_to_hdfs2 4 | -------------------------------------------------------------------------------- /conf/dataGenHosts: -------------------------------------------------------------------------------- 1 | 10.1.2.143 2 | 10.1.2.159 3 | 10.1.2.164 4 | 10.1.2.166 -------------------------------------------------------------------------------- /conf/env: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BASE_HOME=$(cd `dirname $0`;pwd) 4 | BENCH_HOME=$(dirname $BASE_HOME) 5 | 6 | export DATAGEN_TIME=200 7 | export THREAD_PER_NODE=10 8 | export CONF=$BENCH_HOME/conf/benchmarkConf.yaml 9 | export FLINK_HOME=/opt/Beaver/flink 10 | export SPARK_HOME=/opt/Beaver/spark 11 | -------------------------------------------------------------------------------- /conf/queriesToRun: -------------------------------------------------------------------------------- 1 | q9.sql 2 | -------------------------------------------------------------------------------- /dataGen/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | streaming_benchmark 7 | com.intel.streaming_benchmark 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | dataGen 13 | 14 | 15 | com.alibaba 16 | fastjson 17 | 1.2.58 18 | 19 | 20 | 21 | org.apache.kafka 22 | kafka_2.11 23 | 0.10.2.1 24 | 25 | 26 | 27 | 28 | com.intel.streaming_benchmark 29 | common 30 | 1.0-SNAPSHOT 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | org.apache.maven.plugins 39 | maven-shade-plugin 40 | 2.4.3 41 | 42 | 43 | 44 | 45 | *:* 46 | 47 | META-INF/*.SF 48 | META-INF/*.DSA 49 | META-INF/*.RSA 50 | 51 | 52 | 53 | 54 | 55 | junit:junit 56 | org.slf4j:slf4j-simple 57 | org.slf4j:slf4j-log4j12 58 | com.101tec:zkclient 59 | com.github.sgroschupf:zkclient 60 | org.apache.httpcomponents:httpclient 61 | 62 | 63 | 64 | 65 | 66 | package 67 | 68 | shade 69 | 70 | 71 | 72 | 73 | 74 | net.alchim31.maven 75 | scala-maven-plugin 76 | 3.2.0 77 | 78 | 79 | compile-scala 80 | compile 81 | 82 | add-source 83 | compile 84 | 85 | 86 | 87 | test-compile-scala 88 | test-compile 89 | 90 | add-source 91 | testCompile 92 | 93 | 94 | 95 | 96 | 2.11.8 97 | 98 | 99 | 
100 | org.codehaus.mojo 101 | build-helper-maven-plugin 102 | 1.4 103 | 104 | 105 | add-source 106 | generate-sources 107 | 108 | add-source 109 | 110 | 111 | 112 | ../common/src/main/scala 113 | ../common/src/main/java 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | org.apache.maven.plugins 122 | maven-compiler-plugin 123 | 124 | 1.8 125 | 1.8 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | maven-clean-plugin 134 | 3.1.0 135 | 136 | 137 | 138 | maven-resources-plugin 139 | 3.0.2 140 | 141 | 142 | maven-compiler-plugin 143 | 3.8.0 144 | 145 | 146 | maven-surefire-plugin 147 | 2.22.1 148 | 149 | 150 | maven-jar-plugin 151 | 3.0.2 152 | 153 | 154 | maven-install-plugin 155 | 2.5.2 156 | 157 | 158 | maven-deploy-plugin 159 | 2.8.2 160 | 161 | 162 | 163 | maven-site-plugin 164 | 3.7.1 165 | 166 | 167 | net.alchim31.maven 168 | scala-maven-plugin 169 | 3.2.2 170 | 171 | 172 | scala-compile-first 173 | process-resources 174 | 175 | add-source 176 | compile 177 | 178 | 179 | 180 | scala-test-compile 181 | process-test-resources 182 | 183 | testCompile 184 | 185 | 186 | 187 | 188 | 189 | org.apache.maven.plugins 190 | maven-compiler-plugin 191 | 3.2 192 | 193 | 1.8 194 | 1.8 195 | UTF-8 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /dataGen/src/main/java/com/intel/streaming_benchmark/Datagen.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark; 2 | 3 | import com.intel.streaming_benchmark.common.ConfigLoader; 4 | import com.intel.streaming_benchmark.common.QueryConfig; 5 | import com.intel.streaming_benchmark.utils.GetProducer; 6 | 7 | import java.util.concurrent.ExecutorService; 8 | import java.util.concurrent.Executors; 9 | 10 | public class Datagen { 11 | public static void main(String[] args) { 12 | 13 | System.out.println("------------------Already input args[]------------------"); 14 | //the time to generate data 15 | Long time = Long.valueOf(args[0]); 16 | System.out.println("------------------time: " + time + "s-------------------"); 17 | //the topic of Kafka 18 | String sqlName = args[2]; 19 | System.out.println("------------------sql: " + sqlName + "------------------"); 20 | String scene = QueryConfig.getScene(sqlName); 21 | 22 | ConfigLoader configLoader = new ConfigLoader(args[3]); 23 | System.out.println("------------------config: " + args[3] + "---------------"); 24 | //the number of thread for datagen 25 | int producerNumber = Integer.valueOf(args[1]); 26 | System.out.println("----------Thread_per_node:" + producerNumber + "--------"); 27 | ExecutorService pool = Executors.newFixedThreadPool(producerNumber); 28 | for(int i = 0; i < producerNumber; i++){ 29 | pool.execute(new GetProducer(scene, time, configLoader)); 30 | } 31 | System.out.println("============ StreamBench Data Generator ============"); 32 | pool.shutdown(); 33 | System.out.println("======== StreamBench Data Generator Finished ========"); 34 | 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /dataGen/src/main/java/com/intel/streaming_benchmark/utils/ConfigLoader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.intel.streaming_benchmark.utils; 19 | 20 | import java.io.BufferedReader; 21 | import java.io.FileNotFoundException; 22 | import java.io.FileReader; 23 | import java.io.IOException; 24 | import java.util.HashMap; 25 | import java.util.Map; 26 | 27 | public class ConfigLoader { 28 | private String ConfigFileName = null; 29 | private Map store; 30 | 31 | public ConfigLoader(String filename){ 32 | ConfigFileName = filename; 33 | store = new HashMap(); 34 | // Load and parse config 35 | try { 36 | BufferedReader br = new BufferedReader(new FileReader(filename)); 37 | String line = br.readLine(); 38 | while(line != null){ 39 | if ((line.length()>0) && (line.charAt(0)!='#')) { 40 | String[] words = line.split("\\s+"); 41 | if (words.length == 2) { 42 | String key = words[0]; 43 | String value = words[1]; 44 | store.put(key, value); 45 | } else if (words.length == 1) { 46 | String key = words[0]; 47 | store.put(key, ""); 48 | } else { 49 | if (!line.startsWith("hibench")) 50 | System.out.println("Warning: unknown config parsed, skip:" + line); 51 | } 52 | } 53 | line = br.readLine(); 54 | } 55 | } catch (FileNotFoundException e) { 56 | System.out.println("ERROR: Config file not found! Should not happen. Caused by:"); 57 | } catch (IOException e) { 58 | System.out.println("ERROR: IO exception during read file. Should not happen. 
Caused by:"); 59 | e.printStackTrace(); 60 | } 61 | } 62 | 63 | public String getProperty(String key){ 64 | if (store.containsKey(key)) 65 | return (String) store.get(key); 66 | else { 67 | System.out.println("ERROR: Unknown config key:" + key); 68 | return null; 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /dataGen/src/main/java/com/intel/streaming_benchmark/utils/Constants.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.utils; 2 | 3 | /** 4 | * Project Basic dataUtil.Constants 5 | */ 6 | public interface Constants { 7 | 8 | String SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR = "|"; 9 | String SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR_ESCAOE = "\\|"; 10 | 11 | /** 12 | * Project Configuration dataUtil.Constants 13 | */ 14 | String JDBC_DRIVER = "jdbc.driver"; 15 | String JDBC_DATASOURCE_SIZE = "jdbc.datasource.size"; 16 | String JDBC_URL = "jdbc.url"; 17 | String JDBC_USER = "jdbc.user"; 18 | String JDBC_PASSWORD = "jdbc.password"; 19 | 20 | String SPARK_SQL_JDBC_URL = "spark.sql.jdbc.url"; 21 | String SPARK_SQL_JDBC_URL_PROD = "spark.sql.jdbc.url.prod"; 22 | 23 | String SPARK_LOCAL = "spark.local"; 24 | 25 | String KAFKA_METADATA_BROKER_LIST = "metadata.broker.list"; 26 | String KAFKA_TOPICS = "kafka.topics"; 27 | String KAFKA_ZOOKEEPER_URL = "zookeeper.connect.url"; 28 | 29 | 30 | /** 31 | * Spark Application dataUtil.Constants 32 | */ 33 | String SPARK_APP_NAME_SESSION = "UserVisitSessionAnalyzeSpark_"; 34 | String SPARK_APP_NAME_PRODUCT = "AreaTop3ProductSpark_"; 35 | String SPARK_APP_NAME_AD = "AdClickRealTimeStateSpark"; 36 | 37 | String FIELD_ACTION_TIME = "action_time"; 38 | String FIELD_SESSION_ID = "session_id"; 39 | String FIELD_SEARCH_KEYWORDS = "search_keyword"; 40 | String FIELD_CLICK_CATEGORY_ID = "click_category_id"; 41 | String FIELD_AGE = "age"; 42 | String FIELD_PROFESSIONAL = "professional"; 43 | String FIELD_CITY = "city"; 44 | String FIELD_SEX = "sex"; 45 | 46 | 47 | String FIELD_CATEGORY_ID = "categoryId"; 48 | String FIELD_CLICK_COUNT = "clickCount"; 49 | String FIELD_ORDER_COUNT = "orderCount"; 50 | String FIELD_PAY_COUNT = "payCount"; 51 | 52 | String SESSION_COUNT = "session_count"; 53 | 54 | String TIME_PERIOD_1s_4s = "1s_4s"; 55 | String TIME_PERIOD_4s_7s = "4s_7s"; 56 | String TIME_PERIOD_7s_10s = "7s_10s"; 57 | String TIME_PERIOD_10s_30s = "10s_30s"; 58 | String TIME_PERIOD_30s_60s = "30s_60s"; 59 | String TIME_PERIOD_1m_3m = "1m_3m"; 60 | String TIME_PERIOD_3m_10m = "3m_10m"; 61 | String TIME_PERIOD_10m_30m = "10m_30m"; 62 | String TIME_PERIOD_30m = "30m"; 63 | 64 | String STEP_PERIOD_1_3 = "1_3"; 65 | String STEP_PERIOD_4_6 = "4_6"; 66 | String STEP_PERIOD_7_9 = "7_9"; 67 | String STEP_PERIOD_10_29 = "10_29"; 68 | String STEP_PERIOD_30_59 = "30_59"; 69 | String STEP_PERIOD_60 = "60"; 70 | 71 | /** 72 | * Source Table Column Names 73 | */ 74 | String UVA_FIELD_USER_ID = "user_id"; 75 | String UVA_FIELD_DATE = "date"; 76 | String UVA_FIELD_SESSION_ID = "session_id"; 77 | String UVA_FIELD_ACTION_TIME = "action_time"; 78 | 79 | /** 80 | * Task dataUtil.Constants 81 | */ 82 | String PARAM_SAMPLE_TYPE = "sampleType"; 83 | String PARAM_SESSION_RATIO = "sessionRatio"; 84 | String PARAM_START_DATE = "startDate"; 85 | String PARAM_END_DATE = "endDate"; 86 | String PARAM_START_AGE = "startAge"; 87 | String PARAM_END_AGE = "endAge"; 88 | String PARAM_PROFESSIONALS = "professionals"; 89 | String PARAM_CITIES = "cities"; 90 | String 
PARAM_SEX = "sex"; 91 | String PARAM_KEYWORDS = "keywords"; 92 | String PARAM_CATEGORY_IDS = "categoryIds"; 93 | String FIELD_VISIT_LENGTH = "visitLength"; 94 | String FIELD_STEP_LENGTH = "stepLength"; 95 | String FIELD_START_TIME = "startTime"; 96 | } 97 | -------------------------------------------------------------------------------- /dataGen/src/main/java/com/intel/streaming_benchmark/utils/GetProducer.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.utils; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import com.intel.streaming_benchmark.ClickProducer; 5 | import com.intel.streaming_benchmark.common.ConfigLoader; 6 | import com.intel.streaming_benchmark.common.StreamBenchConfig; 7 | import org.apache.kafka.clients.producer.KafkaProducer; 8 | import org.apache.kafka.clients.producer.ProducerConfig; 9 | import org.apache.kafka.clients.producer.ProducerRecord; 10 | import java.net.InetAddress; 11 | import java.text.SimpleDateFormat; 12 | import java.util.Properties; 13 | import java.util.Random; 14 | 15 | public class GetProducer extends Thread{ 16 | 17 | private String topic; 18 | private Long time; 19 | private ConfigLoader cl; 20 | public GetProducer(String topic, Long time , ConfigLoader cl){ 21 | 22 | super(); 23 | this.topic = topic; 24 | this.time = time; 25 | this.cl = cl; 26 | } 27 | 28 | @Override 29 | public void run() { 30 | 31 | System.out.println(Thread.currentThread().getName() + "======="); 32 | 33 | if (topic.equals("Shopping_record")){ 34 | datagenTopic1(cl); 35 | } 36 | else if(topic.equals("Real_time_Advertising")){ 37 | datagenTopic2(cl); 38 | } 39 | else if(topic.equals("User_visit_session_record")){ 40 | new ClickProducer(time, cl).run(); 41 | }else{ 42 | System.out.println("No such scene!"); 43 | } 44 | 45 | } 46 | 47 | private KafkaProducer createProducer(ConfigLoader cl) { 48 | 49 | Properties properties = new Properties(); 50 | properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer"); 51 | properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer"); 52 | properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST)); 53 | return new KafkaProducer<>(properties); 54 | } 55 | 56 | private void datagenTopic1(ConfigLoader cl) { 57 | 58 | String[] commodities = {"milk", "bag", "book","desk","sweet", "food", "disk","pen", "shoe", "animal","phone", "paper", "cup", "light", "glass", "power", "GameBoy", "chopsticks"}; 59 | Random random = new Random(); 60 | KafkaProducer producer = createProducer(cl); 61 | SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 62 | long start = System.currentTimeMillis(); 63 | Boolean flag = true; 64 | Long count = 0L; 65 | Long totalLength = 0L; 66 | String threadName = Thread.currentThread().getName(); 67 | 68 | try { 69 | 70 | InetAddress address = InetAddress.getLocalHost(); 71 | String hostName = address.getHostName().toString(); 72 | while(flag){ 73 | byte[] message = (hostName + "_" + count + "_" + threadName + "," + commodities[random.nextInt(commodities.length)] +"," + System.currentTimeMillis()).getBytes(); 74 | producer.send(new ProducerRecord("shopping", message)); 75 | count = count + 1; 76 | totalLength = totalLength + message.length; 77 | if((System.currentTimeMillis() - start) > time*1000){ 78 | flag = false; 79 | } 80 | } 81 | }catch (Exception e){ 82 | 
e.printStackTrace(); 83 | } 84 | 85 | producer.close(); 86 | } 87 | 88 | private void datagenTopic2(ConfigLoader cl){ 89 | Long count = 0L; 90 | Long totalLength = 0L; 91 | 92 | KafkaProducer producer = createProducer(cl); 93 | long start = System.currentTimeMillis(); 94 | Boolean flag = true; 95 | 96 | Random random = new Random(); 97 | String strategy_all[] ={"t1","t2","t3","t4","t5","t6"};//t1:strategy1, t2:strategy2,,, t6:strategy6 98 | String site_all[] ={"1","2","3"};//1:baidu media,2:toutiao media,3: weibo media 99 | String pos_id_all[] ={"a","b","c"};//a:ad space,b:ad space,c:ad space 100 | String poi_id_all[] ={"1001","1002","1003"};//1001:ad material,1002:ad material,1003:ad material 101 | String cost_all[] ={"0.01","0.02","0.03"};//cost 102 | String device_id_all[] ={"aaaaa","bbbbb","ccccc","ddddd","eeeee","fffff","ggggg"};//device 103 | while(flag){ 104 | 105 | try{ 106 | JSONObject imp = new JSONObject(); 107 | imp.put("imp_time",Long.valueOf(System.currentTimeMillis())); 108 | imp.put("strategy",strategy_all[random.nextInt(strategy_all.length-1)]); 109 | imp.put("site",pos_id_all[random.nextInt(site_all.length-1)]); 110 | imp.put("pos_id",strategy_all[random.nextInt(pos_id_all.length-1)]); 111 | imp.put("poi_id",poi_id_all[random.nextInt(poi_id_all.length-1)]); 112 | imp.put("cost",cost_all[random.nextInt(cost_all.length-1)]); 113 | imp.put("device_id",device_id_all[random.nextInt(device_id_all.length-1)]); 114 | //send exposure log 115 | byte[] imp_message = imp.toJSONString().getBytes(); 116 | producer.send(new ProducerRecord("imp",imp_message)); 117 | count++; 118 | totalLength = totalLength + imp_message.length; 119 | 120 | if (random.nextInt(4) ==1){//the probablity of triggerring Click 121 | JSONObject click =imp; 122 | click.remove("imp_time"); 123 | click.remove("cost"); 124 | click.put("click_time",Long.valueOf(System.currentTimeMillis())); 125 | byte[] click_message = click.toJSONString().getBytes(); 126 | producer.send(new ProducerRecord("click",click_message)); 127 | count++; 128 | totalLength = totalLength + click_message.length; 129 | 130 | if (random.nextInt(2) ==1){//dau time,?50 131 | JSONObject dau = new JSONObject(); 132 | dau.put("dau_time",Long.valueOf(System.currentTimeMillis())); 133 | dau.put("device_id",click.get("device_id").toString()); 134 | byte[] dau_message = dau.toJSONString().getBytes(); 135 | producer.send(new ProducerRecord("dau",dau_message)); 136 | count++; 137 | totalLength = totalLength + dau_message.length; 138 | } 139 | } 140 | if((System.currentTimeMillis() - start) > time*1000){ 141 | flag = false; 142 | } 143 | 144 | }catch (Exception e){ 145 | e.printStackTrace(); 146 | } 147 | } 148 | } 149 | 150 | 151 | } 152 | -------------------------------------------------------------------------------- /dataGen/src/main/scala/com/intel/streaming_benchmark/ClickProducer.scala: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark 2 | 3 | import java.net.InetAddress 4 | import java.util.Properties 5 | import com.alibaba.fastjson.JSONObject 6 | import com.intel.streaming_benchmark.click.{cityTypeSize, citys, keywordSize, keywords, productNumbers, professionalTypeSize, professionals, random, sexTypeSize, sexs, userNumbers} 7 | import com.intel.streaming_benchmark.common.{ConfigLoader, DateUtils, StreamBenchConfig} 8 | import com.intel.streaming_benchmark.utils.Constants 9 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} 10 | 11 | import 
scala.collection.mutable.ArrayBuffer 12 | 13 | class ClickProducer(val time:Long, val cl: ConfigLoader){ 14 | var total = 0L 15 | var length = 0L 16 | var threadName = Thread.currentThread().getName 17 | var hostName = InetAddress.getLocalHost.getHostName 18 | var seed = 0 19 | def run(): Unit = { 20 | // mockUserInfo() 21 | // mockProductInfo 22 | mockUserVisitAction(time) 23 | 24 | } 25 | 26 | private def createProducer = { 27 | val properties = new Properties 28 | properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer") 29 | properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer") 30 | properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST)) 31 | new KafkaProducer[Array[Byte], Array[Byte]](properties) 32 | } 33 | 34 | 35 | /** 36 | * Simulation code for generating user information 37 | * 38 | * @param 39 | * @return 40 | */ 41 | def mockUserInfo() = { 42 | val producer = createProducer 43 | for (i <- 0 until userNumbers) { 44 | val userId = i.toLong 45 | val age = (total % 60).toInt 46 | val userName = s"user_${i}" 47 | val name = s"name_${i}" 48 | val professional = professionals((total % professionalTypeSize).toInt) 49 | val city = citys((total%cityTypeSize).toInt)._2 50 | val sex = sexs((total % sexTypeSize).toInt) 51 | producer.send(new ProducerRecord("userInfo", UserInfo( 52 | userId, userName, name, 53 | age, professional, city, sex).formatted(",").getBytes())) 54 | } 55 | } 56 | 57 | /** 58 | * Simulation code for generating data of userVisitAction 59 | * 60 | * @param 61 | * @return 62 | */ 63 | 64 | def mockUserVisitAction(time: Long) = { 65 | val date: String = DateUtils.getTodayDate() 66 | val producer = createProducer 67 | val start: Long = System.currentTimeMillis() 68 | 69 | // get action time according the time of last action 70 | def getCurrentActionTime(preActionTime: Long): Long = { 71 | preActionTime + total % 60 72 | } 73 | 74 | // generate a produceID and productCategoryNumber 75 | def generateProduceAndCategoryId(): (Long, Long) = { 76 | val produceID = total % productNumbers 77 | (produceID, produceID % click.productCategoryNumbers) 78 | } 79 | 80 | // generate date for pageView 81 | def generatePageView(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = { 82 | if (times < 20) { 83 | // pageView ID:[0,100) 84 | val pageId: Long = total % 100 85 | val actionTime: Long = getCurrentActionTime(preActionTime) 86 | val searchKeyword: String = "" 87 | val clickCategoryId: String = "" 88 | val clickProductId: String = "" 89 | val orderCategoryIds: String = "" 90 | val orderProductIds: String = "" 91 | val payCategoryIds: String = "" 92 | val payProductIds: String = "" 93 | 94 | // Add data 95 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes() 96 | producer.send(new ProducerRecord("userVisit", message)) 97 | length = length + message.length 98 | total = total + 1 99 | // Go to next action 100 | val (t1, t2, t3) = 101 | if (times < 3) { 102 | (4, 7, 9) 103 | } else if (times < 10) { 104 | (2, 4, 7) 105 | } else { 106 | (1, 2, 3) 107 | } 108 | val tmp = seed % 10 109 | seed = seed + 1 110 | if (tmp <= t1) { 111 | // Visit 112 | generatePageView(times + 1, userId, sessionId, cityId, 
actionTime) 113 | } else if (tmp <= t2) { 114 | // Search 115 | generateSearch(times + 1, userId, sessionId, cityId, actionTime) 116 | } else if (tmp <= t3) { 117 | // Click 118 | generateClick(times + 1, userId, sessionId, cityId, actionTime) 119 | } else { 120 | // nothings, finish 121 | } 122 | 123 | } 124 | } 125 | 126 | // Generate data for searching 127 | def generateSearch(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = { 128 | if (times < 20) { 129 | // search ID:[100,150) 130 | val pageId: Long = total % 50 + 100 131 | val actionTime = getCurrentActionTime(preActionTime) 132 | val searchKeyword: String = keywords((total % keywordSize).toInt) 133 | val clickCategoryId: String = "" 134 | val clickProductId: String = "" 135 | val orderCategoryIds: String = "" 136 | val orderProductIds: String = "" 137 | val payCategoryIds: String = "" 138 | val payProductIds: String = "" 139 | 140 | // Add data 141 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes() 142 | producer.send(new ProducerRecord("userVisit",message)) 143 | length = length + message.length 144 | total = total + 1 145 | // Go to next action 146 | val (t1, t2, t3) = 147 | if (times < 3) { 148 | (2, 5, 8) 149 | } else if (times < 10) { 150 | (1, 2, 5) 151 | } else { 152 | (1, 2, 3) 153 | } 154 | val tmp = seed % 10 155 | seed = seed + 1 156 | if (tmp <= t1) { 157 | // Visit 158 | generatePageView(times + 1, userId, sessionId, cityId, actionTime) 159 | } else if (tmp <= t2) { 160 | // Search 161 | generateSearch(times + 1, userId, sessionId, cityId, actionTime) 162 | } else if (tmp <= t3) { 163 | // Click 164 | generateClick(times + 1, userId, sessionId, cityId, actionTime) 165 | } else { 166 | // nothings, finish 167 | } 168 | } 169 | } 170 | 171 | // Generate data for clicking 172 | def generateClick(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = { 173 | if (times < 20) { 174 | // click ID:[150,300) 175 | val pageId: Long = total % 150 + 150 176 | val actionTime = getCurrentActionTime(preActionTime) 177 | val searchKeyword: String = "" 178 | val (productID, categoryID) = generateProduceAndCategoryId() 179 | val clickProductId: String = productID.toString 180 | val clickCategoryId: String = categoryID.toString 181 | val orderCategoryIds: String = "" 182 | val orderProductIds: String = "" 183 | val payCategoryIds: String = "" 184 | val payProductIds: String = "" 185 | 186 | // Add data 187 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes() 188 | producer.send(new ProducerRecord("userVisit", message)) 189 | // Go to next action 190 | total = total + 1 191 | length = length + message.length 192 | 193 | val (t1, t2, t3, t4) = 194 | if (times < 3) { 195 | (3, 6, 15, 18) 196 | } else if (times < 10) { 197 | (2, 4, 11, 15) 198 | } else { 199 | (1, 2, 6, 8) 200 | } 201 | 202 | val tmp = seed % 20 203 | seed = seed + 1 204 | if (tmp <= t1) { 205 | // Visit 206 | generatePageView(times + 1, userId, sessionId, cityId, actionTime) 207 | } else if (tmp <= t2) { 208 | // Search 209 | generateSearch(times + 1, userId, sessionId, cityId, actionTime) 210 | } else if (tmp <= t3) { 211 | // Order 212 | generateOrder(times + 1, 
userId, sessionId, cityId, actionTime) 213 | } else if (tmp <= t4) { 214 | // Click 215 | generateClick(times + 1, userId, sessionId, cityId, actionTime) 216 | } else { 217 | // nothings, finish 218 | } 219 | 220 | } 221 | } 222 | 223 | // Generate date for order 224 | def generateOrder(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = { 225 | if (times < 20) { 226 | // order ID:[300,301) 227 | val pageId: Long = 300 228 | val actionTime = getCurrentActionTime(preActionTime) 229 | val searchKeyword: String = "" 230 | val clickProductId: String = "" 231 | val clickCategoryId: String = "" 232 | // There may be some product ordered together, range:[1,6) 233 | val randomProductNumbers = total % 5 + 1 234 | val bf = ArrayBuffer[(Long, Long)]() 235 | for (j <- 0 until randomProductNumbers.toInt) { 236 | bf += generateProduceAndCategoryId() 237 | } 238 | val nbf = bf.distinct 239 | 240 | val orderCategoryIds: String = nbf.map(_._2).mkString(Constants.SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR) 241 | val orderProductIds: String = nbf.map(_._1).mkString(Constants.SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR) 242 | val payCategoryIds: String = "" 243 | val payProductIds: String = "" 244 | 245 | // Add data 246 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes() 247 | producer.send(new ProducerRecord("userVisit", message)) 248 | total = total + 1 249 | length = length + message.length 250 | // Go to next action 251 | val (t1, t2, t3) = 252 | if (times <= 3) { 253 | (1, 2, 9) 254 | } else if (times < 10) { 255 | (1, 2, 8) 256 | } else { 257 | (1, 2, 7) 258 | } 259 | 260 | val tmp = seed % 10 261 | seed = seed + 1 262 | 263 | if (tmp <= t1) { 264 | // Visit 265 | generatePageView(times + 1, userId, sessionId, cityId, actionTime) 266 | } else if (tmp <= t2) { 267 | // Search 268 | generateSearch(times + 1, userId, sessionId, cityId, actionTime) 269 | } else if (tmp <= t3) { 270 | // Pay 271 | generatePay(times + 1, userId, sessionId, cityId, actionTime, productIds = orderProductIds, categoryIds = orderCategoryIds) 272 | } else { 273 | // nothings, finish 274 | } 275 | 276 | } 277 | } 278 | 279 | // Generate data for pay 280 | def generatePay(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long, productIds: String, categoryIds: String): Unit = { 281 | if (times <= 20) { 282 | // pay ID:301 283 | val pageId: Long = 301 284 | val actionTime = getCurrentActionTime(preActionTime) 285 | val searchKeyword: String = "" 286 | val clickProductId: String = "" 287 | val clickCategoryId: String = "" 288 | val orderCategoryIds: String = "" 289 | val orderProductIds: String = "" 290 | val payCategoryIds: String = categoryIds 291 | val payProductIds: String = productIds 292 | 293 | // Add data 294 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes() 295 | producer.send(new ProducerRecord("userVisit", message)) 296 | 297 | total = total + 1 298 | length = length + message.length 299 | // Go to next action 300 | val (t1, t2) = 301 | if (times < 10) { 302 | (4, 8) 303 | } else { 304 | (1, 3) 305 | } 306 | 307 | val tmp = seed % 10 308 | seed = seed + 1 309 | 310 | if (tmp <= t1) { 311 | // Visit 312 | generatePageView(times + 1, 
userId, sessionId, cityId, actionTime) 313 | } else if (tmp <= t2) { 314 | // Search 315 | generateSearch(times + 1, userId, sessionId, cityId, actionTime) 316 | } else { 317 | // nothings, finish 318 | } 319 | 320 | } 321 | } 322 | 323 | var flag: Boolean = true 324 | while (flag) { 325 | val startTime = System.currentTimeMillis() 326 | val userId: Long = random.nextInt(userNumbers) 327 | val sessionId = hostName + "_" + threadName + "_"+ total 328 | val cityId = citys((total % cityTypeSize).toInt)._1 329 | seed = random.nextInt(100) 330 | // Actions fall into five types: page view (visit), search, click, order and pay 331 | /** 332 | * Suppose the access chain has several situations: 333 | * 1. Visit -> Search-> Click -> Order -> Pay 334 | * 2. Search -> Click -> Order -> Pay 335 | * 3. Visit -> Click -> Order -> Pay 336 | * Note: Visit, Search and Click can be generated consecutively, while Order and Pay cannot appear consecutively 337 | * ======> 338 | * After visiting, there may be search, click and visit action. 339 | * After searching, there may be visit, search and click action. 340 | * After clicking, there may be visit, search, order and click action. 341 | * After ordering, there may be search, visit and pay action. 342 | * After paying, there may be search and visit action. 343 | * Note: After any action, the session may also simply finish. 344 | **/ 345 | 346 | // 80% visit, 20% search 347 | if (total % 5 < 4) { 348 | // generate data for visit 349 | generatePageView(0, userId, sessionId, cityId, startTime) 350 | } else { 351 | // generate data for search 352 | generateSearch(0, userId, sessionId, cityId, startTime) 353 | } 354 | 355 | if ( (System.currentTimeMillis() - start) > time*1000) { 356 | flag = false 357 | } 358 | 359 | } 360 | } 361 | 362 | /** 363 | * Simulation code for generating product information 364 | * 365 | * @param 366 | * @return 367 | */ 368 | def mockProductInfo() = { 369 | val producer = createProducer 370 | val buffer = ArrayBuffer[ProductInfo]() 371 | for (i <- 0 until productNumbers) { 372 | val productID: Long = i.toLong 373 | val productName: String = s"product_${productID}" 374 | // 60% third party products; 40% proprietary products 375 | val extendInfo: String = { 376 | val obj = new JSONObject() 377 | if (random.nextDouble() <= 0.4) { 378 | // proprietary product 379 | obj.put("product_type", "0") 380 | } else { 381 | // third party products 382 | obj.put("product_type", "1") 383 | } 384 | obj.toJSONString 385 | } 386 | producer.send(new ProducerRecord("productInfo", ProductInfo(productID, productName, extendInfo).formatted(",").getBytes())) 387 | 388 | } 389 | } 390 | 391 | } 392 | -------------------------------------------------------------------------------- /dataGen/src/main/scala/com/intel/streaming_benchmark/click.scala: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark 2 | 3 | import java.util.Random 4 | 5 | 6 | object click { 7 | 8 | val random = new Random 9 | val splitSymbol = "," 10 | 11 | val userNumbers = 1000 12 | 13 | val userVisitSessionNumbers = 10000 14 | 15 | val productNumbers = 10000 16 | 17 | val productCategoryNumbers = 50 18 | 19 | val professionals = Array("Programmer", "Teacher", "Cook", "Driver", "Doctor", "Nurse", "Designer", "Farmer", "Worker", "Assistant") 20 | val professionalTypeSize = professionals.length 21 | 22 | val citys: Array[(Int, String)] = Array("Shanghai", "Beijing", "Shenzhen", "Guangzhou", "Nanjing", "Hangzhou", "Changsha", "Nanchang", "Zhangjiajie", "Hong Kong",
"Macao").zipWithIndex.map(_.swap) 23 | val cityTypeSize = citys.length 24 | 25 | val sexs = Array("male", "female", "unknown") 26 | val sexTypeSize = sexs.length 27 | // search key word 28 | val keywords = Array("Hot Pot", "Cake", "Chongqing spicy chicken", "Chongqing facet", 29 | "Biscuits", "Fish", "International Trade Building or Cetra Building", "Pacific Mall", "Japanese cuisine", "Hot Spring") 30 | val keywordSize = keywords.length 31 | 32 | var count = 0 33 | } 34 | 35 | 36 | case class ProductInfo( 37 | productID: Long, 38 | productName: String, 39 | extendInfo: String 40 | ) { 41 | /** 42 | * Format 43 | * 44 | * @param splitSymbol 45 | * @return 46 | */ 47 | def formatted(splitSymbol: String = "^"): String = { 48 | s"${productID}${splitSymbol}${productName}${splitSymbol}${extendInfo}" 49 | } 50 | } 51 | 52 | object ProductInfo { 53 | /** 54 | * column name of the table 55 | */ 56 | val columnNames = Array("product_id", "product_name", "extend_info") 57 | 58 | /** 59 | * Parse row data and return the object; if parsing fails return None 60 | * 61 | * @param line 62 | * @param splitSymbol 63 | * @return 64 | */ 65 | def parseProductInfo(line: String, splitSymbol: String = "\\^"): Option[ProductInfo] = { 66 | val arr = line.split(splitSymbol) 67 | if (arr.length == 3) { 68 | Some( 69 | new ProductInfo( 70 | arr(0).toLong, 71 | arr(1), 72 | arr(2) 73 | ) 74 | ) 75 | } else None 76 | } 77 | } 78 | 79 | 80 | 81 | case class UserInfo( 82 | userId: Long, 83 | userName: String, 84 | name: String, 85 | age: Int, 86 | professional: String, 87 | city: String, 88 | sex: String 89 | ) { 90 | /** 91 | * Format time 92 | * 93 | * @param splitSymbol 94 | * @return 95 | */ 96 | def formatted(splitSymbol: String = ","): String = { 97 | s"${userId}${splitSymbol}${userName}${splitSymbol}${name}${splitSymbol}${age}${splitSymbol}${professional}${splitSymbol}${city}${splitSymbol}${sex}" 98 | } 99 | } 100 | 101 | object UserInfo { 102 | /** 103 | * column name of the table 104 | */ 105 | val columnNames = Array("user_id", "user_name", "name", "age", "professional", "city", "sex") 106 | 107 | /** 108 | * Parse row data and return the object; if parsing fails return None 109 | * 110 | * @param line 111 | * @param splitSymbol 112 | * @return 113 | */ 114 | def parseUserInfo(line: String, splitSymbol: String = ","): Option[UserInfo] = { 115 | val arr = line.split(splitSymbol) 116 | if (arr.length == 7) { 117 | Some(new UserInfo( 118 | arr(0).toLong, 119 | arr(1), 120 | arr(2), 121 | arr(3).toInt, 122 | arr(4), 123 | arr(5), 124 | arr(6) 125 | )) 126 | } else None 127 | } 128 | } 129 | 130 | 131 | case class UserVisitAction( 132 | date: String, 133 | userId: Long, 134 | sessionId: String, 135 | pageId: Long, 136 | actionTime: Long, 137 | searchKeyword: String, 138 | clickCategoryId: String, 139 | clickProductId: String, 140 | orderCategoryIds: String, 141 | orderProductIds: String, 142 | payCategoryIds: String, 143 | payProductIds: String, 144 | cityId: Int 145 | ) { 146 | /** 147 | * Format time 148 | * 149 | * @param splitSymbol 150 | * @return 151 | */ 152 | def formatted(splitSymbol: String = ","): String = { 153 | s"${date}${splitSymbol}${userId}${splitSymbol}${sessionId}${splitSymbol}${pageId}${splitSymbol}${actionTime}${splitSymbol}${searchKeyword}${splitSymbol}${clickCategoryId}${splitSymbol}${clickProductId}${splitSymbol}${orderCategoryIds}${splitSymbol}${orderProductIds}${splitSymbol}${payCategoryIds}${splitSymbol}${payProductIds}${splitSymbol}${cityId}" 154 | } 155 | } 156 | 157 | object 
UserVisitAction { 158 | /** 159 | * column name of the table 160 | */ 161 | val columnNames = Array("date", "user_id", "session_id", "page_id", "action_time", "search_keyword", "click_category_id", "click_product_id", "order_category_ids", "order_product_ids", "pay_category_ids", "pay_product_ids", "city_id") 162 | 163 | /** 164 | * Parse row data and return the object; if parsing fails return None 165 | * 166 | * @param line 167 | * @param splitSymbol 168 | * @return 169 | */ 170 | def parseUserVisitAction(line: String, splitSymbol: String = ","): Option[UserVisitAction] = { 171 | val arr = line.split(splitSymbol) 172 | if (arr.length == 13) { 173 | Some( 174 | new UserVisitAction( 175 | arr(0), 176 | arr(1).toLong, 177 | arr(2), 178 | arr(3).toLong, 179 | arr(4).toLong, 180 | arr(5), 181 | arr(6), 182 | arr(7), 183 | arr(8), 184 | arr(9), 185 | arr(10), 186 | arr(11), 187 | arr(12).toInt 188 | ) 189 | ) 190 | } else None 191 | } 192 | } 193 | 194 | 195 | -------------------------------------------------------------------------------- /flink/conf/benchmarkConf.yaml: -------------------------------------------------------------------------------- 1 | streambench.flink.checkpointDuration 5000 2 | streambench.flink.timeType EventTime -------------------------------------------------------------------------------- /flink/log/q1.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q1.sql.log -------------------------------------------------------------------------------- /flink/log/q10.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q10.sql.log -------------------------------------------------------------------------------- /flink/log/q11.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q11.sql.log -------------------------------------------------------------------------------- /flink/log/q12.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q12.sql.log -------------------------------------------------------------------------------- /flink/log/q2.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q2.sql.log -------------------------------------------------------------------------------- /flink/log/q3.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q3.sql.log -------------------------------------------------------------------------------- /flink/log/q4.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q4.sql.log -------------------------------------------------------------------------------- /flink/log/q5.sql.log: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q5.sql.log -------------------------------------------------------------------------------- /flink/log/q6.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q6.sql.log -------------------------------------------------------------------------------- /flink/log/q7.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q7.sql.log -------------------------------------------------------------------------------- /flink/log/q8.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q8.sql.log -------------------------------------------------------------------------------- /flink/log/q9.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q9.sql.log -------------------------------------------------------------------------------- /flink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | streaming_benchmark 7 | com.intel.streaming_benchmark 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | flink 13 | 14 | 15 | com.intel.streaming_benchmark 16 | common 17 | 1.0-SNAPSHOT 18 | 19 | 20 | com.alibaba 21 | fastjson 22 | 1.2.58 23 | 24 | 25 | 26 | src/main/java 27 | 28 | 29 | src/main/resources 30 | true 31 | 32 | 33 | src/main/java 34 | 35 | ../*.java 36 | 37 | 38 | 39 | 40 | 41 | 42 | org.codehaus.mojo 43 | build-helper-maven-plugin 44 | 1.4 45 | 46 | 47 | add-source 48 | generate-sources 49 | 50 | add-source 51 | 52 | 53 | 54 | ../common/src/main/scala 55 | ../common/src/main/java 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | net.alchim31.maven 66 | scala-maven-plugin 67 | 3.2.2 68 | 69 | 70 | scala-compile-first 71 | process-resources 72 | 73 | add-source 74 | compile 75 | 76 | 77 | 78 | scala-test-compile 79 | process-test-resources 80 | 81 | testCompile 82 | 83 | 84 | 85 | 86 | 87 | org.apache.maven.plugins 88 | maven-compiler-plugin 89 | 3.2 90 | 91 | 1.8 92 | 1.8 93 | UTF-8 94 | 95 | 96 | 97 | 98 | 99 | org.apache.maven.plugins 100 | maven-compiler-plugin 101 | 3.8.0 102 | 103 | 1.8 104 | 1.8 105 | 106 | 107 | 108 | org.apache.maven.plugins 109 | maven-resources-plugin 110 | 3.0.2 111 | 112 | UTF-8 113 | 114 | 115 | 116 | 117 | org.apache.maven.plugins 118 | maven-shade-plugin 119 | 2.4.3 120 | 121 | 122 | 123 | 124 | *:* 125 | 126 | META-INF/*.SF 127 | META-INF/*.DSA 128 | META-INF/*.RSA 129 | 130 | 131 | 132 | 133 | 134 | junit:junit 135 | org.slf4j:slf4j-simple 136 | org.slf4j:slf4j-log4j12 137 | com.101tec:zkclient 138 | com.github.sgroschupf:zkclient 139 | org.apache.httpcomponents:httpclient 140 | 141 | 142 | 143 | 144 | 145 | package 146 | 147 | shade 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 
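The flink/query/*.sql files that follow are plain Flink SQL statements over the Kafka-backed tables (shopping, click, imp, dau, userVisit) that the Flink Benchmark class registers at runtime. As a condensed sketch of how each file is executed (abridged from flink/src/main/java/com/intel/streaming_benchmark/flink/Benchmark.java shown later in this dump; `config`, `tableEnv` and `env` are that class's locals):
```java
// Minimal sketch: load one query file and run it as a retract stream (condensed from Benchmark.runQuery).
File file = new File(config.sqlLocation + "/" + config.sqlName);   // e.g. flink/query/q1.sql
String queryString = DateUtils.fileToString(file);                 // read the SQL text
Table table = tableEnv.sqlQuery(queryString);                      // plan it against the registered tables
tableEnv.toRetractStream(table, Row.class).print();                // windowed aggregates emit retractions
env.execute(config.sqlName);                                       // runtime and record counts are appended to flink/result/result.log
```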
-------------------------------------------------------------------------------- /flink/query/q1.sql: -------------------------------------------------------------------------------- 1 | select 2 | commodity, count(userId) num, TUMBLE_START(rowtime, INTERVAL '10' SECOND),TUMBLE_END(rowtime, INTERVAL '10' SECOND), UNIX_TIMESTAMP(TUMBLE_START(rowtime, INTERVAL '10' SECOND)) - UNIX_TIMESTAMP(TO_TIMESTAMP(min(times))) 3 | from 4 | shopping 5 | group by 6 | TUMBLE(rowtime, INTERVAL '10' SECOND), commodity -------------------------------------------------------------------------------- /flink/query/q10.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.dt, a.h, COUNT(sessionId) num 3 | FROM 4 | (SELECT 5 | sessionId, MAX(actionTime)-MIN(actionTime) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h 6 | FROM 7 | userVisit 8 | GROUP BY 9 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP))) a 10 | WHERE 11 | a.len < 100 12 | GROUP BY 13 | a.dt, a.h -------------------------------------------------------------------------------- /flink/query/q11.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.dt, a.h, SUM(a.len) total 3 | FROM 4 | (SELECT 5 | sessionId, MAX(actionTime)-MIN(actionTime) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h 6 | FROM 7 | userVisit 8 | GROUP BY 9 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP))) a 10 | WHERE 11 | a.len < 1 12 | GROUP BY 13 | a.dt, a.h -------------------------------------------------------------------------------- /flink/query/q12.sql: -------------------------------------------------------------------------------- 1 | 2 | SELECT 3 | * 4 | FROM 5 | (SELECT 6 | *, ROW_NUMBER() OVER (PARTITION BY w.cityId ORDER BY w.num DESC) as rownum 7 | FROM 8 | (SELECT 9 | TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), cityId, payProductIds, count(*) num 10 | FROM 11 | userVisit 12 | WHERE 13 | payProductIds IS NOT NULL 14 | GROUP BY 15 | cityId, payProductIds, TUMBLE(rowtime, INTERVAL '10' SECOND) 16 | ) w 17 | ) v 18 | WHERE 19 | v.rownum <= 10 -------------------------------------------------------------------------------- /flink/query/q2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | strategy, site, pos_id, TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), count(*) click_count 3 | FROM 4 | click 5 | GROUP BY 6 | strategy, site, pos_id, TUMBLE(rowtime, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /flink/query/q3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | strategy, site, pos_id, TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), SUM(cost) 3 | FROM 4 | imp 5 | GROUP BY 6 | strategy, site, pos_id, TUMBLE(rowtime, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /flink/query/q4.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | b.device_id, a.strategy, a.site, a.pos_id, count(b.device_id) 3 | FROM 4 | click a 5 | JOIN 6 | dau b 7 | ON 8 | a.device_id = b.device_id AND a.rowtime BETWEEN b.rowtime - INTERVAL '1' 
second AND b.rowtime + INTERVAL '1' second 9 | GROUP BY 10 | b.device_id, a.strategy, a.site, a.pos_id, TUMBLE(a.rowtime, INTERVAL '10' SECOND) 11 | -------------------------------------------------------------------------------- /flink/query/q5.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sessionId, MAX(actionTime)-MIN(actionTime) as len 3 | FROM 4 | userVisit 5 | GROUP BY 6 | sessionId, TUMBLE(rowtime, INTERVAL '10' SECOND) 7 | 8 | -------------------------------------------------------------------------------- /flink/query/q6.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sessionId, (MAX(actionTime)-MIN(actionTime)) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h, COUNT(sessionId) num 3 | FROM 4 | userVisit 5 | GROUP BY 6 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP)), TUMBLE(rowtime, INTERVAL '10' SECOND) 7 | HAVING 8 | (MAX(actionTime)-MIN(actionTime)) < 100 -------------------------------------------------------------------------------- /flink/query/q7.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), cityId, payProductIds, count(*) num 3 | FROM 4 | userVisit 5 | WHERE 6 | payProductIds IS NOT NULL 7 | GROUP BY 8 | cityId, payProductIds, TUMBLE(rowtime, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /flink/query/q8.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | TUMBLE_START(rowtime, INTERVAL '10' SECOND) startTime, TUMBLE_END(rowtime, INTERVAL '10' SECOND) finish, cityId, count(clickCategoryId) as sequence 3 | FROM 4 | userVisit 5 | WHERE 6 | clickCategoryId IS NOT NULL 7 | GROUP BY 8 | cityId, TUMBLE(rowtime, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /flink/query/q9.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.device_id, a.strategy, a.site, a.pos_id, b.var2, b.var1, count(*) 3 | FROM 4 | (SELECT device_id, strategy, site, pos_id FROM click) a 5 | JOIN 6 | (SELECT device_id, FROM_UNIXTIME(CAST(dau_time/1000 AS BIGINT), 'yyyyMMdd') as var1, FROM_UNIXTIME(CAST(dau_time/1000 AS BIGINT), 'HH') as var2 FROM dau) b 7 | ON 8 | a.device_id = b.device_id 9 | GROUP BY 10 | a.device_id, a.strategy, a.site, a.pos_id, b.var2, b.var1 -------------------------------------------------------------------------------- /flink/result/result.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/result/result.log -------------------------------------------------------------------------------- /flink/src/main/java/com/intel/streaming_benchmark/flink/Benchmark.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.flink; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.intel.streaming_benchmark.common.*; 5 | import com.intel.streaming_benchmark.utils.FlinkBenchConfig; 6 | import org.apache.flink.api.common.JobExecutionResult; 7 | import org.apache.flink.api.common.accumulators.IntCounter; 8 | import 
org.apache.flink.api.common.functions.RichFlatMapFunction; 9 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 10 | import org.apache.flink.api.java.tuple.*; 11 | import org.apache.flink.configuration.Configuration; 12 | import org.apache.flink.streaming.api.TimeCharacteristic; 13 | import org.apache.flink.streaming.api.datastream.DataStream; 14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 15 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 16 | import org.apache.flink.streaming.api.watermark.Watermark; 17 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; 18 | import org.apache.flink.table.api.EnvironmentSettings; 19 | import org.apache.flink.table.api.Table; 20 | import org.apache.flink.table.api.TableConfig; 21 | import org.apache.flink.table.api.java.StreamTableEnvironment; 22 | import org.apache.flink.types.Row; 23 | import org.apache.flink.util.Collector; 24 | import com.alibaba.fastjson.JSONObject; 25 | import javax.annotation.Nullable; 26 | import java.io.BufferedWriter; 27 | import java.io.File; 28 | import java.io.FileWriter; 29 | import java.text.SimpleDateFormat; 30 | import java.util.Properties; 31 | 32 | public class Benchmark { 33 | public static void main(String[] args) throws Exception { 34 | if (args.length < 2) 35 | BenchLogUtil.handleError("Usage: RunBench "); 36 | //root Config 37 | ConfigLoader cl = new ConfigLoader(args[0]); 38 | String benchmarkConfDir = new File(args[0]).getParent(); 39 | 40 | //flink config 41 | String flinkConf = benchmarkConfDir + "/../flink/conf/benchmarkConf.yaml"; 42 | cl.merge(flinkConf); 43 | 44 | // Prepare configuration 45 | FlinkBenchConfig conf = new FlinkBenchConfig(); 46 | conf.brokerList = cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST); 47 | conf.zkHost = cl.getProperty(StreamBenchConfig.ZK_HOST); 48 | conf.consumerGroup = cl.getProperty(StreamBenchConfig.CONSUMER_GROUP); 49 | conf.checkpointDuration = Long.parseLong(cl.getProperty(StreamBenchConfig.FLINK_CHECKPOINTDURATION)); 50 | conf.timeType = cl.getProperty(StreamBenchConfig.FLINK_TIMETYPE); 51 | conf.topic = QueryConfig.getTables(args[1]); 52 | conf.sqlLocation = benchmarkConfDir + "/../flink/query"; 53 | conf.resultLocation = benchmarkConfDir + "/../flink/result"; 54 | conf.sqlName = args[1]; 55 | runQuery(conf); 56 | } 57 | 58 | public static void runQuery(FlinkBenchConfig config) throws Exception{ 59 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 60 | env.enableCheckpointing(config.checkpointDuration); 61 | if(config.timeType.equals("EventTime")){ 62 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 63 | }else{ 64 | env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); 65 | } 66 | 67 | TableConfig tc = new TableConfig(); 68 | EnvironmentSettings builder = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 69 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env,builder); 70 | 71 | Properties properties = new Properties(); 72 | properties.setProperty("zookeeper.connect", config.zkHost); 73 | properties.setProperty("group.id", config.consumerGroup); 74 | properties.setProperty("bootstrap.servers", config.brokerList); 75 | 76 | String[] topics = config.topic.split(","); 77 | 78 | //generate table 79 | for(int i = 0; i < topics.length; i++){ 80 | // source stream 81 | FlinkKafkaConsumer010 consumer = new FlinkKafkaConsumer010(topics[i], new 
SimpleStringSchema(),properties); 82 | consumer.setStartFromLatest(); 83 | // consumer.setStartFromEarliest(); 84 | //add stream source for flink 85 | DataStream stream = env.addSource(consumer); 86 | // stream parse need table schema 87 | String[] fieldNames = TableSchemaProvider.getSchema(topics[i]).getFieldNames(); 88 | // TypeInformation returnType = TypeExtractor.createTypeInfo(); 89 | DataStream streamParsed; 90 | 91 | if(config.timeType.equals("EventTime")){ 92 | if(topics[i].equals("shopping")){ 93 | streamParsed = stream.flatMap(new DeserializeShopping()).assignTimestampsAndWatermarks(new ShoppingWatermarks()); 94 | }else if(topics[i].equals("click")){ 95 | streamParsed = stream.flatMap(new DeserializeClick()).assignTimestampsAndWatermarks(new ClickWatermarks()); 96 | }else if(topics[i].equals("imp")){ 97 | streamParsed = stream.flatMap(new DeserializeImp()).assignTimestampsAndWatermarks(new ImpWatermarks()); 98 | }else if(topics[i].equals("dau")){ 99 | streamParsed = stream.flatMap(new DeserializeDau()).assignTimestampsAndWatermarks(new DauWatermarks()); 100 | }else if(topics[i].equals("userVisit")){ 101 | streamParsed = stream.flatMap(new DeserializeUserVisit()).assignTimestampsAndWatermarks(new UserVisitWatermarks()); 102 | }else{ 103 | System.out.println("No such topic, please check your benchmarkConf.yaml"); 104 | return; 105 | } 106 | 107 | }else{ 108 | if(topics[i].equals("shopping")){ 109 | streamParsed = stream.flatMap(new DeserializeShopping()); 110 | }else if(topics[i].equals("click")){ 111 | streamParsed = stream.flatMap(new DeserializeClick()); 112 | }else if(topics[i].equals("imp")){ 113 | streamParsed = stream.flatMap(new DeserializeImp()); 114 | }else if(topics[i].equals("dau")){ 115 | streamParsed = stream.flatMap(new DeserializeDau()); 116 | }else if(topics[i].equals("userVisit")){ 117 | streamParsed = stream.flatMap(new DeserializeUserVisit()); 118 | }else{ 119 | System.out.println("No such topic, please check your benchmarkConf.yaml"); 120 | return; 121 | } 122 | } 123 | 124 | tableEnv.registerTable(topics[i], tableEnv.fromDataStream(streamParsed, FieldString(fieldNames, config.timeType))); 125 | } 126 | 127 | //runQuery 128 | File file = new File(config.sqlLocation + "/" + config.sqlName); 129 | if (!file.exists()) { 130 | return; 131 | } 132 | try { 133 | String queryString = DateUtils.fileToString(file); 134 | Table table = tableEnv.sqlQuery(queryString); 135 | table.printSchema(); 136 | DataStream> tuple2DataStream = tableEnv.toRetractStream(table, Row.class); 137 | tuple2DataStream.print(); 138 | } catch (Exception e) { 139 | e.printStackTrace(); 140 | } 141 | 142 | JobExecutionResult execute = env.execute(config.sqlName); 143 | JobExecutionResult jobExecutionResult = execute.getJobExecutionResult(); 144 | long netRuntime = jobExecutionResult.getNetRuntime(); 145 | System.out.println("----------------runtime---------------- :" + netRuntime); 146 | long count = 0; 147 | for(int i = 0; i < topics.length; i++){ 148 | Integer tmp = (Integer)jobExecutionResult.getAccumulatorResult(topics[i]); 149 | count = count + tmp.longValue(); 150 | } 151 | File resultFile = new File(config.resultLocation + "/result.log" ); 152 | if (!resultFile.exists()) { 153 | resultFile.createNewFile(); 154 | } 155 | FileWriter fileWriter = new FileWriter(config.resultLocation + "/result.log", true); 156 | BufferedWriter bufferWriter = new BufferedWriter(fileWriter); 157 | bufferWriter.write("Finished time: "+ DateUtils.parseLong2String(System.currentTimeMillis()) + "; " + 
config.sqlName + " Runtime: " + netRuntime/1000 + " TPS:" + count/(netRuntime/1000) + "\r\n"); 158 | bufferWriter.close(); 159 | 160 | } 161 | 162 | private static String FieldString(String[] fieldNames, String timeType){ 163 | String fileds = ""; 164 | for(int i =0; i< fieldNames.length; i++){ 165 | fileds = fileds + fieldNames[i] + ","; 166 | } 167 | if(timeType.equals("EventTime")){ 168 | fileds = fileds + "rowtime.rowtime"; 169 | }else{ 170 | fileds = fileds + "rowtime.proctime"; 171 | } 172 | return fileds; 173 | } 174 | 175 | public static class ShoppingWatermarks implements AssignerWithPeriodicWatermarks> { 176 | Long currentMaxTimestamp = 0L; 177 | final Long maxOutOfOrderness = 2000L; 178 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 179 | 180 | @Nullable 181 | @Override 182 | public Watermark getCurrentWatermark() { 183 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness); 184 | return watermark; 185 | } 186 | 187 | @Override 188 | public long extractTimestamp(Tuple3 element, long previousElementTimestamp) { 189 | Long timestamp = Long.valueOf(element.f2); 190 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 191 | return timestamp; 192 | } 193 | } 194 | 195 | 196 | public static class ClickWatermarks implements AssignerWithPeriodicWatermarks> { 197 | Long currentMaxTimestamp = 0L; 198 | final Long maxOutOfOrderness = 2000L; 199 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 200 | 201 | @Nullable 202 | @Override 203 | public Watermark getCurrentWatermark() { 204 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness); 205 | return watermark; 206 | } 207 | 208 | @Override 209 | public long extractTimestamp(Tuple6 element, long previousElementTimestamp) { 210 | Long timestamp = Long.valueOf(element.f0); 211 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 212 | return timestamp; 213 | } 214 | } 215 | 216 | 217 | public static class ImpWatermarks implements AssignerWithPeriodicWatermarks> { 218 | Long currentMaxTimestamp = 0L; 219 | final Long maxOutOfOrderness = 2000L; 220 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 221 | 222 | @Nullable 223 | @Override 224 | public Watermark getCurrentWatermark() { 225 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness); 226 | return watermark; 227 | } 228 | 229 | @Override 230 | public long extractTimestamp(Tuple7 element, long previousElementTimestamp) { 231 | Long timestamp = Long.valueOf(element.f0); 232 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 233 | return timestamp; 234 | } 235 | } 236 | 237 | 238 | public static class DauWatermarks implements AssignerWithPeriodicWatermarks> { 239 | Long currentMaxTimestamp = 0L; 240 | final Long maxOutOfOrderness = 2000L; 241 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 242 | 243 | @Nullable 244 | @Override 245 | public Watermark getCurrentWatermark() { 246 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness); 247 | return watermark; 248 | } 249 | 250 | @Override 251 | public long extractTimestamp(Tuple2 element, long previousElementTimestamp) { 252 | Long timestamp = Long.valueOf(element.f0); 253 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 254 | return timestamp; 255 | } 256 | } 257 | 258 | 259 | public static class UserVisitWatermarks implements AssignerWithPeriodicWatermarks> { 260 | Long currentMaxTimestamp = 0L; 
261 | final Long maxOutOfOrderness = 2000L; 262 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 263 | 264 | @Nullable 265 | @Override 266 | public Watermark getCurrentWatermark() { 267 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness); 268 | return watermark; 269 | } 270 | 271 | @Override 272 | public long extractTimestamp(Tuple13 element, long previousElementTimestamp) { 273 | Long timestamp = Long.valueOf(element.f4); 274 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 275 | return timestamp; 276 | } 277 | } 278 | 279 | 280 | 281 | public static class DeserializeShopping extends RichFlatMapFunction> { 282 | 283 | // Counter numLines; 284 | private IntCounter shopping = new IntCounter(); 285 | @Override 286 | public void open(Configuration parameters) throws Exception { 287 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines"); 288 | getRuntimeContext().addAccumulator("shopping", this.shopping); 289 | super.open(parameters); 290 | } 291 | 292 | @Override 293 | public void flatMap(String s, Collector> collector) throws Exception { 294 | this.shopping.add(1); 295 | String[] split = s.split(","); 296 | collector.collect(new Tuple3(split[0], split[1], Long.valueOf(split[2]))); 297 | } 298 | } 299 | 300 | public static class DeserializeClick extends RichFlatMapFunction> { 301 | 302 | private IntCounter click = new IntCounter(); 303 | @Override 304 | public void open(Configuration parameters) throws Exception { 305 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines"); 306 | getRuntimeContext().addAccumulator("click", this.click); 307 | super.open(parameters); 308 | } 309 | 310 | @Override 311 | public void flatMap(String input, Collector> collector) throws Exception { 312 | this.click.add(1); 313 | JSONObject obj = JSON.parseObject(input); 314 | // JSONObject obj = new JSONObject(input); 315 | Tuple6 tuple = new Tuple6<>( 316 | obj.getLong("click_time"), 317 | obj.getString("strategy"), 318 | obj.getString("site"), 319 | obj.getString("pos_id"), 320 | obj.getString("poi_id"), 321 | obj.getString("device_id") 322 | ); 323 | collector.collect(tuple); 324 | } 325 | } 326 | 327 | public static class DeserializeImp extends RichFlatMapFunction> { 328 | 329 | private IntCounter imp = new IntCounter(); 330 | @Override 331 | public void open(Configuration parameters) throws Exception { 332 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines"); 333 | getRuntimeContext().addAccumulator("imp", this.imp); 334 | super.open(parameters); 335 | } 336 | 337 | @Override 338 | public void flatMap(String input, Collector> collector) throws Exception { 339 | this.imp.add(1); 340 | JSONObject obj = JSON.parseObject(input); 341 | // JSONObject obj = new JSONObject(input); 342 | Tuple7 tuple = new Tuple7<>( 343 | obj.getLong("imp_time"), 344 | obj.getString("strategy"), 345 | obj.getString("site"), 346 | obj.getString("pos_id"), 347 | obj.getString("poi_id"), 348 | obj.getDouble("cost"), 349 | obj.getString("device_id") 350 | ); 351 | collector.collect(tuple); 352 | } 353 | } 354 | 355 | public static class DeserializeDau extends RichFlatMapFunction> { 356 | 357 | private IntCounter dau = new IntCounter(); 358 | @Override 359 | public void open(Configuration parameters) throws Exception { 360 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines"); 361 | 
getRuntimeContext().addAccumulator("dau", this.dau); 362 | super.open(parameters); 363 | } 364 | 365 | @Override 366 | public void flatMap(String input, Collector> collector) throws Exception { 367 | this.dau.add(1); 368 | JSONObject obj = JSON.parseObject(input); 369 | // JSONObject obj = new JSONObject(input); 370 | Tuple2 tuple = new Tuple2<>( 371 | obj.getLong("dau_time"), 372 | obj.getString("device_id") 373 | ); 374 | collector.collect(tuple); 375 | } 376 | } 377 | 378 | 379 | public static class DeserializeUserVisit extends RichFlatMapFunction> { 380 | 381 | private IntCounter userVisit = new IntCounter(); 382 | @Override 383 | public void open(Configuration parameters) throws Exception { 384 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines"); 385 | getRuntimeContext().addAccumulator("userVisit", this.userVisit); 386 | super.open(parameters); 387 | } 388 | 389 | @Override 390 | public void flatMap(String s, Collector> collector) throws Exception { 391 | this.userVisit.add(1); 392 | String[] split = s.split(","); 393 | Tuple13 tuple = new Tuple13<>( 394 | split[0], 395 | Long.valueOf(split[1]), 396 | split[2], 397 | Long.valueOf(split[3]), 398 | Long.valueOf(split[4]), 399 | split[5], 400 | split[6], 401 | split[7], 402 | split[8], 403 | split[9], 404 | split[10], 405 | split[11], 406 | Integer.valueOf(split[12]) 407 | ); 408 | collector.collect(tuple); 409 | } 410 | } 411 | 412 | } 413 | -------------------------------------------------------------------------------- /flink/src/main/java/com/intel/streaming_benchmark/utils/FlinkBenchConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.intel.streaming_benchmark.utils; 19 | 20 | import java.io.Serializable; 21 | 22 | public class FlinkBenchConfig implements Serializable { 23 | // public String testCase; 24 | 25 | // Kafka related 26 | public String zkHost; 27 | public String brokerList; 28 | public String topic; 29 | public String consumerGroup; 30 | // public String offsetReset; 31 | // public String reportTopic; 32 | 33 | // Flink related 34 | public long checkpointDuration; 35 | public String resultLocation; 36 | public String sqlLocation; 37 | public String sqlName; 38 | public String timeType; 39 | 40 | } 41 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.intel.streaming_benchmark 8 | streaming_benchmark 9 | pom 10 | 1.0-SNAPSHOT 11 | 12 | common 13 | spark 14 | flink 15 | dataGen 16 | 17 | 18 | 19 | 20 | org.scala-lang 21 | scala-library 22 | 2.11.8 23 | 24 | 25 | org.scala-lang 26 | scala-compiler 27 | 2.11.8 28 | 29 | 30 | 31 | org.eclipse.tycho 32 | tycho-compiler-jdt 33 | 0.21.0 34 | 35 | 36 | 37 | org.eclipse.tycho 38 | tycho-compiler-jdt 39 | 0.21.0 40 | 41 | 42 | 43 | 44 | org.apache.flink 45 | flink-table-api-java 46 | 1.9.0 47 | 48 | 49 | 50 | org.apache.flink 51 | flink-table-planner-blink_2.11 52 | 1.9.0 53 | 54 | 55 | 56 | 57 | org.apache.flink 58 | flink-streaming-java_2.11 59 | 1.9.0 60 | 61 | 62 | 63 | 64 | com.alibaba 65 | fastjson 66 | 1.2.58 67 | 68 | 69 | 70 | 71 | 72 | org.apache.flink 73 | flink-streaming-scala_2.11 74 | 1.9.0 75 | 76 | 77 | 78 | 79 | org.apache.flink 80 | flink-connector-kafka-0.10_2.11 81 | 1.9.0 82 | 83 | 84 | 85 | 86 | org.apache.kafka 87 | kafka_2.11 88 | 0.10.2.1 89 | 90 | 91 | 92 | 93 | 94 | 95 | maven-compiler-plugin 96 | 3.8.0 97 | 98 | 1.8 99 | 1.8 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /spark/conf/benchmarkConf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/conf/benchmarkConf.yaml -------------------------------------------------------------------------------- /spark/log/q1.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q1.sql.log -------------------------------------------------------------------------------- /spark/log/q2.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q2.sql.log -------------------------------------------------------------------------------- /spark/log/q3.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q3.sql.log -------------------------------------------------------------------------------- /spark/log/q4.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q4.sql.log 
-------------------------------------------------------------------------------- /spark/log/q5.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q5.sql.log -------------------------------------------------------------------------------- /spark/log/q6.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q6.sql.log -------------------------------------------------------------------------------- /spark/log/q7.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q7.sql.log -------------------------------------------------------------------------------- /spark/log/q8.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q8.sql.log -------------------------------------------------------------------------------- /spark/log/q9.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q9.sql.log -------------------------------------------------------------------------------- /spark/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | streaming_benchmark 7 | com.intel.streaming_benchmark 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | spark 13 | 14 | 15 | 16 | com.intel.streaming_benchmark 17 | common 18 | 1.0-SNAPSHOT 19 | 20 | 21 | 22 | org.apache.spark 23 | spark-sql-kafka-0-10_2.11 24 | 2.3.1 25 | compile 26 | 27 | 28 | kafka-clients 29 | org.apache.kafka 30 | 31 | 32 | 33 | 34 | 35 | org.apache.kafka 36 | kafka-clients 37 | 0.10.2.1 38 | 39 | 40 | 41 | org.apache.spark 42 | spark-streaming-kafka-0-10_2.11 43 | 2.3.1 44 | 45 | 46 | 47 | org.apache.spark 48 | spark-streaming_2.11 49 | 2.3.1 50 | compile 51 | 52 | 53 | org.apache.spark 54 | spark-sql_2.11 55 | 2.3.1 56 | compile 57 | 58 | 59 | org.apache.spark 60 | spark-sql-kafka-0-10_2.11 61 | 2.3.1 62 | 63 | 64 | 65 | 66 | com.fasterxml.jackson.core 67 | jackson-databind 68 | 2.6.5 69 | 70 | 71 | 72 | net.jpountz.lz4 73 | lz4 74 | 1.3.0 75 | 76 | 77 | 78 | 79 | 80 | src/main/java 81 | 82 | 83 | src/main/resources 84 | true 85 | 86 | 87 | src/main/java 88 | 89 | ../*.java 90 | 91 | 92 | 93 | 94 | 95 | 96 | org.codehaus.mojo 97 | build-helper-maven-plugin 98 | 1.4 99 | 100 | 101 | add-source 102 | generate-sources 103 | 104 | add-source 105 | 106 | 107 | 108 | ../common/src/main/scala 109 | ../common/src/main/java 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | net.alchim31.maven 120 | scala-maven-plugin 121 | 3.2.2 122 | 123 | 124 | scala-compile-first 125 | process-resources 126 | 127 | add-source 128 | compile 129 | 130 | 131 | 132 | scala-test-compile 133 | process-test-resources 134 | 135 | testCompile 136 | 137 | 138 | 139 | 140 | 141 | org.apache.maven.plugins 142 | maven-compiler-plugin 143 | 3.2 144 | 145 | 1.8 146 | 1.8 147 | UTF-8 148 | 149 | 150 | 151 | 152 | 153 | org.apache.maven.plugins 154 | maven-compiler-plugin 155 | 
3.8.0 156 | 157 | 1.8 158 | 1.8 159 | 160 | 161 | 162 | org.apache.maven.plugins 163 | maven-resources-plugin 164 | 3.0.2 165 | 166 | UTF-8 167 | 168 | 169 | 170 | 171 | org.apache.maven.plugins 172 | maven-shade-plugin 173 | 2.4.3 174 | 175 | 176 | package 177 | 178 | shade 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | org.apache.maven.plugins 192 | maven-shade-plugin 193 | 2.4.3 194 | 195 | 196 | package 197 | 198 | shade 199 | 200 | 201 | 202 | 203 | *:* 204 | 205 | META-INF/*.SF 206 | META-INF/*.DSA 207 | META-INF/*.RSA 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | -------------------------------------------------------------------------------- /spark/query/q1.sql: -------------------------------------------------------------------------------- 1 | select 2 | commodity, count(userId) num, WINDOW(times, '10 seconds').start, WINDOW(times, '10 seconds').end 3 | from 4 | shopping 5 | group BY 6 | WINDOW(times, '10 seconds'), commodity -------------------------------------------------------------------------------- /spark/query/q2.sql: -------------------------------------------------------------------------------- 1 | select 2 | strategy, site, pos_id, WINDOW(click_time, '10 seconds').start, pos_id, WINDOW(click_time, '10 seconds').end, count(*) click_count 3 | from 4 | click 5 | GROUP BY 6 | strategy, site, pos_id, WINDOW(click_time, '10 seconds') -------------------------------------------------------------------------------- /spark/query/q3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | strategy, site, pos_id, WINDOW(imp_time, '10 seconds').start, pos_id, WINDOW(imp_time, '10 seconds').end, SUM(cost) 3 | FROM 4 | imp 5 | GROUP BY 6 | strategy, site, pos_id, WINDOW(imp_time, '10 seconds') -------------------------------------------------------------------------------- /spark/query/q4.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | b.device_id, a.strategy, a.site, a.pos_id, count(b.device_id) 3 | FROM 4 | click a 5 | JOIN 6 | dau b 7 | ON 8 | a.device_id = b.device_id AND a.click_time BETWEEN b.dau_time - INTERVAL 1 second AND b.dau_time + INTERVAL 1 second 9 | GROUP BY 10 | b.device_id, a.strategy, a.site, a.pos_id, WINDOW(a.click_time, '10 seconds') 11 | -------------------------------------------------------------------------------- /spark/query/q5.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sessionId, MAX(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss')) as timmm , MIN(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss')) as timmm2, count(*) 3 | FROM 4 | userVisit 5 | GROUP BY 6 | sessionId, WINDOW(actionTime, '10 seconds') -------------------------------------------------------------------------------- /spark/query/q6.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sessionId, MAX(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss'))-MIN(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss')) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h, COUNT(sessionId) num 3 | FROM 4 | userVisit 5 | GROUP BY 6 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP)), WINDOW(actionTime, '10 seconds') 7 | HAVING 8 | (MAX(TO_UNIX_TIMESTAMP(actionTime, 
'yyyy-MM-dd HH:mm:ss'))-MIN(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss'))) < 100 -------------------------------------------------------------------------------- /spark/query/q7.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | WINDOW(actionTime, '10 seconds').start starts, WINDOW(actionTime, '10 seconds').end finish , cityId, payProductIds, count(*) 3 | FROM 4 | userVisit 5 | WHERE 6 | payProductIds IS NOT NULL 7 | GROUP BY 8 | cityId, payProductIds, WINDOW(actionTime, '10 seconds') -------------------------------------------------------------------------------- /spark/query/q8.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | WINDOW(actionTime, '10 seconds').start start, WINDOW(actionTime, '10 seconds').end finish ,count(*) as sequence 3 | FROM 4 | userVisit 5 | WHERE 6 | clickCategoryId IS NOT NULL 7 | GROUP BY 8 | cityId, WINDOW(actionTime, '10 seconds') -------------------------------------------------------------------------------- /spark/query/q9.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.device_id, a.strategy, a.site, a.pos_id, b.var1, count(*) 3 | FROM 4 | (SELECT device_id, strategy, site, pos_id FROM click) a 5 | JOIN 6 | (SELECT device_id, dau_time as var1 FROM dau) b 7 | ON 8 | a.device_id = b.device_id 9 | GROUP BY 10 | a.device_id, a.strategy, a.site, a.pos_id, b.var1 -------------------------------------------------------------------------------- /spark/result/result.log: -------------------------------------------------------------------------------- 1 | Finished time: 2019-11-05 20:56:58; q9.sql Runtime: 62 TPS:5884 2 | -------------------------------------------------------------------------------- /spark/src/main/java/com/intel/streaming_benchmark/spark/Benchmark.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.spark; 2 | 3 | import com.intel.streaming_benchmark.common.*; 4 | import com.intel.streaming_benchmark.utils.SchemaProvider; 5 | import com.intel.streaming_benchmark.utils.SparkBenchConfig; 6 | import org.apache.spark.api.java.JavaSparkContext; 7 | import org.apache.spark.api.java.function.MapPartitionsFunction; 8 | import org.apache.spark.sql.Dataset; 9 | import org.apache.spark.sql.Row; 10 | import org.apache.spark.sql.RowFactory; 11 | import org.apache.spark.sql.SparkSession; 12 | import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder; 13 | import org.apache.spark.sql.streaming.StreamingQuery; 14 | import org.apache.spark.sql.streaming.Trigger; 15 | import org.apache.spark.util.LongAccumulator; 16 | import com.alibaba.fastjson.JSONObject; 17 | import java.io.BufferedWriter; 18 | import java.io.File; 19 | import java.io.FileWriter; 20 | import java.sql.Timestamp; 21 | import java.util.*; 22 | 23 | public class Benchmark { 24 | public static void main(String[] args) throws Exception { 25 | if (args.length < 2) 26 | BenchLogUtil.handleError("Usage: RunBench "); 27 | 28 | ConfigLoader cl = new ConfigLoader(args[0]); 29 | String benchmarkConfDir = new File(args[0]).getParent(); 30 | //spark config 31 | String sparkConf = benchmarkConfDir + "/../spark/conf/benchmarkConf.yaml"; 32 | cl.merge(sparkConf); 33 | // Prepare configuration 34 | SparkBenchConfig conf = new SparkBenchConfig(); 35 | conf.brokerList = cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST); 36 | conf.zkHost = 
cl.getProperty(StreamBenchConfig.ZK_HOST); 37 | conf.consumerGroup = cl.getProperty(StreamBenchConfig.CONSUMER_GROUP); 38 | conf.topic = QueryConfig.getTables(args[1]); 39 | conf.sqlLocation = benchmarkConfDir + "/../spark/query"; 40 | conf.resultLocation = benchmarkConfDir + "/../spark/result"; 41 | conf.sqlName = args[1]; 42 | conf.runTime = Integer.valueOf(args[2]); 43 | runQuery(conf); 44 | } 45 | 46 | public static void runQuery(SparkBenchConfig config) throws Exception { 47 | 48 | //create SparkSession 49 | SparkSession spark = SparkSession 50 | .builder() 51 | .appName(config.sqlName) 52 | // .master("local[2]") 53 | .getOrCreate(); 54 | JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); 55 | 56 | String[] topics = config.topic.split(","); 57 | Dataset df; 58 | LongAccumulator longAccumulator = jsc.sc().longAccumulator(); 59 | Long startTime= System.currentTimeMillis(); 60 | 61 | //generate table 62 | for(int i = 0; i < topics.length; i++){ 63 | ExpressionEncoder encoder = SchemaProvider.provideSchema(topics[i]); 64 | if(topics[i].equals("shopping")){ 65 | //read data from kafka and get primary data which need to be paresd to mutiple columns. 66 | df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction() { 67 | @Override 68 | public Iterator call(Iterator input) throws Exception { 69 | List rows = new ArrayList<>(); 70 | while (input.hasNext()) { 71 | longAccumulator.add(1); 72 | Row next = input.next(); 73 | String[] split = next.getString(0).split(","); 74 | rows.add(RowFactory.create(split[0],split[1],Timestamp.valueOf(DateUtils.parseLong2String(Long.valueOf(split[2]))))); 75 | } 76 | return rows.iterator(); 77 | } 78 | }, encoder).withWatermark("times", "4 seconds"); 79 | // . 
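                // Shopping records arrive as comma-separated strings; the third field is an epoch-millis
                // timestamp converted to a SQL Timestamp ("times"), which serves as the event-time column
                // for the windowed queries and for the 4-second watermark above. The accumulator counts
                // every parsed record so the total is available for the TPS figure reported at the end of the run.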
80 |             }else if(topics[i].equals("click")){
81 |                 df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction<Row, Row>() {
82 |                     @Override
83 |                     public Iterator<Row> call(Iterator<Row> input) throws Exception {
84 |                         List<Row> rows = new ArrayList<>();
85 |                         while (input.hasNext()) {
86 |                             longAccumulator.add(1);
87 |                             JSONObject obj = JSONObject.parseObject(input.next().getString(0));
88 |                             // JSONObject obj = new JSONObject(input.next().getString(0));
89 |                             rows.add(RowFactory.create(Timestamp.valueOf(DateUtils.parseLong2String(obj.getLong("click_time"))), obj.getString("strategy"), obj.getString("site"), obj.getString("pos_id"), obj.getString("poi_id"), obj.getString("device_id")));
90 |                         }
91 |                         return rows.iterator();
92 |                     }
93 |                 }, encoder).withWatermark("click_time", "4 seconds");
94 |
95 |             }else if(topics[i].equals("imp")){
96 |                 df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction<Row, Row>() {
97 |                     @Override
98 |                     public Iterator<Row> call(Iterator<Row> input) throws Exception {
99 |                         List<Row> rows = new ArrayList<>();
100 |                         while (input.hasNext()) {
101 |                             longAccumulator.add(1);
102 |                             JSONObject obj = JSONObject.parseObject(input.next().getString(0));
103 |                             // JSONObject obj = new JSONObject(input.next().getString(0));
104 |                             rows.add(RowFactory.create(Timestamp.valueOf(DateUtils.parseLong2String(obj.getLong("imp_time"))), obj.getString("strategy"), obj.getString("site"), obj.getString("pos_id"), obj.getString("poi_id"), obj.getDouble("cost"), obj.getString("device_id")));
105 |                         }
106 |                         return rows.iterator();
107 |                     }
108 |                 }, encoder).withWatermark("imp_time", "4 seconds");
109 |             }else if(topics[i].equals("dau")){
110 |                 df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList)
111 |                     .option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction<Row, Row>() {
112 |                     @Override
113 |                     public Iterator<Row> call(Iterator<Row> input) throws Exception {
114 |                         List<Row> rows = new ArrayList<>();
115 |                         while (input.hasNext()) {
116 |                             longAccumulator.add(1);
117 |                             JSONObject obj = JSONObject.parseObject(input.next().getString(0));
118 |                             // JSONObject obj = new JSONObject(input.next().getString(0));
119 |                             rows.add(RowFactory.create(Timestamp.valueOf(DateUtils.parseLong2String(obj.getLong("dau_time"))), obj.getString("device_id")));
120 |                         }
121 |                         return rows.iterator();
122 |                     }
123 |                 }, encoder).withWatermark("dau_time", "4 seconds");
124 |             }else if(topics[i].equals("userVisit")){
125 |                 df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction<Row, Row>() {
126 |                     @Override
127 |                     public Iterator<Row> call(Iterator<Row> input) throws Exception {
128 |                         List<Row> rows = new ArrayList<>();
129 |                         while (input.hasNext()) {
130 |                             longAccumulator.add(1);
131 |                             String[] split = input.next().getString(0).split(",");
132 |                             rows.add(RowFactory.create(split[0], Long.valueOf(split[1]), split[2], Long.valueOf(split[3]), Timestamp.valueOf(DateUtils.parseLong2String(Long.valueOf(split[4]))), split[5], split[6], split[7], split[8], split[9], split[10], split[11], Integer.valueOf(split[12])));
133 |                         }
134 |                         return rows.iterator();
135 |                     }
136 |                 }, encoder).withWatermark("actionTime", "4 seconds");
137 |             }else{
138 |                 System.out.println("No such topic, please check your benchmarkConf.yaml");
139 |                 return;
140 |             }
141 |
142 |             df.createOrReplaceTempView(topics[i]);
143 |         }
144 |
145 |         //runQuery
146 |         File file = new File(config.sqlLocation + "/" + config.sqlName);
147 |         if (!file.exists()) {
148 |             return;
149 |         }
150 |         try {
151 |             String queryString = DateUtils.fileToString(file);
152 |             Dataset<Row> sql = spark.sql(queryString);
153 |             StreamingQuery start = sql.writeStream().outputMode("append").format("console").trigger(Trigger.ProcessingTime("30 seconds")).start();
154 |             start.awaitTermination(config.runTime * 1000);
155 |             System.out.println("2 Total number: " + longAccumulator.value());
156 |
157 |         } catch (Exception e) {
158 |             e.printStackTrace();
159 |         }
160 |         Long finishTime = System.currentTimeMillis();
161 |         Long runningTime = (finishTime - startTime) / 1000;
162 |         File resultFile = new File(config.resultLocation + "/result.log");
163 |         if (!resultFile.exists()) {
164 |             resultFile.createNewFile();
165 |         }
166 |         FileWriter fileWriter = new FileWriter(config.resultLocation + "/result.log", true);
167 |         BufferedWriter bufferWriter = new BufferedWriter(fileWriter);
168 |         bufferWriter.write("Finished time: " + DateUtils.parseLong2String(finishTime) + "; " + config.sqlName + " Runtime: " + runningTime + " TPS:" + longAccumulator.value()/runningTime + "\r\n");
169 |         bufferWriter.close();
170 |
171 |     }
172 | }
173 |
-------------------------------------------------------------------------------- /spark/src/main/java/com/intel/streaming_benchmark/utils/SchemaProvider.java: --------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.utils;
2 |
3 | import org.apache.spark.sql.Row;
4 | import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
5 | import org.apache.spark.sql.catalyst.encoders.RowEncoder;
6 | import org.apache.spark.sql.types.DataTypes;
7 | import org.apache.spark.sql.types.StructType;
8 |
9 | public class SchemaProvider {
10 |
11 |     public static ExpressionEncoder<Row> provideSchema(String topic){
12 |         StructType type = new StructType();
13 |         if(topic.equals("shopping")){
14 |             type = type.add("userID", DataTypes.StringType)
15 |                 .add("commodity", DataTypes.StringType)
16 |                 .add("times", DataTypes.TimestampType);
17 |         }else if(topic.equals("click")){
18 |             type = type.add("click_time", DataTypes.TimestampType)
19 |                 .add("strategy", DataTypes.StringType)
20 |                 .add("site", DataTypes.StringType)
21 |                 .add("pos_id", DataTypes.StringType)
22 |                 .add("poi_id", DataTypes.StringType)
23 |                 .add("device_id", DataTypes.StringType);
24 |         }else if(topic.equals("imp")){
25 |             type = type.add("imp_time", DataTypes.TimestampType)
26 |                 .add("strategy", DataTypes.StringType)
27 |                 .add("site", DataTypes.StringType)
28 |                 .add("pos_id", DataTypes.StringType)
29 |                 .add("poi_id", DataTypes.StringType)
30 |                 .add("cost", DataTypes.DoubleType)
31 |                 .add("device_id", DataTypes.StringType);
32 |         }else if(topic.equals("dau")){
33 |             type = type.add("dau_time", DataTypes.TimestampType)
34 |                 .add("device_id", DataTypes.StringType);
35 |         }else if(topic.equals("userVisit")){
36 |             type = type.add("date", DataTypes.StringType)
37 |                 .add("userId", DataTypes.LongType)
38 |                 .add("sessionId", DataTypes.StringType)
39 |                 .add("pageId", DataTypes.LongType)
40 |                 .add("actionTime", DataTypes.TimestampType)
41 |                 .add("searchKeyword", DataTypes.StringType)
42 |                 .add("clickCategoryId", DataTypes.StringType)
43 |                 .add("clickProductId", DataTypes.StringType)
44 |                 .add("orderCategoryIds", DataTypes.StringType)
.add("orderCategoryIds", DataTypes.StringType) 45 | .add("orderProductIds", DataTypes.StringType) 46 | .add("payCategoryIds", DataTypes.StringType) 47 | .add("payProductIds", DataTypes.StringType) 48 | .add("cityId", DataTypes.IntegerType); 49 | }else { 50 | System.out.println("No such table schema!!!"); 51 | return null; 52 | } 53 | 54 | return RowEncoder.apply(type); 55 | 56 | } 57 | 58 | 59 | } 60 | -------------------------------------------------------------------------------- /spark/src/main/java/com/intel/streaming_benchmark/utils/SparkBenchConfig.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.utils; 2 | 3 | public class SparkBenchConfig { 4 | // Kafka related 5 | public String zkHost; 6 | public String brokerList; 7 | public String topic; 8 | public String consumerGroup; 9 | public String valueDeserializer; 10 | public String keyDeserializer; 11 | 12 | 13 | // public String offsetReset; 14 | // public String reportTopic; 15 | 16 | // Spark related 17 | public long checkpointDuration; 18 | public String resultLocation; 19 | public String sqlLocation; 20 | public String sqlName; 21 | public String timeType; 22 | 23 | 24 | public int runTime; 25 | 26 | } 27 | -------------------------------------------------------------------------------- /utils/dataGenerator.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curDir=$(cd `dirname $0`;pwd) 4 | #curDir=`dirname $0` 5 | echo $curDir 6 | rootDir=$(dirname $curDir) 7 | echo $rootDir 8 | 9 | DATAGEN_TIME=$1 10 | THREAD_PER_NODE=$2 11 | SQL=$3 12 | ENGINE=$4 13 | 14 | 15 | /opt/Beaver/jdk/bin/java -cp $rootDir/dataGen/target/dataGen-1.0-SNAPSHOT.jar com.intel.streaming_benchmark.Datagen $DATAGEN_TIME $THREAD_PER_NODE $SQL $rootDir/conf/benchmarkConf.yaml >> $rootDir/$ENGINE/log/dataGen_${SQL}.log 2>&1 & 16 | --------------------------------------------------------------------------------