├── README.md ├── bin ├── runAll.sh ├── runFlinkBenchmark.sh └── runSparkBenchmark.sh ├── common ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── intel │ │ └── streaming_benchmark │ │ └── common │ │ ├── BenchLogUtil.java │ │ ├── ConfigLoader.java │ │ ├── DateUtils.java │ │ └── StreamBenchConfig.java │ └── scala │ └── com │ └── intel │ └── streaming_benchmark │ └── common │ ├── QueryConfig.scala │ ├── Schema.scala │ └── TableSchemaProvider.scala ├── conf ├── benchmarkConf.yaml ├── dataGenHosts ├── env └── queriesToRun ├── dataGen ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── intel │ │ └── streaming_benchmark │ │ ├── Datagen.java │ │ └── utils │ │ ├── ConfigLoader.java │ │ ├── Constants.java │ │ └── GetProducer.java │ └── scala │ └── com │ └── intel │ └── streaming_benchmark │ ├── ClickProducer.scala │ └── click.scala ├── flink ├── conf │ └── benchmarkConf.yaml ├── log │ ├── q1.sql.log │ ├── q10.sql.log │ ├── q11.sql.log │ ├── q12.sql.log │ ├── q2.sql.log │ ├── q3.sql.log │ ├── q4.sql.log │ ├── q5.sql.log │ ├── q6.sql.log │ ├── q7.sql.log │ ├── q8.sql.log │ └── q9.sql.log ├── pom.xml ├── query │ ├── q1.sql │ ├── q10.sql │ ├── q11.sql │ ├── q12.sql │ ├── q2.sql │ ├── q3.sql │ ├── q4.sql │ ├── q5.sql │ ├── q6.sql │ ├── q7.sql │ ├── q8.sql │ └── q9.sql ├── result │ └── result.log └── src │ └── main │ └── java │ └── com │ └── intel │ └── streaming_benchmark │ ├── flink │ └── Benchmark.java │ └── utils │ └── FlinkBenchConfig.java ├── pom.xml ├── spark ├── conf │ └── benchmarkConf.yaml ├── log │ ├── q1.sql.log │ ├── q2.sql.log │ ├── q3.sql.log │ ├── q4.sql.log │ ├── q5.sql.log │ ├── q6.sql.log │ ├── q7.sql.log │ ├── q8.sql.log │ └── q9.sql.log ├── pom.xml ├── query │ ├── q1.sql │ ├── q2.sql │ ├── q3.sql │ ├── q4.sql │ ├── q5.sql │ ├── q6.sql │ ├── q7.sql │ ├── q8.sql │ └── q9.sql ├── result │ └── result.log └── src │ └── main │ └── java │ └── com │ └── intel │ └── streaming_benchmark │ ├── spark │ └── Benchmark.java │ └── utils │ ├── SchemaProvider.java │ └── SparkBenchConfig.java └── utils └── dataGenerator.sh
/README.md: -------------------------------------------------------------------------------- 1 | # Streaming_benchmark 2 | Streaming Benchmark is designed to measure the performance of stream processing systems such as Flink and Spark. Three use cases are simulated (User Visit Session Analysis, Evaluation of Real-time Advertising, and Shopping Record Analysis). Raw data is generated and stored in Kafka; the streams are mapped to streaming tables, and queries act on these tables. 3 | 4 | ## Building 5 | ``` 6 | mvn clean package 7 | ``` 8 | ## Prerequisites 9 | You should have Apache Kafka, Apache ZooKeeper, Apache Spark and Flink-1.9 installed in your cluster. 10 | 11 | ## Setup 12 | 1. Clone the project onto your master node. 13 | 2. Update conf/benchmarkConf.yaml (the properties of Kafka, ZooKeeper, the benchmark, ...) 14 | ``` 15 | streambench.zkHost ip1:2181,ip2:2181,ip3:2181... 16 | streambench.kafka.brokerList ip1:port1,ip1:port2... 17 | streambench.kafka.consumerGroup benchmark(default) 18 | ``` 19 | 3. Update flink/conf/benchmarkConf.yaml (the properties of Flink) 20 | ``` 21 | streambench.flink.checkpointDuration 5000 22 | streambench.flink.timeType EventTime(Use EventTime or ProcessTime) 23 | ``` 24 | 4. Update conf/dataGenHosts (the hosts where data will be generated; generating data on the Kafka nodes is recommended) 25 | ``` 26 | ip1 27 | ip2 28 | ... 29 | ``` 30 | 5. Update conf/queriesToRun (the queries to be run) 31 | ``` 32 | q1.sql 33 | q2.sql 34 | q3.sql 35 | ...
36 | ``` 37 | 6. Update conf/env 38 | ``` 39 | export DATAGEN_TIME=100 (Running time for each query, in seconds) 40 | export THREAD_PER_NODE=10 (The number of data-generating threads per node) 41 | export FLINK_HOME={FLINK_HOME} 42 | export SPARK_HOME={SPARK_HOME} 43 | ``` 44 | 7. Copy the project to every node that will generate data (the same hosts as in conf/dataGenHosts) and ensure that the master node can log in to these hosts without a password. 45 | 46 | ## Run Benchmark 47 | Start ZooKeeper, Kafka, Spark and Flink first. 48 | Run the Flink benchmark: `sh bin/runFlinkBenchmark.sh`. 49 | Run the Spark benchmark: `sh bin/runSparkBenchmark.sh`. 50 | Run both the Flink and Spark benchmarks: `sh bin/runAll.sh`. 51 | 52 | ## Result 53 | The results are saved to flink/result/result.log and spark/result/result.log; the result format looks like the following: 54 | ``` 55 | Finished time: 2019-10-30 19:07:26; q1.sql Runtime: 58s TPS:10709265 56 | Finished time: 2019-10-30 19:08:37; q2.sql Runtime: 57s TPS:8061793 57 | Finished time: 2019-10-30 19:09:51; q5.sql Runtime: 57s TPS:4979921 58 | ``` 59 |
-------------------------------------------------------------------------------- /bin/runAll.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curDir=$(cd `dirname $0`;pwd) 4 | rootDir=$(dirname $curDir) 5 | 6 | if [ -e $rootDir/conf/env ]; then 7 | source $rootDir/conf/env 8 | fi 9 | 10 | mainClass1=com.intel.streaming_benchmark.flink.Benchmark 11 | mainClass2=com.intel.streaming_benchmark.spark.Benchmark 12 | dataGenClass=com.intel.streaming_benchmark.Datagen 13 | HOSTNAME=`hostname` 14 | 15 | echo "Run Flink benchmark!" 16 | for sql in `cat $rootDir/conf/queriesToRun`; 17 | do 18 | echo "Data generator start!" 19 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql flink"; done 20 | echo "RUNNING $sql" 21 | nohup $FLINK_HOME/bin/flink run -c $mainClass1 $rootDir/flink/target/flink-1.0-SNAPSHOT.jar $CONF $sql >> $rootDir/flink/log/${sql}.log 2>&1 & 22 | sleep $DATAGEN_TIME 23 | FLINK_ID=`"$FLINK_HOME/bin/flink" list | grep "$sql" | awk '{print $4}'; true` 24 | $FLINK_HOME/bin/flink cancel $FLINK_ID 25 | echo $FLINK_ID 26 | sleep 10 27 | done 28 | 29 | sleep 30 30 | 31 | echo "Run Spark benchmark!" 32 | for sql in `cat $rootDir/conf/queriesToRun`; 33 | do 34 | echo "Data generator start!" 35 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql spark"; done 36 | echo "RUNNING $sql" 37 | nohup $SPARK_HOME/bin/spark-submit --master spark://${HOSTNAME}:7077 --class $mainClass2 --deploy-mode client $rootDir/spark/target/spark-1.0-SNAPSHOT.jar $CONF $sql $DATAGEN_TIME >> $rootDir/spark/log/${sql}.log 2>&1 & 38 | sleep $DATAGEN_TIME 39 | done
-------------------------------------------------------------------------------- /bin/runFlinkBenchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curDir=$(cd `dirname $0`;pwd) 4 | rootDir=$(dirname $curDir) 5 | 6 | if [ -e $rootDir/conf/env ]; then 7 | source $rootDir/conf/env 8 | fi 9 | 10 | mainClass=com.intel.streaming_benchmark.flink.Benchmark 11 | dataGenClass=com.intel.streaming_benchmark.Datagen 12 | 13 | for sql in `cat $rootDir/conf/queriesToRun`; 14 | do 15 | echo "Data generator start!"
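# For each query: start dataGenerator.sh on every host listed in conf/dataGenHosts via SSH,
# submit the Flink job for that query, let it run for DATAGEN_TIME seconds, then look up the
# job id with `flink list` and cancel it before moving on to the next query.
# DATAGEN_TIME, THREAD_PER_NODE and CONF are exported by conf/env, which is sourced above.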
16 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql flink"; done 17 | echo "RUNING $sql" 18 | nohup $FLINK_HOME/bin/flink run -c $mainClass $rootDir/flink/target/flink-1.0-SNAPSHOT.jar $CONF $sql >> $rootDir/flink/log/${sql}.log 2>&1 & 19 | sleep $DATAGEN_TIME 20 | FLINK_ID=`"$FLINK_HOME/bin/flink" list | grep "$sql" | awk '{print $4}'; true` 21 | $FLINK_HOME/bin/flink cancel $FLINK_ID 22 | echo $FLINK_ID 23 | sleep 10 24 | done 25 | 26 | -------------------------------------------------------------------------------- /bin/runSparkBenchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curDir=$(cd `dirname $0`;pwd) 4 | rootDir=$(dirname $curDir) 5 | 6 | if [ -e $rootDir/conf/env ]; then 7 | source $rootDir/conf/env 8 | fi 9 | 10 | mainClass=com.intel.streaming_benchmark.spark.Benchmark 11 | dataGenClass=com.intel.streaming_benchmark.Datagen 12 | HOSTNAME=`hostname` 13 | 14 | for sql in `cat $rootDir/conf/queriesToRun`; 15 | do 16 | echo "Data generator start!" 17 | for host in `cat $rootDir/conf/dataGenHosts`;do ssh $host "sh $rootDir/utils/dataGenerator.sh $DATAGEN_TIME $THREAD_PER_NODE $sql spark"; done 18 | echo "RUNING $sql" 19 | nohup $SPARK_HOME/bin/spark-submit --master spark://${HOSTNAME}:7077 --class $mainClass --deploy-mode client $rootDir/spark/target/spark-1.0-SNAPSHOT.jar $CONF $sql $DATAGEN_TIME >> $rootDir/spark/log/${sql}.log 2>&1 & 20 | # $SPARK_HOME/bin/spark-submit --master spark://${HOSTNAME}:7077 --class $mainClass --deploy-mode client $rootDir/spark/target/spark-1.0-SNAPSHOT.jar $CONF $sql $DATAGEN_TIME 21 | sleep $DATAGEN_TIME 22 | done 23 | -------------------------------------------------------------------------------- /common/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | streaming_benchmark 7 | com.intel.streaming_benchmark 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | common 13 | 14 | 15 | 16 | 17 | org.codehaus.mojo 18 | build-helper-maven-plugin 19 | 1.4 20 | 21 | 22 | add-source 23 | generate-sources 24 | 25 | add-source 26 | 27 | 28 | 29 | src/main/scala 30 | src/main/java 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | net.alchim31.maven 39 | scala-maven-plugin 40 | 3.2.2 41 | 42 | 43 | scala-compile-first 44 | process-resources 45 | 46 | add-source 47 | compile 48 | 49 | 50 | 51 | scala-test-compile 52 | process-test-resources 53 | 54 | testCompile 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /common/src/main/java/com/intel/streaming_benchmark/common/BenchLogUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.intel.streaming_benchmark.common; 19 | 20 | import java.io.File; 21 | import java.io.PrintWriter; 22 | 23 | public class BenchLogUtil { 24 | private static PrintWriter out; 25 | 26 | public static void init() throws Exception { 27 | File file = new File("/tmp/benchlog-flink.txt"); 28 | out = new PrintWriter(file); 29 | } 30 | 31 | public static void logMsg(String msg) { 32 | try { 33 | if (out == null) { 34 | init(); 35 | } 36 | } catch (Exception e) { 37 | e.printStackTrace(); 38 | } 39 | out.println(msg); 40 | out.flush(); 41 | System.out.println(msg); 42 | } 43 | 44 | public static void close() { 45 | if (out != null) { 46 | out.close(); 47 | } 48 | } 49 | 50 | public static void handleError(String msg) { 51 | System.err.println(msg); 52 | System.exit(1); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /common/src/main/java/com/intel/streaming_benchmark/common/ConfigLoader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.intel.streaming_benchmark.common; 19 | 20 | import java.io.BufferedReader; 21 | import java.io.FileNotFoundException; 22 | import java.io.FileReader; 23 | import java.io.IOException; 24 | import java.util.HashMap; 25 | import java.util.Map; 26 | 27 | public class ConfigLoader { 28 | private String ConfigFileName = null; 29 | private Map store; 30 | 31 | public ConfigLoader(String filename){ 32 | ConfigFileName = filename; 33 | store = new HashMap(); 34 | // Load and parse config 35 | try { 36 | BufferedReader br = new BufferedReader(new FileReader(filename)); 37 | String line = br.readLine(); 38 | while(line != null){ 39 | if ((line.length()>0) && (line.charAt(0)!='#')) { 40 | String[] words = line.split("\\s+"); 41 | if (words.length == 2) { 42 | String key = words[0]; 43 | String value = words[1]; 44 | store.put(key, value); 45 | } else if (words.length == 1) { 46 | String key = words[0]; 47 | store.put(key, ""); 48 | } else { 49 | if (!line.startsWith("streambench")) 50 | System.out.println("Warning: unknown config parsed, skip:" + line); 51 | } 52 | } 53 | line = br.readLine(); 54 | } 55 | } catch (FileNotFoundException e) { 56 | System.out.println("ERROR: Config file not found! Should not happen. Caused by:"); 57 | } catch (IOException e) { 58 | System.out.println("ERROR: IO exception during read file. Should not happen. 
Caused by:"); 59 | e.printStackTrace(); 60 | } 61 | } 62 | 63 | public String getProperty(String key){ 64 | if (store.containsKey(key)) 65 | return (String) store.get(key); 66 | else { 67 | System.out.println("ERROR: Unknown config key:" + key); 68 | return null; 69 | } 70 | } 71 | 72 | public void merge(String fileName){ 73 | 74 | try{ 75 | BufferedReader br = new BufferedReader(new FileReader(fileName)); 76 | String line = br.readLine(); 77 | while(line != null) { 78 | if ((line.length() > 0) && (line.charAt(0) != '#')) { 79 | String[] words = line.split("\\s+"); 80 | String key = words[0]; 81 | String value = words[1]; 82 | if(store.containsKey(key)){ 83 | store.replace(key,value); 84 | }else { 85 | store.put(key, value); 86 | } 87 | } 88 | line = br.readLine(); 89 | } 90 | }catch (FileNotFoundException e) { 91 | System.out.println("ERROR: Config file not found! Should not happen. Caused by:"); 92 | } catch (IOException e) { 93 | System.out.println("ERROR: IO exception during read file. Should not happen. Caused by:"); 94 | e.printStackTrace(); 95 | } 96 | 97 | 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /common/src/main/java/com/intel/streaming_benchmark/common/DateUtils.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.common; 2 | 3 | import java.io.ByteArrayOutputStream; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.text.SimpleDateFormat; 7 | import java.util.Calendar; 8 | import java.util.Date; 9 | import java.util.Random; 10 | 11 | /** 12 | * Time data format converter 13 | */ 14 | public class DateUtils { 15 | public static final int dayOfMillis = 86400000; 16 | public static final String TIME_FORMAT = "yyyy-MM-dd HH:mm:ss"; 17 | public static final String DATE_FORMAT = "yyyy-MM-dd"; 18 | public static final String DATEKEY_FORMAT = "yyyyMMdd"; 19 | 20 | /** 21 | * Convert millisecond timestamps into: yyyy-MM-dd HH:mm:ss 22 | * 23 | * @param time 24 | * @return 25 | */ 26 | public static String parseLong2String(long time) { 27 | return parseLong2String(time, TIME_FORMAT); 28 | } 29 | 30 | /** 31 | * Convert millisecond timestamps into defined date format 32 | * 33 | * @param time 34 | * @param pattern 35 | * @return 36 | */ 37 | public static String parseLong2String(long time, String pattern) { 38 | return parseLong2String(time, new SimpleDateFormat(pattern)); 39 | } 40 | 41 | /** 42 | * Convert millisecond timestamps into date according to formatter 43 | * 44 | * @param time 45 | * @param sdf 46 | * @return 47 | */ 48 | public static String parseLong2String(long time, SimpleDateFormat sdf) { 49 | Calendar cal = Calendar.getInstance(); 50 | cal.setTimeInMillis(time); 51 | return sdf.format(cal.getTime()); 52 | } 53 | 54 | /** 55 | * Convert string time into long timestamps 56 | * 57 | * @param date time type,format:yyyy-MM-dd HH:mm:ss 58 | * @return 59 | */ 60 | public static long parseString2Long(String date) { 61 | return parseString2Long(date, TIME_FORMAT); 62 | } 63 | 64 | /** 65 | * Convert string time into long timestamps according to the time format string 66 | * 67 | * @param date 68 | * @param pattern 69 | * @return 70 | */ 71 | public static long parseString2Long(String date, String pattern) { 72 | return parseString2Long(date, new SimpleDateFormat(pattern)); 73 | } 74 | 75 | /** 76 | * Convert string time into long timestamps according to the time format string 77 | * 78 | * @param date 79 | * @param sdf 80 
| * @return 81 | */ 82 | public static long parseString2Long(String date, SimpleDateFormat sdf) { 83 | try { 84 | return sdf.parse(date).getTime(); 85 | } catch (Exception e) { 86 | throw new RuntimeException(e); 87 | } 88 | } 89 | 90 | /** 91 | * Convert long timestamps into the value according to the time type 92 | * 93 | * @param millis milliseconds timestamp 94 | * @param type time type 95 | * @return 96 | */ 97 | public static int getSpecificDateValueOfDateTypeEnum(long millis, DateTypeEnum type) { 98 | Calendar cal = Calendar.getInstance(); 99 | cal.setTimeInMillis(millis); 100 | switch (type) { 101 | case YEAR: 102 | return cal.get(Calendar.YEAR); 103 | case MONTH: 104 | return cal.get(Calendar.MONTH) + 1; 105 | case DAY: 106 | return cal.get(Calendar.DAY_OF_MONTH); 107 | case HOUR: 108 | return cal.get(Calendar.HOUR_OF_DAY); 109 | case MINUTE: 110 | return cal.get(Calendar.MINUTE); 111 | case SECOND: 112 | return cal.get(Calendar.SECOND); 113 | case MILLISECOND: 114 | return cal.get(Calendar.MILLISECOND); 115 | } 116 | 117 | throw new IllegalArgumentException("Parameter exception"); 118 | } 119 | 120 | /** 121 | * get the date of the day,format:yyyy-MM-dd 122 | * 123 | * @return Date of the day 124 | */ 125 | public static String getTodayDate() { 126 | return new SimpleDateFormat(DATE_FORMAT).format(new Date()); 127 | } 128 | 129 | /** 130 | * Get a random milliseconds timestamps of today 131 | * 132 | * @param random 133 | * @return 134 | */ 135 | public static long getRandomTodayTimeOfMillis(Random random) { 136 | Calendar cal = Calendar.getInstance(); 137 | cal.set(Calendar.HOUR_OF_DAY, 0); 138 | cal.set(Calendar.MINUTE, 0); 139 | cal.set(Calendar.SECOND, 0); 140 | cal.set(Calendar.MILLISECOND, 0); 141 | if (random.nextDouble() <= 0.7) { 142 | // [0-21] => 70% 143 | int millis = dayOfMillis / 8 * 7; 144 | cal.add(Calendar.MILLISECOND, 1 + random.nextInt(millis)); 145 | } else { 146 | // [1-23] => 30% 147 | int millis = dayOfMillis / 24; 148 | cal.add(Calendar.MILLISECOND, millis + random.nextInt(millis * 23)); 149 | } 150 | return cal.getTimeInMillis(); 151 | } 152 | 153 | /** 154 | * Time type 155 | */ 156 | public static enum DateTypeEnum { 157 | YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND 158 | } 159 | 160 | /** 161 | * Judge if time1 is before time2 162 | * 163 | * @param time1 164 | * @param time2 165 | * @return Judgement result 166 | */ 167 | public static boolean before(String time1, String time2) { 168 | try { 169 | SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT); 170 | Date dateTime1 = sdf.parse(time1); 171 | Date dateTime2 = sdf.parse(time2); 172 | 173 | if (dateTime1.before(dateTime2)) { 174 | return true; 175 | } 176 | } catch (Exception e) { 177 | e.printStackTrace(); 178 | } 179 | return false; 180 | } 181 | 182 | /** 183 | * Judge if time1 is after time2 184 | * 185 | * @param time1 186 | * @param time2 187 | * @return Judgement result 188 | */ 189 | public static boolean after(String time1, String time2) { 190 | try { 191 | SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT); 192 | Date dateTime1 = sdf.parse(time1); 193 | Date dateTime2 = sdf.parse(time2); 194 | 195 | if (dateTime1.after(dateTime2)) { 196 | return true; 197 | } 198 | } catch (Exception e) { 199 | e.printStackTrace(); 200 | } 201 | return false; 202 | } 203 | 204 | /** 205 | * Calculate time difference(Unit: second) 206 | * 207 | * @param time1 208 | * @param time2 209 | * @return difference 210 | */ 211 | public static int minus(String time1, String time2) { 212 | try { 213 
| SimpleDateFormat sdf = new SimpleDateFormat(TIME_FORMAT); 214 | Date datetime1 = sdf.parse(time1); 215 | Date datetime2 = sdf.parse(time2); 216 | 217 | long millisecond = datetime1.getTime() - datetime2.getTime(); 218 | 219 | return Integer.valueOf(String.valueOf(millisecond / 1000)); 220 | } catch (Exception e) { 221 | e.printStackTrace(); 222 | } 223 | return 0; 224 | } 225 | 226 | /** 227 | *get year, month, day and hour 228 | * 229 | * @param datetime time(yyyy-MM-dd HH:mm:ss) 230 | * @return result(yyyy-MM-dd_HH) 231 | */ 232 | public static String getDateHour(String datetime) { 233 | String date = datetime.split(" ")[0]; 234 | String hourMinuteSecond = datetime.split(" ")[1]; 235 | String hour = hourMinuteSecond.split(":")[0]; 236 | return date + "_" + hour; 237 | } 238 | 239 | /** 240 | * get the date of yesterday(yyyy-MM-dd) 241 | * 242 | * @return the date of yesterday 243 | */ 244 | public static String getYesterdayDate() { 245 | Calendar cal = Calendar.getInstance(); 246 | cal.setTime(new Date()); 247 | cal.add(Calendar.DAY_OF_YEAR, -1); 248 | 249 | Date date = cal.getTime(); 250 | 251 | SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT); 252 | return sdf.format(date); 253 | } 254 | 255 | /** 256 | * format date,reserve minute 257 | * yyyyMMddHHmm 258 | * 259 | * @param date 260 | * @return 261 | */ 262 | public static String formatTimeMinute(Date date) { 263 | SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmm"); 264 | return sdf.format(date); 265 | } 266 | 267 | public static String fileToString(File file) throws Exception{ 268 | FileInputStream inStream = new FileInputStream(file); 269 | ByteArrayOutputStream outStream = new ByteArrayOutputStream(); 270 | try { 271 | 272 | Boolean reading = true; 273 | while (reading) { 274 | int c = inStream.read(); 275 | if(c == -1){ 276 | reading = false; 277 | }else{ 278 | outStream.write(c); 279 | } 280 | } 281 | outStream.flush(); 282 | }catch (Exception e){ 283 | System.err.println(e.getMessage()); 284 | }finally { 285 | inStream.close(); 286 | } 287 | return new String(outStream.toByteArray(), "UTF-8"); 288 | } 289 | 290 | } 291 | -------------------------------------------------------------------------------- /common/src/main/java/com/intel/streaming_benchmark/common/StreamBenchConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.intel.streaming_benchmark.common; 19 | 20 | /** 21 | * All name of configurations used in StreamBench are defined here. Later I plan to refactor 22 | * property name. With this mapping layer, the underlying Java/Scala code don't need to be 23 | * changed. 
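 * Illustrative usage (based on how these constants are used elsewhere in this project):
 * a ConfigLoader built from conf/benchmarkConf.yaml resolves, for example,
 * configLoader.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST) to the value of the
 * "streambench.kafka.brokerList" entry.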
24 | */ 25 | public class StreamBenchConfig { 26 | // ===================================== 27 | // General StreamBench Conf 28 | // ===================================== 29 | public static String ZK_HOST = "streambench.zkHost"; 30 | 31 | public static String CONSUMER_GROUP = "streambench.kafka.consumerGroup"; 32 | 33 | public static String KAFKA_BROKER_LIST = "streambench.kafka.brokerList"; 34 | 35 | public static String DATAGEN_TIME = "streambench.dataGen.time"; 36 | 37 | public static String DATAGEN_THROUGHPUT = "streambench.dataGen.throughput"; 38 | 39 | 40 | 41 | // ===================================== 42 | // Data Generator Related Conf 43 | // ===================================== 44 | // public static String DATAGEN_RECORDS_PRE_INTERVAL = "hibench.streambench.datagen.recordsPerInterval"; 45 | // 46 | // public static String DATAGEN_INTERVAL_SPAN = "hibench.streambench.datagen.intervalSpan"; 47 | // 48 | // public static String DATAGEN_TOTAL_RECORDS = "hibench.streambench.datagen.totalRecords"; 49 | // 50 | // public static String DATAGEN_TOTAL_ROUNDS = "hibench.streambench.datagen.totalRounds"; 51 | // 52 | // public static String DATAGEN_RECORD_LENGTH = "hibench.streambench.datagen.recordLength"; 53 | // 54 | // public static String DATAGEN_PRODUCER_NUMBER = "hibench.streambench.datagen.producerNumber"; 55 | // ===================================== 56 | // Spark Streaming Related Conf 57 | // ===================================== 58 | // public static String SPARK_BATCH_INTERVAL = "hibench.streambench.spark.batchInterval"; 59 | // 60 | // public static String SPARK_CHECKPOINT_PATH = "hibench.streambench.spark.checkpointPath"; 61 | // 62 | // public static String SPARK_ENABLE_WAL = "hibench.streambench.spark.enableWAL"; 63 | // 64 | // public static String SPARK_USE_DIRECT_MODE = "hibench.streambench.spark.useDirectMode"; 65 | // 66 | // public static String SPARK_STORAGE_LEVEL = "hibench.streambench.spark.storageLevel"; 67 | // 68 | // public static String SPARK_RECEIVER_NUMBER = "hibench.streambench.spark.receiverNumber"; 69 | 70 | // ====================================== 71 | // Flink Related Conf 72 | // ====================================== 73 | 74 | 75 | public static String FLINK_CHECKPOINTDURATION = "streambench.flink.checkpointDuration"; 76 | 77 | public static String FLINK_RESULT_DIR = "streambench.flink.result.dir"; 78 | 79 | public static String FLINK_TIMETYPE = "streambench.flink.timeType"; 80 | 81 | 82 | public static String SQL_LOCATION= "streambench.flink.sqlLocation"; 83 | 84 | 85 | 86 | 87 | } 88 | -------------------------------------------------------------------------------- /common/src/main/scala/com/intel/streaming_benchmark/common/QueryConfig.scala: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.common 2 | 3 | object QueryConfig { 4 | val queryScene: Map[String, String] = Map( 5 | "q1.sql" -> "Shopping_record", 6 | "q2.sql" -> "Real_time_Advertising", 7 | "q3.sql" -> "Real_time_Advertising", 8 | "q4.sql" -> "Real_time_Advertising", 9 | "q5.sql" -> "User_visit_session_record", 10 | "q6.sql" -> "User_visit_session_record", 11 | "q7.sql" -> "User_visit_session_record", 12 | "q8.sql" -> "User_visit_session_record", 13 | "q9.sql" -> "Real_time_Advertising", 14 | "q10.sql" -> "User_visit_session_record", 15 | "q11.sql" -> "User_visit_session_record", 16 | "q12.sql" -> "User_visit_session_record" 17 | ) 18 | 19 | val queryTables: Map[String, String] = Map( 20 | "q1.sql" -> "shopping", 21 
| "q2.sql" -> "click", 22 | "q3.sql" -> "imp", 23 | "q4.sql" -> "dau,click", 24 | "q5.sql" -> "userVisit", 25 | "q6.sql" -> "userVisit", 26 | "q7.sql" -> "userVisit", 27 | "q8.sql" -> "userVisit", 28 | "q9.sql" -> "dau,click", 29 | "q10.sql" -> "userVisit", 30 | "q11.sql" -> "userVisit", 31 | "q12.sql" -> "userVisit" 32 | ) 33 | 34 | def getScene(query: String): String ={ 35 | if (queryScene.contains(query)) { 36 | queryScene(query) 37 | } else { 38 | throw new IllegalArgumentException(s"$query does not exist!") 39 | } 40 | } 41 | 42 | def getTables(query: String): String ={ 43 | if (queryTables.contains(query)) { 44 | queryTables(query) 45 | } else { 46 | throw new IllegalArgumentException(s"$query does not exist!") 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /common/src/main/scala/com/intel/streaming_benchmark/common/Schema.scala: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.common 2 | 3 | trait Schema { 4 | 5 | def getFieldNames: Array[String] 6 | 7 | def getFieldTypes: Array[String] 8 | 9 | 10 | } 11 | 12 | -------------------------------------------------------------------------------- /common/src/main/scala/com/intel/streaming_benchmark/common/TableSchemaProvider.scala: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.common 2 | 3 | case class Column( 4 | name: String, 5 | index: Int, 6 | types: String 7 | 8 | ) 9 | 10 | trait TableSchema extends Schema { 11 | 12 | val columns: Array[Column] 13 | 14 | def getFieldNames: Array[String] = columns.map(_.name) 15 | 16 | def getFieldTypes: Array[String] = 17 | columns.map(column => column.types) 18 | 19 | } 20 | 21 | object Shopping extends TableSchema { 22 | 23 | override val columns = Array[Column]( 24 | Column("userId", 0, "String"), 25 | Column("commodity", 1, "String"), 26 | Column("times", 2, "LONG") 27 | ) 28 | } 29 | 30 | object Click extends TableSchema { 31 | 32 | override val columns = Array[Column]( 33 | Column("click_time", 0, "Long"), 34 | Column("strategy", 1, "String"), 35 | Column("site", 2, "String"), 36 | Column("pos_id", 3, "String"), 37 | Column("poi_id", 4, "String"), 38 | Column("device_id", 5, "String") 39 | ) 40 | } 41 | 42 | object Imp extends TableSchema { 43 | 44 | override val columns = Array[Column]( 45 | Column("imp_time", 0, "Long"), 46 | Column("strategy", 1, "String"), 47 | Column("site", 2, "String"), 48 | Column("pos_id", 3, "String"), 49 | Column("poi_id", 4, "String"), 50 | Column("cost", 5, "Double"), 51 | Column("device_id", 6, "String") 52 | ) 53 | } 54 | 55 | object Dau extends TableSchema { 56 | 57 | override val columns = Array[Column]( 58 | Column("dau_time", 0, "Long"), 59 | Column("device_id", 1, "String") 60 | ) 61 | } 62 | 63 | object UserVisit extends TableSchema { 64 | 65 | override val columns = Array[Column]( 66 | Column("date", 0, "String"), 67 | Column("userId", 1, "Long"), 68 | Column("sessionId", 2, "String"), 69 | Column("pageId", 3, "Long"), 70 | Column("actionTime", 4, "String"), 71 | Column("searchKeyword", 5, "String"), 72 | Column("clickCategoryId", 6, "String"), 73 | Column("clickProductId", 7, "String"), 74 | Column("orderCategoryIds", 8, "String"), 75 | Column("orderProductIds", 9, "String"), 76 | Column("payCategoryIds", 10, "String"), 77 | Column("payProductIds", 11, "String"), 78 | Column("cityId", 12, "String") 79 | ) 80 | } 81 | 82 | object 
TableSchemaProvider { 83 | val schemaMap: Map[String, Schema] = Map( 84 | "shopping" -> Shopping, 85 | "click" -> Click, 86 | "imp" -> Imp, 87 | "dau" -> Dau, 88 | "userVisit" -> UserVisit 89 | ) 90 | 91 | def getSchema(tableName: String): Schema = { 92 | if (schemaMap.contains(tableName)) { 93 | schemaMap(tableName) 94 | } else { 95 | throw new IllegalArgumentException(s"$tableName does not exist!") 96 | } 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /conf/benchmarkConf.yaml: -------------------------------------------------------------------------------- 1 | streambench.zkHost 10.1.2.166:2181 2 | streambench.kafka.brokerList 10.1.2.143:9093,10.1.2.143:9094,10.1.2.143:9095,10.1.2.143:9096,10.1.2.143:9097,10.1.2.143:9098,10.1.2.143:9099,10.1.2.143:9100,10.1.2.143:9101,10.1.2.143:9102,10.1.2.159:9093,10.1.2.159:9094,10.1.2.159:9095,10.1.2.159:9096,10.1.2.159:9097,10.1.2.159:9098,10.1.2.159:9099,10.1.2.159:9100,10.1.2.159:9101,10.1.2.159:9102,10.1.2.166:9093,10.1.2.166:9094,10.1.2.166:9095,10.1.2.166:9096,10.1.2.166:9097,10.1.2.166:9098,10.1.2.166:9099,10.1.2.166:9100,10.1.2.166:9101,10.1.2.166:9102,10.1.2.164:9093,10.1.2.164:9094,10.1.2.164:9095,10.1.2.164:9096,10.1.2.164:9097,10.1.2.164:9098,10.1.2.164:9099,10.1.2.164:9100,10.1.2.164:9101,10.1.2.164:9102 3 | streambench.kafka.consumerGroup kafka_to_hdfs2 4 | -------------------------------------------------------------------------------- /conf/dataGenHosts: -------------------------------------------------------------------------------- 1 | 10.1.2.143 2 | 10.1.2.159 3 | 10.1.2.164 4 | 10.1.2.166 -------------------------------------------------------------------------------- /conf/env: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BASE_HOME=$(cd `dirname $0`;pwd) 4 | BENCH_HOME=$(dirname $BASE_HOME) 5 | 6 | export DATAGEN_TIME=200 7 | export THREAD_PER_NODE=10 8 | export CONF=$BENCH_HOME/conf/benchmarkConf.yaml 9 | export FLINK_HOME=/opt/Beaver/flink 10 | export SPARK_HOME=/opt/Beaver/spark 11 | -------------------------------------------------------------------------------- /conf/queriesToRun: -------------------------------------------------------------------------------- 1 | q9.sql 2 | -------------------------------------------------------------------------------- /dataGen/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | streaming_benchmark 7 | com.intel.streaming_benchmark 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | dataGen 13 | 14 | 15 | com.alibaba 16 | fastjson 17 | 1.2.58 18 | 19 | 20 | 21 | org.apache.kafka 22 | kafka_2.11 23 | 0.10.2.1 24 | 25 | 26 | 27 | 28 | com.intel.streaming_benchmark 29 | common 30 | 1.0-SNAPSHOT 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | org.apache.maven.plugins 39 | maven-shade-plugin 40 | 2.4.3 41 | 42 | 43 | 44 | 45 | *:* 46 | 47 | META-INF/*.SF 48 | META-INF/*.DSA 49 | META-INF/*.RSA 50 | 51 | 52 | 53 | 54 | 55 | junit:junit 56 | org.slf4j:slf4j-simple 57 | org.slf4j:slf4j-log4j12 58 | com.101tec:zkclient 59 | com.github.sgroschupf:zkclient 60 | org.apache.httpcomponents:httpclient 61 | 62 | 63 | 64 | 65 | 66 | package 67 | 68 | shade 69 | 70 | 71 | 72 | 73 | 74 | net.alchim31.maven 75 | scala-maven-plugin 76 | 3.2.0 77 | 78 | 79 | compile-scala 80 | compile 81 | 82 | add-source 83 | compile 84 | 85 | 86 | 87 | test-compile-scala 88 | test-compile 89 | 90 | add-source 91 | testCompile 92 | 93 | 94 | 95 | 96 | 2.11.8 97 | 98 | 99 | 
100 | org.codehaus.mojo 101 | build-helper-maven-plugin 102 | 1.4 103 | 104 | 105 | add-source 106 | generate-sources 107 | 108 | add-source 109 | 110 | 111 | 112 | ../common/src/main/scala 113 | ../common/src/main/java 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | org.apache.maven.plugins 122 | maven-compiler-plugin 123 | 124 | 1.8 125 | 1.8 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | maven-clean-plugin 134 | 3.1.0 135 | 136 | 137 | 138 | maven-resources-plugin 139 | 3.0.2 140 | 141 | 142 | maven-compiler-plugin 143 | 3.8.0 144 | 145 | 146 | maven-surefire-plugin 147 | 2.22.1 148 | 149 | 150 | maven-jar-plugin 151 | 3.0.2 152 | 153 | 154 | maven-install-plugin 155 | 2.5.2 156 | 157 | 158 | maven-deploy-plugin 159 | 2.8.2 160 | 161 | 162 | 163 | maven-site-plugin 164 | 3.7.1 165 | 166 | 167 | net.alchim31.maven 168 | scala-maven-plugin 169 | 3.2.2 170 | 171 | 172 | scala-compile-first 173 | process-resources 174 | 175 | add-source 176 | compile 177 | 178 | 179 | 180 | scala-test-compile 181 | process-test-resources 182 | 183 | testCompile 184 | 185 | 186 | 187 | 188 | 189 | org.apache.maven.plugins 190 | maven-compiler-plugin 191 | 3.2 192 | 193 | 1.8 194 | 1.8 195 | UTF-8 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /dataGen/src/main/java/com/intel/streaming_benchmark/Datagen.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark; 2 | 3 | import com.intel.streaming_benchmark.common.ConfigLoader; 4 | import com.intel.streaming_benchmark.common.QueryConfig; 5 | import com.intel.streaming_benchmark.utils.GetProducer; 6 | 7 | import java.util.concurrent.ExecutorService; 8 | import java.util.concurrent.Executors; 9 | 10 | public class Datagen { 11 | public static void main(String[] args) { 12 | 13 | System.out.println("------------------Already input args[]------------------"); 14 | //the time to generate data 15 | Long time = Long.valueOf(args[0]); 16 | System.out.println("------------------time: " + time + "s-------------------"); 17 | //the topic of Kafka 18 | String sqlName = args[2]; 19 | System.out.println("------------------sql: " + sqlName + "------------------"); 20 | String scene = QueryConfig.getScene(sqlName); 21 | 22 | ConfigLoader configLoader = new ConfigLoader(args[3]); 23 | System.out.println("------------------config: " + args[3] + "---------------"); 24 | //the number of thread for datagen 25 | int producerNumber = Integer.valueOf(args[1]); 26 | System.out.println("----------Thread_per_node:" + producerNumber + "--------"); 27 | ExecutorService pool = Executors.newFixedThreadPool(producerNumber); 28 | for(int i = 0; i < producerNumber; i++){ 29 | pool.execute(new GetProducer(scene, time, configLoader)); 30 | } 31 | System.out.println("============ StreamBench Data Generator ============"); 32 | pool.shutdown(); 33 | System.out.println("======== StreamBench Data Generator Finished ========"); 34 | 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /dataGen/src/main/java/com/intel/streaming_benchmark/utils/ConfigLoader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.intel.streaming_benchmark.utils; 19 | 20 | import java.io.BufferedReader; 21 | import java.io.FileNotFoundException; 22 | import java.io.FileReader; 23 | import java.io.IOException; 24 | import java.util.HashMap; 25 | import java.util.Map; 26 | 27 | public class ConfigLoader { 28 | private String ConfigFileName = null; 29 | private Map store; 30 | 31 | public ConfigLoader(String filename){ 32 | ConfigFileName = filename; 33 | store = new HashMap(); 34 | // Load and parse config 35 | try { 36 | BufferedReader br = new BufferedReader(new FileReader(filename)); 37 | String line = br.readLine(); 38 | while(line != null){ 39 | if ((line.length()>0) && (line.charAt(0)!='#')) { 40 | String[] words = line.split("\\s+"); 41 | if (words.length == 2) { 42 | String key = words[0]; 43 | String value = words[1]; 44 | store.put(key, value); 45 | } else if (words.length == 1) { 46 | String key = words[0]; 47 | store.put(key, ""); 48 | } else { 49 | if (!line.startsWith("hibench")) 50 | System.out.println("Warning: unknown config parsed, skip:" + line); 51 | } 52 | } 53 | line = br.readLine(); 54 | } 55 | } catch (FileNotFoundException e) { 56 | System.out.println("ERROR: Config file not found! Should not happen. Caused by:"); 57 | } catch (IOException e) { 58 | System.out.println("ERROR: IO exception during read file. Should not happen. 
Caused by:"); 59 | e.printStackTrace(); 60 | } 61 | } 62 | 63 | public String getProperty(String key){ 64 | if (store.containsKey(key)) 65 | return (String) store.get(key); 66 | else { 67 | System.out.println("ERROR: Unknown config key:" + key); 68 | return null; 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /dataGen/src/main/java/com/intel/streaming_benchmark/utils/Constants.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.utils; 2 | 3 | /** 4 | * Project Basic dataUtil.Constants 5 | */ 6 | public interface Constants { 7 | 8 | String SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR = "|"; 9 | String SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR_ESCAOE = "\\|"; 10 | 11 | /** 12 | * Project Configuration dataUtil.Constants 13 | */ 14 | String JDBC_DRIVER = "jdbc.driver"; 15 | String JDBC_DATASOURCE_SIZE = "jdbc.datasource.size"; 16 | String JDBC_URL = "jdbc.url"; 17 | String JDBC_USER = "jdbc.user"; 18 | String JDBC_PASSWORD = "jdbc.password"; 19 | 20 | String SPARK_SQL_JDBC_URL = "spark.sql.jdbc.url"; 21 | String SPARK_SQL_JDBC_URL_PROD = "spark.sql.jdbc.url.prod"; 22 | 23 | String SPARK_LOCAL = "spark.local"; 24 | 25 | String KAFKA_METADATA_BROKER_LIST = "metadata.broker.list"; 26 | String KAFKA_TOPICS = "kafka.topics"; 27 | String KAFKA_ZOOKEEPER_URL = "zookeeper.connect.url"; 28 | 29 | 30 | /** 31 | * Spark Application dataUtil.Constants 32 | */ 33 | String SPARK_APP_NAME_SESSION = "UserVisitSessionAnalyzeSpark_"; 34 | String SPARK_APP_NAME_PRODUCT = "AreaTop3ProductSpark_"; 35 | String SPARK_APP_NAME_AD = "AdClickRealTimeStateSpark"; 36 | 37 | String FIELD_ACTION_TIME = "action_time"; 38 | String FIELD_SESSION_ID = "session_id"; 39 | String FIELD_SEARCH_KEYWORDS = "search_keyword"; 40 | String FIELD_CLICK_CATEGORY_ID = "click_category_id"; 41 | String FIELD_AGE = "age"; 42 | String FIELD_PROFESSIONAL = "professional"; 43 | String FIELD_CITY = "city"; 44 | String FIELD_SEX = "sex"; 45 | 46 | 47 | String FIELD_CATEGORY_ID = "categoryId"; 48 | String FIELD_CLICK_COUNT = "clickCount"; 49 | String FIELD_ORDER_COUNT = "orderCount"; 50 | String FIELD_PAY_COUNT = "payCount"; 51 | 52 | String SESSION_COUNT = "session_count"; 53 | 54 | String TIME_PERIOD_1s_4s = "1s_4s"; 55 | String TIME_PERIOD_4s_7s = "4s_7s"; 56 | String TIME_PERIOD_7s_10s = "7s_10s"; 57 | String TIME_PERIOD_10s_30s = "10s_30s"; 58 | String TIME_PERIOD_30s_60s = "30s_60s"; 59 | String TIME_PERIOD_1m_3m = "1m_3m"; 60 | String TIME_PERIOD_3m_10m = "3m_10m"; 61 | String TIME_PERIOD_10m_30m = "10m_30m"; 62 | String TIME_PERIOD_30m = "30m"; 63 | 64 | String STEP_PERIOD_1_3 = "1_3"; 65 | String STEP_PERIOD_4_6 = "4_6"; 66 | String STEP_PERIOD_7_9 = "7_9"; 67 | String STEP_PERIOD_10_29 = "10_29"; 68 | String STEP_PERIOD_30_59 = "30_59"; 69 | String STEP_PERIOD_60 = "60"; 70 | 71 | /** 72 | * Source Table Column Names 73 | */ 74 | String UVA_FIELD_USER_ID = "user_id"; 75 | String UVA_FIELD_DATE = "date"; 76 | String UVA_FIELD_SESSION_ID = "session_id"; 77 | String UVA_FIELD_ACTION_TIME = "action_time"; 78 | 79 | /** 80 | * Task dataUtil.Constants 81 | */ 82 | String PARAM_SAMPLE_TYPE = "sampleType"; 83 | String PARAM_SESSION_RATIO = "sessionRatio"; 84 | String PARAM_START_DATE = "startDate"; 85 | String PARAM_END_DATE = "endDate"; 86 | String PARAM_START_AGE = "startAge"; 87 | String PARAM_END_AGE = "endAge"; 88 | String PARAM_PROFESSIONALS = "professionals"; 89 | String PARAM_CITIES = "cities"; 90 | String 
PARAM_SEX = "sex"; 91 | String PARAM_KEYWORDS = "keywords"; 92 | String PARAM_CATEGORY_IDS = "categoryIds"; 93 | String FIELD_VISIT_LENGTH = "visitLength"; 94 | String FIELD_STEP_LENGTH = "stepLength"; 95 | String FIELD_START_TIME = "startTime"; 96 | } 97 | -------------------------------------------------------------------------------- /dataGen/src/main/java/com/intel/streaming_benchmark/utils/GetProducer.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.utils; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import com.intel.streaming_benchmark.ClickProducer; 5 | import com.intel.streaming_benchmark.common.ConfigLoader; 6 | import com.intel.streaming_benchmark.common.StreamBenchConfig; 7 | import org.apache.kafka.clients.producer.KafkaProducer; 8 | import org.apache.kafka.clients.producer.ProducerConfig; 9 | import org.apache.kafka.clients.producer.ProducerRecord; 10 | import java.net.InetAddress; 11 | import java.text.SimpleDateFormat; 12 | import java.util.Properties; 13 | import java.util.Random; 14 | 15 | public class GetProducer extends Thread{ 16 | 17 | private String topic; 18 | private Long time; 19 | private ConfigLoader cl; 20 | public GetProducer(String topic, Long time , ConfigLoader cl){ 21 | 22 | super(); 23 | this.topic = topic; 24 | this.time = time; 25 | this.cl = cl; 26 | } 27 | 28 | @Override 29 | public void run() { 30 | 31 | System.out.println(Thread.currentThread().getName() + "======="); 32 | 33 | if (topic.equals("Shopping_record")){ 34 | datagenTopic1(cl); 35 | } 36 | else if(topic.equals("Real_time_Advertising")){ 37 | datagenTopic2(cl); 38 | } 39 | else if(topic.equals("User_visit_session_record")){ 40 | new ClickProducer(time, cl).run(); 41 | }else{ 42 | System.out.println("No such scene!"); 43 | } 44 | 45 | } 46 | 47 | private KafkaProducer createProducer(ConfigLoader cl) { 48 | 49 | Properties properties = new Properties(); 50 | properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer"); 51 | properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer"); 52 | properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST)); 53 | return new KafkaProducer<>(properties); 54 | } 55 | 56 | private void datagenTopic1(ConfigLoader cl) { 57 | 58 | String[] commodities = {"milk", "bag", "book","desk","sweet", "food", "disk","pen", "shoe", "animal","phone", "paper", "cup", "light", "glass", "power", "GameBoy", "chopsticks"}; 59 | Random random = new Random(); 60 | KafkaProducer producer = createProducer(cl); 61 | SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 62 | long start = System.currentTimeMillis(); 63 | Boolean flag = true; 64 | Long count = 0L; 65 | Long totalLength = 0L; 66 | String threadName = Thread.currentThread().getName(); 67 | 68 | try { 69 | 70 | InetAddress address = InetAddress.getLocalHost(); 71 | String hostName = address.getHostName().toString(); 72 | while(flag){ 73 | byte[] message = (hostName + "_" + count + "_" + threadName + "," + commodities[random.nextInt(commodities.length)] +"," + System.currentTimeMillis()).getBytes(); 74 | producer.send(new ProducerRecord("shopping", message)); 75 | count = count + 1; 76 | totalLength = totalLength + message.length; 77 | if((System.currentTimeMillis() - start) > time*1000){ 78 | flag = false; 79 | } 80 | } 81 | }catch (Exception e){ 82 | 
e.printStackTrace(); 83 | } 84 | 85 | producer.close(); 86 | } 87 | 88 | private void datagenTopic2(ConfigLoader cl){ 89 | Long count = 0L; 90 | Long totalLength = 0L; 91 | 92 | KafkaProducer producer = createProducer(cl); 93 | long start = System.currentTimeMillis(); 94 | Boolean flag = true; 95 | 96 | Random random = new Random(); 97 | String strategy_all[] ={"t1","t2","t3","t4","t5","t6"};//t1:strategy1, t2:strategy2,,, t6:strategy6 98 | String site_all[] ={"1","2","3"};//1:baidu media,2:toutiao media,3: weibo media 99 | String pos_id_all[] ={"a","b","c"};//a:ad space,b:ad space,c:ad space 100 | String poi_id_all[] ={"1001","1002","1003"};//1001:ad material,1002:ad material,1003:ad material 101 | String cost_all[] ={"0.01","0.02","0.03"};//cost 102 | String device_id_all[] ={"aaaaa","bbbbb","ccccc","ddddd","eeeee","fffff","ggggg"};//device 103 | while(flag){ 104 | 105 | try{ 106 | JSONObject imp = new JSONObject(); 107 | imp.put("imp_time",Long.valueOf(System.currentTimeMillis())); 108 | imp.put("strategy",strategy_all[random.nextInt(strategy_all.length-1)]); 109 | imp.put("site",pos_id_all[random.nextInt(site_all.length-1)]); 110 | imp.put("pos_id",strategy_all[random.nextInt(pos_id_all.length-1)]); 111 | imp.put("poi_id",poi_id_all[random.nextInt(poi_id_all.length-1)]); 112 | imp.put("cost",cost_all[random.nextInt(cost_all.length-1)]); 113 | imp.put("device_id",device_id_all[random.nextInt(device_id_all.length-1)]); 114 | //send exposure log 115 | byte[] imp_message = imp.toJSONString().getBytes(); 116 | producer.send(new ProducerRecord("imp",imp_message)); 117 | count++; 118 | totalLength = totalLength + imp_message.length; 119 | 120 | if (random.nextInt(4) ==1){//the probablity of triggerring Click 121 | JSONObject click =imp; 122 | click.remove("imp_time"); 123 | click.remove("cost"); 124 | click.put("click_time",Long.valueOf(System.currentTimeMillis())); 125 | byte[] click_message = click.toJSONString().getBytes(); 126 | producer.send(new ProducerRecord("click",click_message)); 127 | count++; 128 | totalLength = totalLength + click_message.length; 129 | 130 | if (random.nextInt(2) ==1){//dau time,?50 131 | JSONObject dau = new JSONObject(); 132 | dau.put("dau_time",Long.valueOf(System.currentTimeMillis())); 133 | dau.put("device_id",click.get("device_id").toString()); 134 | byte[] dau_message = dau.toJSONString().getBytes(); 135 | producer.send(new ProducerRecord("dau",dau_message)); 136 | count++; 137 | totalLength = totalLength + dau_message.length; 138 | } 139 | } 140 | if((System.currentTimeMillis() - start) > time*1000){ 141 | flag = false; 142 | } 143 | 144 | }catch (Exception e){ 145 | e.printStackTrace(); 146 | } 147 | } 148 | } 149 | 150 | 151 | } 152 | -------------------------------------------------------------------------------- /dataGen/src/main/scala/com/intel/streaming_benchmark/ClickProducer.scala: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark 2 | 3 | import java.net.InetAddress 4 | import java.util.Properties 5 | import com.alibaba.fastjson.JSONObject 6 | import com.intel.streaming_benchmark.click.{cityTypeSize, citys, keywordSize, keywords, productNumbers, professionalTypeSize, professionals, random, sexTypeSize, sexs, userNumbers} 7 | import com.intel.streaming_benchmark.common.{ConfigLoader, DateUtils, StreamBenchConfig} 8 | import com.intel.streaming_benchmark.utils.Constants 9 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} 10 | 11 | import 
scala.collection.mutable.ArrayBuffer 12 | 13 | class ClickProducer(val time:Long, val cl: ConfigLoader){ 14 | var total = 0L 15 | var length = 0L 16 | var threadName = Thread.currentThread().getName 17 | var hostName = InetAddress.getLocalHost.getHostName 18 | var seed = 0 19 | def run(): Unit = { 20 | // mockUserInfo() 21 | // mockProductInfo 22 | mockUserVisitAction(time) 23 | 24 | } 25 | 26 | private def createProducer = { 27 | val properties = new Properties 28 | properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer") 29 | properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer") 30 | properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST)) 31 | new KafkaProducer[Array[Byte], Array[Byte]](properties) 32 | } 33 | 34 | 35 | /** 36 | * Simulation code for generating user information 37 | * 38 | * @param 39 | * @return 40 | */ 41 | def mockUserInfo() = { 42 | val producer = createProducer 43 | for (i <- 0 until userNumbers) { 44 | val userId = i.toLong 45 | val age = (total % 60).toInt 46 | val userName = s"user_${i}" 47 | val name = s"name_${i}" 48 | val professional = professionals((total % professionalTypeSize).toInt) 49 | val city = citys((total%cityTypeSize).toInt)._2 50 | val sex = sexs((total % sexTypeSize).toInt) 51 | producer.send(new ProducerRecord("userInfo", UserInfo( 52 | userId, userName, name, 53 | age, professional, city, sex).formatted(",").getBytes())) 54 | } 55 | } 56 | 57 | /** 58 | * Simulation code for generating data of userVisitAction 59 | * 60 | * @param 61 | * @return 62 | */ 63 | 64 | def mockUserVisitAction(time: Long) = { 65 | val date: String = DateUtils.getTodayDate() 66 | val producer = createProducer 67 | val start: Long = System.currentTimeMillis() 68 | 69 | // get action time according the time of last action 70 | def getCurrentActionTime(preActionTime: Long): Long = { 71 | preActionTime + total % 60 72 | } 73 | 74 | // generate a produceID and productCategoryNumber 75 | def generateProduceAndCategoryId(): (Long, Long) = { 76 | val produceID = total % productNumbers 77 | (produceID, produceID % click.productCategoryNumbers) 78 | } 79 | 80 | // generate date for pageView 81 | def generatePageView(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = { 82 | if (times < 20) { 83 | // pageView ID:[0,100) 84 | val pageId: Long = total % 100 85 | val actionTime: Long = getCurrentActionTime(preActionTime) 86 | val searchKeyword: String = "" 87 | val clickCategoryId: String = "" 88 | val clickProductId: String = "" 89 | val orderCategoryIds: String = "" 90 | val orderProductIds: String = "" 91 | val payCategoryIds: String = "" 92 | val payProductIds: String = "" 93 | 94 | // Add data 95 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes() 96 | producer.send(new ProducerRecord("userVisit", message)) 97 | length = length + message.length 98 | total = total + 1 99 | // Go to next action 100 | val (t1, t2, t3) = 101 | if (times < 3) { 102 | (4, 7, 9) 103 | } else if (times < 10) { 104 | (2, 4, 7) 105 | } else { 106 | (1, 2, 3) 107 | } 108 | val tmp = seed % 10 109 | seed = seed + 1 110 | if (tmp <= t1) { 111 | // Visit 112 | generatePageView(times + 1, userId, sessionId, cityId, 
actionTime) 113 | } else if (tmp <= t2) { 114 | // Search 115 | generateSearch(times + 1, userId, sessionId, cityId, actionTime) 116 | } else if (tmp <= t3) { 117 | // Click 118 | generateClick(times + 1, userId, sessionId, cityId, actionTime) 119 | } else { 120 | // nothings, finish 121 | } 122 | 123 | } 124 | } 125 | 126 | // Generate data for searching 127 | def generateSearch(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = { 128 | if (times < 20) { 129 | // search ID:[100,150) 130 | val pageId: Long = total % 50 + 100 131 | val actionTime = getCurrentActionTime(preActionTime) 132 | val searchKeyword: String = keywords((total % keywordSize).toInt) 133 | val clickCategoryId: String = "" 134 | val clickProductId: String = "" 135 | val orderCategoryIds: String = "" 136 | val orderProductIds: String = "" 137 | val payCategoryIds: String = "" 138 | val payProductIds: String = "" 139 | 140 | // Add data 141 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes() 142 | producer.send(new ProducerRecord("userVisit",message)) 143 | length = length + message.length 144 | total = total + 1 145 | // Go to next action 146 | val (t1, t2, t3) = 147 | if (times < 3) { 148 | (2, 5, 8) 149 | } else if (times < 10) { 150 | (1, 2, 5) 151 | } else { 152 | (1, 2, 3) 153 | } 154 | val tmp = seed % 10 155 | seed = seed + 1 156 | if (tmp <= t1) { 157 | // Visit 158 | generatePageView(times + 1, userId, sessionId, cityId, actionTime) 159 | } else if (tmp <= t2) { 160 | // Search 161 | generateSearch(times + 1, userId, sessionId, cityId, actionTime) 162 | } else if (tmp <= t3) { 163 | // Click 164 | generateClick(times + 1, userId, sessionId, cityId, actionTime) 165 | } else { 166 | // nothings, finish 167 | } 168 | } 169 | } 170 | 171 | // Generate data for clicking 172 | def generateClick(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = { 173 | if (times < 20) { 174 | // click ID:[150,300) 175 | val pageId: Long = total % 150 + 150 176 | val actionTime = getCurrentActionTime(preActionTime) 177 | val searchKeyword: String = "" 178 | val (productID, categoryID) = generateProduceAndCategoryId() 179 | val clickProductId: String = productID.toString 180 | val clickCategoryId: String = categoryID.toString 181 | val orderCategoryIds: String = "" 182 | val orderProductIds: String = "" 183 | val payCategoryIds: String = "" 184 | val payProductIds: String = "" 185 | 186 | // Add data 187 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes() 188 | producer.send(new ProducerRecord("userVisit", message)) 189 | // Go to next action 190 | total = total + 1 191 | length = length + message.length 192 | 193 | val (t1, t2, t3, t4) = 194 | if (times < 3) { 195 | (3, 6, 15, 18) 196 | } else if (times < 10) { 197 | (2, 4, 11, 15) 198 | } else { 199 | (1, 2, 6, 8) 200 | } 201 | 202 | val tmp = seed % 20 203 | seed = seed + 1 204 | if (tmp <= t1) { 205 | // Visit 206 | generatePageView(times + 1, userId, sessionId, cityId, actionTime) 207 | } else if (tmp <= t2) { 208 | // Search 209 | generateSearch(times + 1, userId, sessionId, cityId, actionTime) 210 | } else if (tmp <= t3) { 211 | // Order 212 | generateOrder(times + 1, 
userId, sessionId, cityId, actionTime) 213 | } else if (tmp <= t4) { 214 | // Click 215 | generateClick(times + 1, userId, sessionId, cityId, actionTime) 216 | } else { 217 | // nothings, finish 218 | } 219 | 220 | } 221 | } 222 | 223 | // Generate date for order 224 | def generateOrder(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long): Unit = { 225 | if (times < 20) { 226 | // order ID:[300,301) 227 | val pageId: Long = 300 228 | val actionTime = getCurrentActionTime(preActionTime) 229 | val searchKeyword: String = "" 230 | val clickProductId: String = "" 231 | val clickCategoryId: String = "" 232 | // There may be some product ordered together, range:[1,6) 233 | val randomProductNumbers = total % 5 + 1 234 | val bf = ArrayBuffer[(Long, Long)]() 235 | for (j <- 0 until randomProductNumbers.toInt) { 236 | bf += generateProduceAndCategoryId() 237 | } 238 | val nbf = bf.distinct 239 | 240 | val orderCategoryIds: String = nbf.map(_._2).mkString(Constants.SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR) 241 | val orderProductIds: String = nbf.map(_._1).mkString(Constants.SPLIT_CATEGORY_OR_PRODUCT_ID_SEPARATOR) 242 | val payCategoryIds: String = "" 243 | val payProductIds: String = "" 244 | 245 | // Add data 246 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes() 247 | producer.send(new ProducerRecord("userVisit", message)) 248 | total = total + 1 249 | length = length + message.length 250 | // Go to next action 251 | val (t1, t2, t3) = 252 | if (times <= 3) { 253 | (1, 2, 9) 254 | } else if (times < 10) { 255 | (1, 2, 8) 256 | } else { 257 | (1, 2, 7) 258 | } 259 | 260 | val tmp = seed % 10 261 | seed = seed + 1 262 | 263 | if (tmp <= t1) { 264 | // Visit 265 | generatePageView(times + 1, userId, sessionId, cityId, actionTime) 266 | } else if (tmp <= t2) { 267 | // Search 268 | generateSearch(times + 1, userId, sessionId, cityId, actionTime) 269 | } else if (tmp <= t3) { 270 | // Pay 271 | generatePay(times + 1, userId, sessionId, cityId, actionTime, productIds = orderProductIds, categoryIds = orderCategoryIds) 272 | } else { 273 | // nothings, finish 274 | } 275 | 276 | } 277 | } 278 | 279 | // Generate data for pay 280 | def generatePay(times: Int, userId: Long, sessionId: String, cityId: Int, preActionTime: Long, productIds: String, categoryIds: String): Unit = { 281 | if (times <= 20) { 282 | // pay ID:301 283 | val pageId: Long = 301 284 | val actionTime = getCurrentActionTime(preActionTime) 285 | val searchKeyword: String = "" 286 | val clickProductId: String = "" 287 | val clickCategoryId: String = "" 288 | val orderCategoryIds: String = "" 289 | val orderProductIds: String = "" 290 | val payCategoryIds: String = categoryIds 291 | val payProductIds: String = productIds 292 | 293 | // Add data 294 | val message = UserVisitAction(date, userId, sessionId, pageId, actionTime, searchKeyword, clickCategoryId, clickProductId, orderCategoryIds, orderProductIds, payCategoryIds, payProductIds, cityId).formatted(",").getBytes() 295 | producer.send(new ProducerRecord("userVisit", message)) 296 | 297 | total = total + 1 298 | length = length + message.length 299 | // Go to next action 300 | val (t1, t2) = 301 | if (times < 10) { 302 | (4, 8) 303 | } else { 304 | (1, 3) 305 | } 306 | 307 | val tmp = seed % 10 308 | seed = seed + 1 309 | 310 | if (tmp <= t1) { 311 | // Visit 312 | generatePageView(times + 1, 
userId, sessionId, cityId, actionTime) 313 | } else if (tmp <= t2) { 314 | // Search 315 | generateSearch(times + 1, userId, sessionId, cityId, actionTime) 316 | } else { 317 | // nothings, finish 318 | } 319 | 320 | } 321 | } 322 | 323 | var flag: Boolean = true 324 | while (flag) { 325 | val startTime = System.currentTimeMillis() 326 | val userId: Long = random.nextInt(userNumbers) 327 | val sessionId = hostName + "_" + threadName + "_"+ total 328 | val cityId = citys((total % cityTypeSize).toInt)._1 329 | seed = random.nextInt(100) 330 | // Actions fall into five types: page view (visit), search, click, order and pay 331 | /** 332 | * Suppose the access chain has several situations: 333 | * 1. Visit -> Search-> Click -> Order -> Pay 334 | * 2. Search -> Click -> Order -> Pay 335 | * 3. Visit -> Click -> Order -> Pay 336 | * Note: Visit, Search and Click can be generated consecutively, while Order and Pay cannot appear consecutively 337 | * ======> 338 | * After visiting, there may be search, click and visit action. 339 | * After searching, there may be visit, search and click action. 340 | * After clicking, there may be visit, search, order and click action. 341 | * After ordering, there may be search, visit and pay action. 342 | * After paying, there may be search and visit action. 343 | * Note: After any action, the session may also simply finish. 344 | **/ 345 | 346 | // 80% visit, 20% search 347 | if (total % 5 < 4) { 348 | // generate data for visit 349 | generatePageView(0, userId, sessionId, cityId, startTime) 350 | } else { 351 | // generate data for search 352 | generateSearch(0, userId, sessionId, cityId, startTime) 353 | } 354 | 355 | if ( (System.currentTimeMillis() - start) > time*1000) { 356 | flag = false 357 | } 358 | 359 | } 360 | } 361 | 362 | /** 363 | * Simulation code for generating product information 364 | * 365 | * @param 366 | * @return 367 | */ 368 | def mockProductInfo() = { 369 | val producer = createProducer 370 | val buffer = ArrayBuffer[ProductInfo]() 371 | for (i <- 0 until productNumbers) { 372 | val productID: Long = i.toLong 373 | val productName: String = s"product_${productID}" 374 | // 60% third party products; 40% proprietary products 375 | val extendInfo: String = { 376 | val obj = new JSONObject() 377 | if (random.nextDouble() <= 0.4) { 378 | // proprietary product 379 | obj.put("product_type", "0") 380 | } else { 381 | // third party products 382 | obj.put("product_type", "1") 383 | } 384 | obj.toJSONString 385 | } 386 | producer.send(new ProducerRecord("productInfo", ProductInfo(productID, productName, extendInfo).formatted(",").getBytes())) 387 | 388 | } 389 | } 390 | 391 | } 392 | -------------------------------------------------------------------------------- /dataGen/src/main/scala/com/intel/streaming_benchmark/click.scala: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark 2 | 3 | import java.util.Random 4 | 5 | 6 | object click { 7 | 8 | val random = new Random 9 | val splitSymbol = "," 10 | 11 | val userNumbers = 1000 12 | 13 | val userVisitSessionNumbers = 10000 14 | 15 | val productNumbers = 10000 16 | 17 | val productCategoryNumbers = 50 18 | 19 | val professionals = Array("Programmer", "Teacher", "Cook", "Driver", "Doctor", "Nurse", "Designer", "Farmer", "Worker", "Assistant") 20 | val professionalTypeSize = professionals.length 21 | 22 | val citys: Array[(Int, String)] = Array("Shanghai", "Beijing", "Shenzhen", "Guangzhou", "Nanjing", "Hangzhou", "Changsha", "Nanchang", "Zhangjiajie", "Hong Kong",
"Macao").zipWithIndex.map(_.swap) 23 | val cityTypeSize = citys.length 24 | 25 | val sexs = Array("male", "female", "unknown") 26 | val sexTypeSize = sexs.length 27 | // search key word 28 | val keywords = Array("Hot Pot", "Cake", "Chongqing spicy chicken", "Chongqing facet", 29 | "Biscuits", "Fish", "International Trade Building or Cetra Building", "Pacific Mall", "Japanese cuisine", "Hot Spring") 30 | val keywordSize = keywords.length 31 | 32 | var count = 0 33 | } 34 | 35 | 36 | case class ProductInfo( 37 | productID: Long, 38 | productName: String, 39 | extendInfo: String 40 | ) { 41 | /** 42 | * Format 43 | * 44 | * @param splitSymbol 45 | * @return 46 | */ 47 | def formatted(splitSymbol: String = "^"): String = { 48 | s"${productID}${splitSymbol}${productName}${splitSymbol}${extendInfo}" 49 | } 50 | } 51 | 52 | object ProductInfo { 53 | /** 54 | * column name of the table 55 | */ 56 | val columnNames = Array("product_id", "product_name", "extend_info") 57 | 58 | /** 59 | * Parse row data and return the object; if parsing fails return None 60 | * 61 | * @param line 62 | * @param splitSymbol 63 | * @return 64 | */ 65 | def parseProductInfo(line: String, splitSymbol: String = "\\^"): Option[ProductInfo] = { 66 | val arr = line.split(splitSymbol) 67 | if (arr.length == 3) { 68 | Some( 69 | new ProductInfo( 70 | arr(0).toLong, 71 | arr(1), 72 | arr(2) 73 | ) 74 | ) 75 | } else None 76 | } 77 | } 78 | 79 | 80 | 81 | case class UserInfo( 82 | userId: Long, 83 | userName: String, 84 | name: String, 85 | age: Int, 86 | professional: String, 87 | city: String, 88 | sex: String 89 | ) { 90 | /** 91 | * Format time 92 | * 93 | * @param splitSymbol 94 | * @return 95 | */ 96 | def formatted(splitSymbol: String = ","): String = { 97 | s"${userId}${splitSymbol}${userName}${splitSymbol}${name}${splitSymbol}${age}${splitSymbol}${professional}${splitSymbol}${city}${splitSymbol}${sex}" 98 | } 99 | } 100 | 101 | object UserInfo { 102 | /** 103 | * column name of the table 104 | */ 105 | val columnNames = Array("user_id", "user_name", "name", "age", "professional", "city", "sex") 106 | 107 | /** 108 | * Parse row data and return the object; if parsing fails return None 109 | * 110 | * @param line 111 | * @param splitSymbol 112 | * @return 113 | */ 114 | def parseUserInfo(line: String, splitSymbol: String = ","): Option[UserInfo] = { 115 | val arr = line.split(splitSymbol) 116 | if (arr.length == 7) { 117 | Some(new UserInfo( 118 | arr(0).toLong, 119 | arr(1), 120 | arr(2), 121 | arr(3).toInt, 122 | arr(4), 123 | arr(5), 124 | arr(6) 125 | )) 126 | } else None 127 | } 128 | } 129 | 130 | 131 | case class UserVisitAction( 132 | date: String, 133 | userId: Long, 134 | sessionId: String, 135 | pageId: Long, 136 | actionTime: Long, 137 | searchKeyword: String, 138 | clickCategoryId: String, 139 | clickProductId: String, 140 | orderCategoryIds: String, 141 | orderProductIds: String, 142 | payCategoryIds: String, 143 | payProductIds: String, 144 | cityId: Int 145 | ) { 146 | /** 147 | * Format time 148 | * 149 | * @param splitSymbol 150 | * @return 151 | */ 152 | def formatted(splitSymbol: String = ","): String = { 153 | s"${date}${splitSymbol}${userId}${splitSymbol}${sessionId}${splitSymbol}${pageId}${splitSymbol}${actionTime}${splitSymbol}${searchKeyword}${splitSymbol}${clickCategoryId}${splitSymbol}${clickProductId}${splitSymbol}${orderCategoryIds}${splitSymbol}${orderProductIds}${splitSymbol}${payCategoryIds}${splitSymbol}${payProductIds}${splitSymbol}${cityId}" 154 | } 155 | } 156 | 157 | object 
UserVisitAction { 158 | /** 159 | * column name of the table 160 | */ 161 | val columnNames = Array("date", "user_id", "session_id", "page_id", "action_time", "search_keyword", "click_category_id", "click_product_id", "order_category_ids", "order_product_ids", "pay_category_ids", "pay_product_ids", "city_id") 162 | 163 | /** 164 | * Parse row data and return the object; if parsing fails return None 165 | * 166 | * @param line 167 | * @param splitSymbol 168 | * @return 169 | */ 170 | def parseUserVisitAction(line: String, splitSymbol: String = ","): Option[UserVisitAction] = { 171 | val arr = line.split(splitSymbol) 172 | if (arr.length == 13) { 173 | Some( 174 | new UserVisitAction( 175 | arr(0), 176 | arr(1).toLong, 177 | arr(2), 178 | arr(3).toLong, 179 | arr(4).toLong, 180 | arr(5), 181 | arr(6), 182 | arr(7), 183 | arr(8), 184 | arr(9), 185 | arr(10), 186 | arr(11), 187 | arr(12).toInt 188 | ) 189 | ) 190 | } else None 191 | } 192 | } 193 | 194 | 195 | -------------------------------------------------------------------------------- /flink/conf/benchmarkConf.yaml: -------------------------------------------------------------------------------- 1 | streambench.flink.checkpointDuration 5000 2 | streambench.flink.timeType EventTime -------------------------------------------------------------------------------- /flink/log/q1.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q1.sql.log -------------------------------------------------------------------------------- /flink/log/q10.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q10.sql.log -------------------------------------------------------------------------------- /flink/log/q11.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q11.sql.log -------------------------------------------------------------------------------- /flink/log/q12.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q12.sql.log -------------------------------------------------------------------------------- /flink/log/q2.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q2.sql.log -------------------------------------------------------------------------------- /flink/log/q3.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q3.sql.log -------------------------------------------------------------------------------- /flink/log/q4.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q4.sql.log -------------------------------------------------------------------------------- /flink/log/q5.sql.log: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q5.sql.log -------------------------------------------------------------------------------- /flink/log/q6.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q6.sql.log -------------------------------------------------------------------------------- /flink/log/q7.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q7.sql.log -------------------------------------------------------------------------------- /flink/log/q8.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q8.sql.log -------------------------------------------------------------------------------- /flink/log/q9.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/log/q9.sql.log -------------------------------------------------------------------------------- /flink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | streaming_benchmark 7 | com.intel.streaming_benchmark 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | flink 13 | 14 | 15 | com.intel.streaming_benchmark 16 | common 17 | 1.0-SNAPSHOT 18 | 19 | 20 | com.alibaba 21 | fastjson 22 | 1.2.58 23 | 24 | 25 | 26 | src/main/java 27 | 28 | 29 | src/main/resources 30 | true 31 | 32 | 33 | src/main/java 34 | 35 | ../*.java 36 | 37 | 38 | 39 | 40 | 41 | 42 | org.codehaus.mojo 43 | build-helper-maven-plugin 44 | 1.4 45 | 46 | 47 | add-source 48 | generate-sources 49 | 50 | add-source 51 | 52 | 53 | 54 | ../common/src/main/scala 55 | ../common/src/main/java 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | net.alchim31.maven 66 | scala-maven-plugin 67 | 3.2.2 68 | 69 | 70 | scala-compile-first 71 | process-resources 72 | 73 | add-source 74 | compile 75 | 76 | 77 | 78 | scala-test-compile 79 | process-test-resources 80 | 81 | testCompile 82 | 83 | 84 | 85 | 86 | 87 | org.apache.maven.plugins 88 | maven-compiler-plugin 89 | 3.2 90 | 91 | 1.8 92 | 1.8 93 | UTF-8 94 | 95 | 96 | 97 | 98 | 99 | org.apache.maven.plugins 100 | maven-compiler-plugin 101 | 3.8.0 102 | 103 | 1.8 104 | 1.8 105 | 106 | 107 | 108 | org.apache.maven.plugins 109 | maven-resources-plugin 110 | 3.0.2 111 | 112 | UTF-8 113 | 114 | 115 | 116 | 117 | org.apache.maven.plugins 118 | maven-shade-plugin 119 | 2.4.3 120 | 121 | 122 | 123 | 124 | *:* 125 | 126 | META-INF/*.SF 127 | META-INF/*.DSA 128 | META-INF/*.RSA 129 | 130 | 131 | 132 | 133 | 134 | junit:junit 135 | org.slf4j:slf4j-simple 136 | org.slf4j:slf4j-log4j12 137 | com.101tec:zkclient 138 | com.github.sgroschupf:zkclient 139 | org.apache.httpcomponents:httpclient 140 | 141 | 142 | 143 | 144 | 145 | package 146 | 147 | shade 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 
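The flink/query/*.sql files that follow are plain Flink SQL statements over the Kafka-backed tables (shopping, click, imp, dau, userVisit) that the Flink Benchmark class registers at runtime. As a condensed sketch of how each file is executed (abridged from flink/src/main/java/com/intel/streaming_benchmark/flink/Benchmark.java shown later in this dump; `config`, `tableEnv` and `env` are that class's locals):
```java
// Minimal sketch: load one query file and run it as a retract stream (condensed from Benchmark.runQuery).
File file = new File(config.sqlLocation + "/" + config.sqlName);   // e.g. flink/query/q1.sql
String queryString = DateUtils.fileToString(file);                 // read the SQL text
Table table = tableEnv.sqlQuery(queryString);                      // plan it against the registered tables
tableEnv.toRetractStream(table, Row.class).print();                // windowed aggregates emit retractions
env.execute(config.sqlName);                                       // runtime and record counts are appended to flink/result/result.log
```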
-------------------------------------------------------------------------------- /flink/query/q1.sql: -------------------------------------------------------------------------------- 1 | select 2 | commodity, count(userId) num, TUMBLE_START(rowtime, INTERVAL '10' SECOND),TUMBLE_END(rowtime, INTERVAL '10' SECOND), UNIX_TIMESTAMP(TUMBLE_START(rowtime, INTERVAL '10' SECOND)) - UNIX_TIMESTAMP(TO_TIMESTAMP(min(times))) 3 | from 4 | shopping 5 | group by 6 | TUMBLE(rowtime, INTERVAL '10' SECOND), commodity -------------------------------------------------------------------------------- /flink/query/q10.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.dt, a.h, COUNT(sessionId) num 3 | FROM 4 | (SELECT 5 | sessionId, MAX(actionTime)-MIN(actionTime) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h 6 | FROM 7 | userVisit 8 | GROUP BY 9 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP))) a 10 | WHERE 11 | a.len < 100 12 | GROUP BY 13 | a.dt, a.h -------------------------------------------------------------------------------- /flink/query/q11.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.dt, a.h, SUM(a.len) total 3 | FROM 4 | (SELECT 5 | sessionId, MAX(actionTime)-MIN(actionTime) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h 6 | FROM 7 | userVisit 8 | GROUP BY 9 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP))) a 10 | WHERE 11 | a.len < 1 12 | GROUP BY 13 | a.dt, a.h -------------------------------------------------------------------------------- /flink/query/q12.sql: -------------------------------------------------------------------------------- 1 | 2 | SELECT 3 | * 4 | FROM 5 | (SELECT 6 | *, ROW_NUMBER() OVER (PARTITION BY w.cityId ORDER BY w.num DESC) as rownum 7 | FROM 8 | (SELECT 9 | TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), cityId, payProductIds, count(*) num 10 | FROM 11 | userVisit 12 | WHERE 13 | payProductIds IS NOT NULL 14 | GROUP BY 15 | cityId, payProductIds, TUMBLE(rowtime, INTERVAL '10' SECOND) 16 | ) w 17 | ) v 18 | WHERE 19 | v.rownum <= 10 -------------------------------------------------------------------------------- /flink/query/q2.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | strategy, site, pos_id, TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), count(*) click_count 3 | FROM 4 | click 5 | GROUP BY 6 | strategy, site, pos_id, TUMBLE(rowtime, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /flink/query/q3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | strategy, site, pos_id, TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), SUM(cost) 3 | FROM 4 | imp 5 | GROUP BY 6 | strategy, site, pos_id, TUMBLE(rowtime, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /flink/query/q4.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | b.device_id, a.strategy, a.site, a.pos_id, count(b.device_id) 3 | FROM 4 | click a 5 | JOIN 6 | dau b 7 | ON 8 | a.device_id = b.device_id AND a.rowtime BETWEEN b.rowtime - INTERVAL '1' 
second AND b.rowtime + INTERVAL '1' second 9 | GROUP BY 10 | b.device_id, a.strategy, a.site, a.pos_id, TUMBLE(a.rowtime, INTERVAL '10' SECOND) 11 | -------------------------------------------------------------------------------- /flink/query/q5.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sessionId, MAX(actionTime)-MIN(actionTime) as len 3 | FROM 4 | userVisit 5 | GROUP BY 6 | sessionId, TUMBLE(rowtime, INTERVAL '10' SECOND) 7 | 8 | -------------------------------------------------------------------------------- /flink/query/q6.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sessionId, (MAX(actionTime)-MIN(actionTime)) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h, COUNT(sessionId) num 3 | FROM 4 | userVisit 5 | GROUP BY 6 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP)), TUMBLE(rowtime, INTERVAL '10' SECOND) 7 | HAVING 8 | (MAX(actionTime)-MIN(actionTime)) < 100 -------------------------------------------------------------------------------- /flink/query/q7.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | TUMBLE_START(rowtime, INTERVAL '10' SECOND), TUMBLE_END(rowtime, INTERVAL '10' SECOND), cityId, payProductIds, count(*) num 3 | FROM 4 | userVisit 5 | WHERE 6 | payProductIds IS NOT NULL 7 | GROUP BY 8 | cityId, payProductIds, TUMBLE(rowtime, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /flink/query/q8.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | TUMBLE_START(rowtime, INTERVAL '10' SECOND) startTime, TUMBLE_END(rowtime, INTERVAL '10' SECOND) finish, cityId, count(clickCategoryId) as sequence 3 | FROM 4 | userVisit 5 | WHERE 6 | clickCategoryId IS NOT NULL 7 | GROUP BY 8 | cityId, TUMBLE(rowtime, INTERVAL '10' SECOND) -------------------------------------------------------------------------------- /flink/query/q9.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.device_id, a.strategy, a.site, a.pos_id, b.var2, b.var1, count(*) 3 | FROM 4 | (SELECT device_id, strategy, site, pos_id FROM click) a 5 | JOIN 6 | (SELECT device_id, FROM_UNIXTIME(CAST(dau_time/1000 AS BIGINT), 'yyyyMMdd') as var1, FROM_UNIXTIME(CAST(dau_time/1000 AS BIGINT), 'HH') as var2 FROM dau) b 7 | ON 8 | a.device_id = b.device_id 9 | GROUP BY 10 | a.device_id, a.strategy, a.site, a.pos_id, b.var2, b.var1 -------------------------------------------------------------------------------- /flink/result/result.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/flink/result/result.log -------------------------------------------------------------------------------- /flink/src/main/java/com/intel/streaming_benchmark/flink/Benchmark.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.flink; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.intel.streaming_benchmark.common.*; 5 | import com.intel.streaming_benchmark.utils.FlinkBenchConfig; 6 | import org.apache.flink.api.common.JobExecutionResult; 7 | import org.apache.flink.api.common.accumulators.IntCounter; 8 | import 
org.apache.flink.api.common.functions.RichFlatMapFunction; 9 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 10 | import org.apache.flink.api.java.tuple.*; 11 | import org.apache.flink.configuration.Configuration; 12 | import org.apache.flink.streaming.api.TimeCharacteristic; 13 | import org.apache.flink.streaming.api.datastream.DataStream; 14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 15 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 16 | import org.apache.flink.streaming.api.watermark.Watermark; 17 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; 18 | import org.apache.flink.table.api.EnvironmentSettings; 19 | import org.apache.flink.table.api.Table; 20 | import org.apache.flink.table.api.TableConfig; 21 | import org.apache.flink.table.api.java.StreamTableEnvironment; 22 | import org.apache.flink.types.Row; 23 | import org.apache.flink.util.Collector; 24 | import com.alibaba.fastjson.JSONObject; 25 | import javax.annotation.Nullable; 26 | import java.io.BufferedWriter; 27 | import java.io.File; 28 | import java.io.FileWriter; 29 | import java.text.SimpleDateFormat; 30 | import java.util.Properties; 31 | 32 | public class Benchmark { 33 | public static void main(String[] args) throws Exception { 34 | if (args.length < 2) 35 | BenchLogUtil.handleError("Usage: RunBench "); 36 | //root Config 37 | ConfigLoader cl = new ConfigLoader(args[0]); 38 | String benchmarkConfDir = new File(args[0]).getParent(); 39 | 40 | //flink config 41 | String flinkConf = benchmarkConfDir + "/../flink/conf/benchmarkConf.yaml"; 42 | cl.merge(flinkConf); 43 | 44 | // Prepare configuration 45 | FlinkBenchConfig conf = new FlinkBenchConfig(); 46 | conf.brokerList = cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST); 47 | conf.zkHost = cl.getProperty(StreamBenchConfig.ZK_HOST); 48 | conf.consumerGroup = cl.getProperty(StreamBenchConfig.CONSUMER_GROUP); 49 | conf.checkpointDuration = Long.parseLong(cl.getProperty(StreamBenchConfig.FLINK_CHECKPOINTDURATION)); 50 | conf.timeType = cl.getProperty(StreamBenchConfig.FLINK_TIMETYPE); 51 | conf.topic = QueryConfig.getTables(args[1]); 52 | conf.sqlLocation = benchmarkConfDir + "/../flink/query"; 53 | conf.resultLocation = benchmarkConfDir + "/../flink/result"; 54 | conf.sqlName = args[1]; 55 | runQuery(conf); 56 | } 57 | 58 | public static void runQuery(FlinkBenchConfig config) throws Exception{ 59 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 60 | env.enableCheckpointing(config.checkpointDuration); 61 | if(config.timeType.equals("EventTime")){ 62 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 63 | }else{ 64 | env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); 65 | } 66 | 67 | TableConfig tc = new TableConfig(); 68 | EnvironmentSettings builder = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 69 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env,builder); 70 | 71 | Properties properties = new Properties(); 72 | properties.setProperty("zookeeper.connect", config.zkHost); 73 | properties.setProperty("group.id", config.consumerGroup); 74 | properties.setProperty("bootstrap.servers", config.brokerList); 75 | 76 | String[] topics = config.topic.split(","); 77 | 78 | //generate table 79 | for(int i = 0; i < topics.length; i++){ 80 | // source stream 81 | FlinkKafkaConsumer010 consumer = new FlinkKafkaConsumer010(topics[i], new 
SimpleStringSchema(),properties); 82 | consumer.setStartFromLatest(); 83 | // consumer.setStartFromEarliest(); 84 | //add stream source for flink 85 | DataStream stream = env.addSource(consumer); 86 | // stream parse need table schema 87 | String[] fieldNames = TableSchemaProvider.getSchema(topics[i]).getFieldNames(); 88 | // TypeInformation returnType = TypeExtractor.createTypeInfo(); 89 | DataStream streamParsed; 90 | 91 | if(config.timeType.equals("EventTime")){ 92 | if(topics[i].equals("shopping")){ 93 | streamParsed = stream.flatMap(new DeserializeShopping()).assignTimestampsAndWatermarks(new ShoppingWatermarks()); 94 | }else if(topics[i].equals("click")){ 95 | streamParsed = stream.flatMap(new DeserializeClick()).assignTimestampsAndWatermarks(new ClickWatermarks()); 96 | }else if(topics[i].equals("imp")){ 97 | streamParsed = stream.flatMap(new DeserializeImp()).assignTimestampsAndWatermarks(new ImpWatermarks()); 98 | }else if(topics[i].equals("dau")){ 99 | streamParsed = stream.flatMap(new DeserializeDau()).assignTimestampsAndWatermarks(new DauWatermarks()); 100 | }else if(topics[i].equals("userVisit")){ 101 | streamParsed = stream.flatMap(new DeserializeUserVisit()).assignTimestampsAndWatermarks(new UserVisitWatermarks()); 102 | }else{ 103 | System.out.println("No such topic, please check your benchmarkConf.yaml"); 104 | return; 105 | } 106 | 107 | }else{ 108 | if(topics[i].equals("shopping")){ 109 | streamParsed = stream.flatMap(new DeserializeShopping()); 110 | }else if(topics[i].equals("click")){ 111 | streamParsed = stream.flatMap(new DeserializeClick()); 112 | }else if(topics[i].equals("imp")){ 113 | streamParsed = stream.flatMap(new DeserializeImp()); 114 | }else if(topics[i].equals("dau")){ 115 | streamParsed = stream.flatMap(new DeserializeDau()); 116 | }else if(topics[i].equals("userVisit")){ 117 | streamParsed = stream.flatMap(new DeserializeUserVisit()); 118 | }else{ 119 | System.out.println("No such topic, please check your benchmarkConf.yaml"); 120 | return; 121 | } 122 | } 123 | 124 | tableEnv.registerTable(topics[i], tableEnv.fromDataStream(streamParsed, FieldString(fieldNames, config.timeType))); 125 | } 126 | 127 | //runQuery 128 | File file = new File(config.sqlLocation + "/" + config.sqlName); 129 | if (!file.exists()) { 130 | return; 131 | } 132 | try { 133 | String queryString = DateUtils.fileToString(file); 134 | Table table = tableEnv.sqlQuery(queryString); 135 | table.printSchema(); 136 | DataStream> tuple2DataStream = tableEnv.toRetractStream(table, Row.class); 137 | tuple2DataStream.print(); 138 | } catch (Exception e) { 139 | e.printStackTrace(); 140 | } 141 | 142 | JobExecutionResult execute = env.execute(config.sqlName); 143 | JobExecutionResult jobExecutionResult = execute.getJobExecutionResult(); 144 | long netRuntime = jobExecutionResult.getNetRuntime(); 145 | System.out.println("----------------runtime---------------- :" + netRuntime); 146 | long count = 0; 147 | for(int i = 0; i < topics.length; i++){ 148 | Integer tmp = (Integer)jobExecutionResult.getAccumulatorResult(topics[i]); 149 | count = count + tmp.longValue(); 150 | } 151 | File resultFile = new File(config.resultLocation + "/result.log" ); 152 | if (!resultFile.exists()) { 153 | resultFile.createNewFile(); 154 | } 155 | FileWriter fileWriter = new FileWriter(config.resultLocation + "/result.log", true); 156 | BufferedWriter bufferWriter = new BufferedWriter(fileWriter); 157 | bufferWriter.write("Finished time: "+ DateUtils.parseLong2String(System.currentTimeMillis()) + "; " + 
config.sqlName + " Runtime: " + netRuntime/1000 + " TPS:" + count/(netRuntime/1000) + "\r\n"); 158 | bufferWriter.close(); 159 | 160 | } 161 | 162 | private static String FieldString(String[] fieldNames, String timeType){ 163 | String fileds = ""; 164 | for(int i =0; i< fieldNames.length; i++){ 165 | fileds = fileds + fieldNames[i] + ","; 166 | } 167 | if(timeType.equals("EventTime")){ 168 | fileds = fileds + "rowtime.rowtime"; 169 | }else{ 170 | fileds = fileds + "rowtime.proctime"; 171 | } 172 | return fileds; 173 | } 174 | 175 | public static class ShoppingWatermarks implements AssignerWithPeriodicWatermarks> { 176 | Long currentMaxTimestamp = 0L; 177 | final Long maxOutOfOrderness = 2000L; 178 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 179 | 180 | @Nullable 181 | @Override 182 | public Watermark getCurrentWatermark() { 183 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness); 184 | return watermark; 185 | } 186 | 187 | @Override 188 | public long extractTimestamp(Tuple3 element, long previousElementTimestamp) { 189 | Long timestamp = Long.valueOf(element.f2); 190 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 191 | return timestamp; 192 | } 193 | } 194 | 195 | 196 | public static class ClickWatermarks implements AssignerWithPeriodicWatermarks> { 197 | Long currentMaxTimestamp = 0L; 198 | final Long maxOutOfOrderness = 2000L; 199 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 200 | 201 | @Nullable 202 | @Override 203 | public Watermark getCurrentWatermark() { 204 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness); 205 | return watermark; 206 | } 207 | 208 | @Override 209 | public long extractTimestamp(Tuple6 element, long previousElementTimestamp) { 210 | Long timestamp = Long.valueOf(element.f0); 211 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 212 | return timestamp; 213 | } 214 | } 215 | 216 | 217 | public static class ImpWatermarks implements AssignerWithPeriodicWatermarks> { 218 | Long currentMaxTimestamp = 0L; 219 | final Long maxOutOfOrderness = 2000L; 220 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 221 | 222 | @Nullable 223 | @Override 224 | public Watermark getCurrentWatermark() { 225 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness); 226 | return watermark; 227 | } 228 | 229 | @Override 230 | public long extractTimestamp(Tuple7 element, long previousElementTimestamp) { 231 | Long timestamp = Long.valueOf(element.f0); 232 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 233 | return timestamp; 234 | } 235 | } 236 | 237 | 238 | public static class DauWatermarks implements AssignerWithPeriodicWatermarks> { 239 | Long currentMaxTimestamp = 0L; 240 | final Long maxOutOfOrderness = 2000L; 241 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 242 | 243 | @Nullable 244 | @Override 245 | public Watermark getCurrentWatermark() { 246 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness); 247 | return watermark; 248 | } 249 | 250 | @Override 251 | public long extractTimestamp(Tuple2 element, long previousElementTimestamp) { 252 | Long timestamp = Long.valueOf(element.f0); 253 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 254 | return timestamp; 255 | } 256 | } 257 | 258 | 259 | public static class UserVisitWatermarks implements AssignerWithPeriodicWatermarks> { 260 | Long currentMaxTimestamp = 0L; 
261 | final Long maxOutOfOrderness = 2000L; 262 | SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 263 | 264 | @Nullable 265 | @Override 266 | public Watermark getCurrentWatermark() { 267 | Watermark watermark = new Watermark(currentMaxTimestamp - maxOutOfOrderness); 268 | return watermark; 269 | } 270 | 271 | @Override 272 | public long extractTimestamp(Tuple13 element, long previousElementTimestamp) { 273 | Long timestamp = Long.valueOf(element.f4); 274 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 275 | return timestamp; 276 | } 277 | } 278 | 279 | 280 | 281 | public static class DeserializeShopping extends RichFlatMapFunction> { 282 | 283 | // Counter numLines; 284 | private IntCounter shopping = new IntCounter(); 285 | @Override 286 | public void open(Configuration parameters) throws Exception { 287 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines"); 288 | getRuntimeContext().addAccumulator("shopping", this.shopping); 289 | super.open(parameters); 290 | } 291 | 292 | @Override 293 | public void flatMap(String s, Collector> collector) throws Exception { 294 | this.shopping.add(1); 295 | String[] split = s.split(","); 296 | collector.collect(new Tuple3(split[0], split[1], Long.valueOf(split[2]))); 297 | } 298 | } 299 | 300 | public static class DeserializeClick extends RichFlatMapFunction> { 301 | 302 | private IntCounter click = new IntCounter(); 303 | @Override 304 | public void open(Configuration parameters) throws Exception { 305 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines"); 306 | getRuntimeContext().addAccumulator("click", this.click); 307 | super.open(parameters); 308 | } 309 | 310 | @Override 311 | public void flatMap(String input, Collector> collector) throws Exception { 312 | this.click.add(1); 313 | JSONObject obj = JSON.parseObject(input); 314 | // JSONObject obj = new JSONObject(input); 315 | Tuple6 tuple = new Tuple6<>( 316 | obj.getLong("click_time"), 317 | obj.getString("strategy"), 318 | obj.getString("site"), 319 | obj.getString("pos_id"), 320 | obj.getString("poi_id"), 321 | obj.getString("device_id") 322 | ); 323 | collector.collect(tuple); 324 | } 325 | } 326 | 327 | public static class DeserializeImp extends RichFlatMapFunction> { 328 | 329 | private IntCounter imp = new IntCounter(); 330 | @Override 331 | public void open(Configuration parameters) throws Exception { 332 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines"); 333 | getRuntimeContext().addAccumulator("imp", this.imp); 334 | super.open(parameters); 335 | } 336 | 337 | @Override 338 | public void flatMap(String input, Collector> collector) throws Exception { 339 | this.imp.add(1); 340 | JSONObject obj = JSON.parseObject(input); 341 | // JSONObject obj = new JSONObject(input); 342 | Tuple7 tuple = new Tuple7<>( 343 | obj.getLong("imp_time"), 344 | obj.getString("strategy"), 345 | obj.getString("site"), 346 | obj.getString("pos_id"), 347 | obj.getString("poi_id"), 348 | obj.getDouble("cost"), 349 | obj.getString("device_id") 350 | ); 351 | collector.collect(tuple); 352 | } 353 | } 354 | 355 | public static class DeserializeDau extends RichFlatMapFunction> { 356 | 357 | private IntCounter dau = new IntCounter(); 358 | @Override 359 | public void open(Configuration parameters) throws Exception { 360 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines"); 361 | 
getRuntimeContext().addAccumulator("dau", this.dau); 362 | super.open(parameters); 363 | } 364 | 365 | @Override 366 | public void flatMap(String input, Collector> collector) throws Exception { 367 | this.dau.add(1); 368 | JSONObject obj = JSON.parseObject(input); 369 | // JSONObject obj = new JSONObject(input); 370 | Tuple2 tuple = new Tuple2<>( 371 | obj.getLong("dau_time"), 372 | obj.getString("device_id") 373 | ); 374 | collector.collect(tuple); 375 | } 376 | } 377 | 378 | 379 | public static class DeserializeUserVisit extends RichFlatMapFunction> { 380 | 381 | private IntCounter userVisit = new IntCounter(); 382 | @Override 383 | public void open(Configuration parameters) throws Exception { 384 | //numLines = getRuntimeContext().getMetricGroup().addGroup("flink_test_metric").counter("numLines"); 385 | getRuntimeContext().addAccumulator("userVisit", this.userVisit); 386 | super.open(parameters); 387 | } 388 | 389 | @Override 390 | public void flatMap(String s, Collector> collector) throws Exception { 391 | this.userVisit.add(1); 392 | String[] split = s.split(","); 393 | Tuple13 tuple = new Tuple13<>( 394 | split[0], 395 | Long.valueOf(split[1]), 396 | split[2], 397 | Long.valueOf(split[3]), 398 | Long.valueOf(split[4]), 399 | split[5], 400 | split[6], 401 | split[7], 402 | split[8], 403 | split[9], 404 | split[10], 405 | split[11], 406 | Integer.valueOf(split[12]) 407 | ); 408 | collector.collect(tuple); 409 | } 410 | } 411 | 412 | } 413 | -------------------------------------------------------------------------------- /flink/src/main/java/com/intel/streaming_benchmark/utils/FlinkBenchConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package com.intel.streaming_benchmark.utils; 19 | 20 | import java.io.Serializable; 21 | 22 | public class FlinkBenchConfig implements Serializable { 23 | // public String testCase; 24 | 25 | // Kafka related 26 | public String zkHost; 27 | public String brokerList; 28 | public String topic; 29 | public String consumerGroup; 30 | // public String offsetReset; 31 | // public String reportTopic; 32 | 33 | // Flink related 34 | public long checkpointDuration; 35 | public String resultLocation; 36 | public String sqlLocation; 37 | public String sqlName; 38 | public String timeType; 39 | 40 | } 41 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.intel.streaming_benchmark 8 | streaming_benchmark 9 | pom 10 | 1.0-SNAPSHOT 11 | 12 | common 13 | spark 14 | flink 15 | dataGen 16 | 17 | 18 | 19 | 20 | org.scala-lang 21 | scala-library 22 | 2.11.8 23 | 24 | 25 | org.scala-lang 26 | scala-compiler 27 | 2.11.8 28 | 29 | 30 | 31 | org.eclipse.tycho 32 | tycho-compiler-jdt 33 | 0.21.0 34 | 35 | 36 | 37 | org.eclipse.tycho 38 | tycho-compiler-jdt 39 | 0.21.0 40 | 41 | 42 | 43 | 44 | org.apache.flink 45 | flink-table-api-java 46 | 1.9.0 47 | 48 | 49 | 50 | org.apache.flink 51 | flink-table-planner-blink_2.11 52 | 1.9.0 53 | 54 | 55 | 56 | 57 | org.apache.flink 58 | flink-streaming-java_2.11 59 | 1.9.0 60 | 61 | 62 | 63 | 64 | com.alibaba 65 | fastjson 66 | 1.2.58 67 | 68 | 69 | 70 | 71 | 72 | org.apache.flink 73 | flink-streaming-scala_2.11 74 | 1.9.0 75 | 76 | 77 | 78 | 79 | org.apache.flink 80 | flink-connector-kafka-0.10_2.11 81 | 1.9.0 82 | 83 | 84 | 85 | 86 | org.apache.kafka 87 | kafka_2.11 88 | 0.10.2.1 89 | 90 | 91 | 92 | 93 | 94 | 95 | maven-compiler-plugin 96 | 3.8.0 97 | 98 | 1.8 99 | 1.8 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /spark/conf/benchmarkConf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/conf/benchmarkConf.yaml -------------------------------------------------------------------------------- /spark/log/q1.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q1.sql.log -------------------------------------------------------------------------------- /spark/log/q2.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q2.sql.log -------------------------------------------------------------------------------- /spark/log/q3.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q3.sql.log -------------------------------------------------------------------------------- /spark/log/q4.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q4.sql.log 
-------------------------------------------------------------------------------- /spark/log/q5.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q5.sql.log -------------------------------------------------------------------------------- /spark/log/q6.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q6.sql.log -------------------------------------------------------------------------------- /spark/log/q7.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q7.sql.log -------------------------------------------------------------------------------- /spark/log/q8.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q8.sql.log -------------------------------------------------------------------------------- /spark/log/q9.sql.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haojinIntel/streaming_benchmark/dfe8372dc16378657e252eb9a4b08631bc6e1ad0/spark/log/q9.sql.log -------------------------------------------------------------------------------- /spark/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | streaming_benchmark 7 | com.intel.streaming_benchmark 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | spark 13 | 14 | 15 | 16 | com.intel.streaming_benchmark 17 | common 18 | 1.0-SNAPSHOT 19 | 20 | 21 | 22 | org.apache.spark 23 | spark-sql-kafka-0-10_2.11 24 | 2.3.1 25 | compile 26 | 27 | 28 | kafka-clients 29 | org.apache.kafka 30 | 31 | 32 | 33 | 34 | 35 | org.apache.kafka 36 | kafka-clients 37 | 0.10.2.1 38 | 39 | 40 | 41 | org.apache.spark 42 | spark-streaming-kafka-0-10_2.11 43 | 2.3.1 44 | 45 | 46 | 47 | org.apache.spark 48 | spark-streaming_2.11 49 | 2.3.1 50 | compile 51 | 52 | 53 | org.apache.spark 54 | spark-sql_2.11 55 | 2.3.1 56 | compile 57 | 58 | 59 | org.apache.spark 60 | spark-sql-kafka-0-10_2.11 61 | 2.3.1 62 | 63 | 64 | 65 | 66 | com.fasterxml.jackson.core 67 | jackson-databind 68 | 2.6.5 69 | 70 | 71 | 72 | net.jpountz.lz4 73 | lz4 74 | 1.3.0 75 | 76 | 77 | 78 | 79 | 80 | src/main/java 81 | 82 | 83 | src/main/resources 84 | true 85 | 86 | 87 | src/main/java 88 | 89 | ../*.java 90 | 91 | 92 | 93 | 94 | 95 | 96 | org.codehaus.mojo 97 | build-helper-maven-plugin 98 | 1.4 99 | 100 | 101 | add-source 102 | generate-sources 103 | 104 | add-source 105 | 106 | 107 | 108 | ../common/src/main/scala 109 | ../common/src/main/java 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | net.alchim31.maven 120 | scala-maven-plugin 121 | 3.2.2 122 | 123 | 124 | scala-compile-first 125 | process-resources 126 | 127 | add-source 128 | compile 129 | 130 | 131 | 132 | scala-test-compile 133 | process-test-resources 134 | 135 | testCompile 136 | 137 | 138 | 139 | 140 | 141 | org.apache.maven.plugins 142 | maven-compiler-plugin 143 | 3.2 144 | 145 | 1.8 146 | 1.8 147 | UTF-8 148 | 149 | 150 | 151 | 152 | 153 | org.apache.maven.plugins 154 | maven-compiler-plugin 155 | 
3.8.0 156 | 157 | 1.8 158 | 1.8 159 | 160 | 161 | 162 | org.apache.maven.plugins 163 | maven-resources-plugin 164 | 3.0.2 165 | 166 | UTF-8 167 | 168 | 169 | 170 | 171 | org.apache.maven.plugins 172 | maven-shade-plugin 173 | 2.4.3 174 | 175 | 176 | package 177 | 178 | shade 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | org.apache.maven.plugins 192 | maven-shade-plugin 193 | 2.4.3 194 | 195 | 196 | package 197 | 198 | shade 199 | 200 | 201 | 202 | 203 | *:* 204 | 205 | META-INF/*.SF 206 | META-INF/*.DSA 207 | META-INF/*.RSA 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | -------------------------------------------------------------------------------- /spark/query/q1.sql: -------------------------------------------------------------------------------- 1 | select 2 | commodity, count(userId) num, WINDOW(times, '10 seconds').start, WINDOW(times, '10 seconds').end 3 | from 4 | shopping 5 | group BY 6 | WINDOW(times, '10 seconds'), commodity -------------------------------------------------------------------------------- /spark/query/q2.sql: -------------------------------------------------------------------------------- 1 | select 2 | strategy, site, pos_id, WINDOW(click_time, '10 seconds').start, pos_id, WINDOW(click_time, '10 seconds').end, count(*) click_count 3 | from 4 | click 5 | GROUP BY 6 | strategy, site, pos_id, WINDOW(click_time, '10 seconds') -------------------------------------------------------------------------------- /spark/query/q3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | strategy, site, pos_id, WINDOW(imp_time, '10 seconds').start, pos_id, WINDOW(imp_time, '10 seconds').end, SUM(cost) 3 | FROM 4 | imp 5 | GROUP BY 6 | strategy, site, pos_id, WINDOW(imp_time, '10 seconds') -------------------------------------------------------------------------------- /spark/query/q4.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | b.device_id, a.strategy, a.site, a.pos_id, count(b.device_id) 3 | FROM 4 | click a 5 | JOIN 6 | dau b 7 | ON 8 | a.device_id = b.device_id AND a.click_time BETWEEN b.dau_time - INTERVAL 1 second AND b.dau_time + INTERVAL 1 second 9 | GROUP BY 10 | b.device_id, a.strategy, a.site, a.pos_id, WINDOW(a.click_time, '10 seconds') 11 | -------------------------------------------------------------------------------- /spark/query/q5.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sessionId, MAX(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss')) as timmm , MIN(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss')) as timmm2, count(*) 3 | FROM 4 | userVisit 5 | GROUP BY 6 | sessionId, WINDOW(actionTime, '10 seconds') -------------------------------------------------------------------------------- /spark/query/q6.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sessionId, MAX(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss'))-MIN(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss')) as len, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)) as dt, HOUR(CAST(actionTime AS TIMESTAMP)) as h, COUNT(sessionId) num 3 | FROM 4 | userVisit 5 | GROUP BY 6 | sessionId, DAYOFMONTH(CAST(actionTime AS TIMESTAMP)), HOUR(CAST(actionTime AS TIMESTAMP)), WINDOW(actionTime, '10 seconds') 7 | HAVING 8 | (MAX(TO_UNIX_TIMESTAMP(actionTime, 
'yyyy-MM-dd HH:mm:ss'))-MIN(TO_UNIX_TIMESTAMP(actionTime, 'yyyy-MM-dd HH:mm:ss'))) < 100 -------------------------------------------------------------------------------- /spark/query/q7.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | WINDOW(actionTime, '10 seconds').start starts, WINDOW(actionTime, '10 seconds').end finish , cityId, payProductIds, count(*) 3 | FROM 4 | userVisit 5 | WHERE 6 | payProductIds IS NOT NULL 7 | GROUP BY 8 | cityId, payProductIds, WINDOW(actionTime, '10 seconds') -------------------------------------------------------------------------------- /spark/query/q8.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | WINDOW(actionTime, '10 seconds').start start, WINDOW(actionTime, '10 seconds').end finish ,count(*) as sequence 3 | FROM 4 | userVisit 5 | WHERE 6 | clickCategoryId IS NOT NULL 7 | GROUP BY 8 | cityId, WINDOW(actionTime, '10 seconds') -------------------------------------------------------------------------------- /spark/query/q9.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.device_id, a.strategy, a.site, a.pos_id, b.var1, count(*) 3 | FROM 4 | (SELECT device_id, strategy, site, pos_id FROM click) a 5 | JOIN 6 | (SELECT device_id, dau_time as var1 FROM dau) b 7 | ON 8 | a.device_id = b.device_id 9 | GROUP BY 10 | a.device_id, a.strategy, a.site, a.pos_id, b.var1 -------------------------------------------------------------------------------- /spark/result/result.log: -------------------------------------------------------------------------------- 1 | Finished time: 2019-11-05 20:56:58; q9.sql Runtime: 62 TPS:5884 2 | -------------------------------------------------------------------------------- /spark/src/main/java/com/intel/streaming_benchmark/spark/Benchmark.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.spark; 2 | 3 | import com.intel.streaming_benchmark.common.*; 4 | import com.intel.streaming_benchmark.utils.SchemaProvider; 5 | import com.intel.streaming_benchmark.utils.SparkBenchConfig; 6 | import org.apache.spark.api.java.JavaSparkContext; 7 | import org.apache.spark.api.java.function.MapPartitionsFunction; 8 | import org.apache.spark.sql.Dataset; 9 | import org.apache.spark.sql.Row; 10 | import org.apache.spark.sql.RowFactory; 11 | import org.apache.spark.sql.SparkSession; 12 | import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder; 13 | import org.apache.spark.sql.streaming.StreamingQuery; 14 | import org.apache.spark.sql.streaming.Trigger; 15 | import org.apache.spark.util.LongAccumulator; 16 | import com.alibaba.fastjson.JSONObject; 17 | import java.io.BufferedWriter; 18 | import java.io.File; 19 | import java.io.FileWriter; 20 | import java.sql.Timestamp; 21 | import java.util.*; 22 | 23 | public class Benchmark { 24 | public static void main(String[] args) throws Exception { 25 | if (args.length < 2) 26 | BenchLogUtil.handleError("Usage: RunBench "); 27 | 28 | ConfigLoader cl = new ConfigLoader(args[0]); 29 | String benchmarkConfDir = new File(args[0]).getParent(); 30 | //spark config 31 | String sparkConf = benchmarkConfDir + "/../spark/conf/benchmarkConf.yaml"; 32 | cl.merge(sparkConf); 33 | // Prepare configuration 34 | SparkBenchConfig conf = new SparkBenchConfig(); 35 | conf.brokerList = cl.getProperty(StreamBenchConfig.KAFKA_BROKER_LIST); 36 | conf.zkHost = 
cl.getProperty(StreamBenchConfig.ZK_HOST); 37 | conf.consumerGroup = cl.getProperty(StreamBenchConfig.CONSUMER_GROUP); 38 | conf.topic = QueryConfig.getTables(args[1]); 39 | conf.sqlLocation = benchmarkConfDir + "/../spark/query"; 40 | conf.resultLocation = benchmarkConfDir + "/../spark/result"; 41 | conf.sqlName = args[1]; 42 | conf.runTime = Integer.valueOf(args[2]); 43 | runQuery(conf); 44 | } 45 | 46 | public static void runQuery(SparkBenchConfig config) throws Exception { 47 | 48 | //create SparkSession 49 | SparkSession spark = SparkSession 50 | .builder() 51 | .appName(config.sqlName) 52 | // .master("local[2]") 53 | .getOrCreate(); 54 | JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext()); 55 | 56 | String[] topics = config.topic.split(","); 57 | Dataset df; 58 | LongAccumulator longAccumulator = jsc.sc().longAccumulator(); 59 | Long startTime= System.currentTimeMillis(); 60 | 61 | //generate table 62 | for(int i = 0; i < topics.length; i++){ 63 | ExpressionEncoder encoder = SchemaProvider.provideSchema(topics[i]); 64 | if(topics[i].equals("shopping")){ 65 | //read data from kafka and get primary data which need to be paresd to mutiple columns. 66 | df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction() { 67 | @Override 68 | public Iterator call(Iterator input) throws Exception { 69 | List rows = new ArrayList<>(); 70 | while (input.hasNext()) { 71 | longAccumulator.add(1); 72 | Row next = input.next(); 73 | String[] split = next.getString(0).split(","); 74 | rows.add(RowFactory.create(split[0],split[1],Timestamp.valueOf(DateUtils.parseLong2String(Long.valueOf(split[2]))))); 75 | } 76 | return rows.iterator(); 77 | } 78 | }, encoder).withWatermark("times", "4 seconds"); 79 | // . 
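                // Shopping records arrive as comma-separated strings; the third field is an epoch-millis
                // timestamp converted to a SQL Timestamp ("times"), which serves as the event-time column
                // for the windowed queries and for the 4-second watermark above. The accumulator counts
                // every parsed record so the total is available for the TPS figure reported at the end of the run.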
80 |             }else if(topics[i].equals("click")){
81 |                 df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction<Row, Row>() {
82 |                     @Override
83 |                     public Iterator<Row> call(Iterator<Row> input) throws Exception {
84 |                         List<Row> rows = new ArrayList<>();
85 |                         while (input.hasNext()) {
86 |                             longAccumulator.add(1);
87 |                             JSONObject obj = JSONObject.parseObject(input.next().getString(0));
88 |                             // JSONObject obj = new JSONObject(input.next().getString(0));
89 |                             rows.add(RowFactory.create(Timestamp.valueOf(DateUtils.parseLong2String(obj.getLong("click_time"))), obj.getString("strategy"), obj.getString("site"), obj.getString("pos_id"), obj.getString("poi_id"), obj.getString("device_id")));
90 |                         }
91 |                         return rows.iterator();
92 |                     }
93 |                 }, encoder).withWatermark("click_time", "4 seconds");
94 |
95 |             }else if(topics[i].equals("imp")){
96 |                 df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction<Row, Row>() {
97 |                     @Override
98 |                     public Iterator<Row> call(Iterator<Row> input) throws Exception {
99 |                         List<Row> rows = new ArrayList<>();
100 |                         while (input.hasNext()) {
101 |                             longAccumulator.add(1);
102 |                             JSONObject obj = JSONObject.parseObject(input.next().getString(0));
103 |                             // JSONObject obj = new JSONObject(input.next().getString(0));
104 |                             rows.add(RowFactory.create(Timestamp.valueOf(DateUtils.parseLong2String(obj.getLong("imp_time"))), obj.getString("strategy"), obj.getString("site"), obj.getString("pos_id"), obj.getString("poi_id"), obj.getDouble("cost"), obj.getString("device_id")));
105 |                         }
106 |                         return rows.iterator();
107 |                     }
108 |                 }, encoder).withWatermark("imp_time", "4 seconds");
109 |             }else if(topics[i].equals("dau")){
110 |                 df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList)
111 |                     .option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction<Row, Row>() {
112 |                     @Override
113 |                     public Iterator<Row> call(Iterator<Row> input) throws Exception {
114 |                         List<Row> rows = new ArrayList<>();
115 |                         while (input.hasNext()) {
116 |                             longAccumulator.add(1);
117 |                             JSONObject obj = JSONObject.parseObject(input.next().getString(0));
118 |                             // JSONObject obj = new JSONObject(input.next().getString(0));
119 |                             rows.add(RowFactory.create(Timestamp.valueOf(DateUtils.parseLong2String(obj.getLong("dau_time"))), obj.getString("device_id")));
120 |                         }
121 |                         return rows.iterator();
122 |                     }
123 |                 }, encoder).withWatermark("dau_time", "4 seconds");
124 |             }else if(topics[i].equals("userVisit")){
125 |                 df = spark.readStream().format("kafka").option("kafka.bootstrap.servers", config.brokerList).option("subscribe", topics[i]).load().selectExpr("CAST(value AS STRING)").mapPartitions(new MapPartitionsFunction<Row, Row>() {
126 |                     @Override
127 |                     public Iterator<Row> call(Iterator<Row> input) throws Exception {
128 |                         List<Row> rows = new ArrayList<>();
129 |                         while (input.hasNext()) {
130 |                             longAccumulator.add(1);
131 |                             String[] split = input.next().getString(0).split(",");
132 |                             rows.add(RowFactory.create(split[0], Long.valueOf(split[1]), split[2], Long.valueOf(split[3]), Timestamp.valueOf(DateUtils.parseLong2String(Long.valueOf(split[4]))), split[5], split[6], split[7], split[8], split[9], split[10], split[11], Integer.valueOf(split[12])));
133 |                         }
134 |                         return rows.iterator();
135 |                     }
136 |                 }, encoder).withWatermark("actionTime", "4 seconds");
137 |             }else{
138 |                 System.out.println("No such topic, please check your benchmarkConf.yaml");
139 |                 return;
140 |             }
141 |
142 |             df.createOrReplaceTempView(topics[i]);
143 |         }
144 |
145 |         //runQuery
146 |         File file = new File(config.sqlLocation + "/" + config.sqlName);
147 |         if (!file.exists()) {
148 |             return;
149 |         }
150 |         try {
151 |             String queryString = DateUtils.fileToString(file);
152 |             Dataset<Row> sql = spark.sql(queryString);
153 |             StreamingQuery start = sql.writeStream().outputMode("append").format("console").trigger(Trigger.ProcessingTime("30 seconds")).start();
154 |             start.awaitTermination(config.runTime * 1000);
155 |             System.out.println("2 Total number: " + longAccumulator.value());
156 |
157 |         } catch (Exception e) {
158 |             e.printStackTrace();
159 |         }
160 |         Long finishTime = System.currentTimeMillis();
161 |         Long runningTime = (finishTime - startTime) / 1000;
162 |         File resultFile = new File(config.resultLocation + "/result.log");
163 |         if (!resultFile.exists()) {
164 |             resultFile.createNewFile();
165 |         }
166 |         FileWriter fileWriter = new FileWriter(config.resultLocation + "/result.log", true);
167 |         BufferedWriter bufferWriter = new BufferedWriter(fileWriter);
168 |         bufferWriter.write("Finished time: " + DateUtils.parseLong2String(finishTime) + "; " + config.sqlName + " Runtime: " + runningTime + " TPS:" + longAccumulator.value()/runningTime + "\r\n");
169 |         bufferWriter.close();
170 |
171 |     }
172 | }
173 |
-------------------------------------------------------------------------------- /spark/src/main/java/com/intel/streaming_benchmark/utils/SchemaProvider.java: --------------------------------------------------------------------------------
1 | package com.intel.streaming_benchmark.utils;
2 |
3 | import org.apache.spark.sql.Row;
4 | import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
5 | import org.apache.spark.sql.catalyst.encoders.RowEncoder;
6 | import org.apache.spark.sql.types.DataTypes;
7 | import org.apache.spark.sql.types.StructType;
8 |
9 | public class SchemaProvider {
10 |
11 |     public static ExpressionEncoder<Row> provideSchema(String topic){
12 |         StructType type = new StructType();
13 |         if(topic.equals("shopping")){
14 |             type = type.add("userID", DataTypes.StringType)
15 |                 .add("commodity", DataTypes.StringType)
16 |                 .add("times", DataTypes.TimestampType);
17 |         }else if(topic.equals("click")){
18 |             type = type.add("click_time", DataTypes.TimestampType)
19 |                 .add("strategy", DataTypes.StringType)
20 |                 .add("site", DataTypes.StringType)
21 |                 .add("pos_id", DataTypes.StringType)
22 |                 .add("poi_id", DataTypes.StringType)
23 |                 .add("device_id", DataTypes.StringType);
24 |         }else if(topic.equals("imp")){
25 |             type = type.add("imp_time", DataTypes.TimestampType)
26 |                 .add("strategy", DataTypes.StringType)
27 |                 .add("site", DataTypes.StringType)
28 |                 .add("pos_id", DataTypes.StringType)
29 |                 .add("poi_id", DataTypes.StringType)
30 |                 .add("cost", DataTypes.DoubleType)
31 |                 .add("device_id", DataTypes.StringType);
32 |         }else if(topic.equals("dau")){
33 |             type = type.add("dau_time", DataTypes.TimestampType)
34 |                 .add("device_id", DataTypes.StringType);
35 |         }else if(topic.equals("userVisit")){
36 |             type = type.add("date", DataTypes.StringType)
37 |                 .add("userId", DataTypes.LongType)
38 |                 .add("sessionId", DataTypes.StringType)
39 |                 .add("pageId", DataTypes.LongType)
40 |                 .add("actionTime", DataTypes.TimestampType)
41 |                 .add("searchKeyword", DataTypes.StringType)
42 |                 .add("clickCategoryId", DataTypes.StringType)
43 |                 .add("clickProductId", DataTypes.StringType)
44 |                 .add("orderCategoryIds", DataTypes.StringType)
.add("orderCategoryIds", DataTypes.StringType) 45 | .add("orderProductIds", DataTypes.StringType) 46 | .add("payCategoryIds", DataTypes.StringType) 47 | .add("payProductIds", DataTypes.StringType) 48 | .add("cityId", DataTypes.IntegerType); 49 | }else { 50 | System.out.println("No such table schema!!!"); 51 | return null; 52 | } 53 | 54 | return RowEncoder.apply(type); 55 | 56 | } 57 | 58 | 59 | } 60 | -------------------------------------------------------------------------------- /spark/src/main/java/com/intel/streaming_benchmark/utils/SparkBenchConfig.java: -------------------------------------------------------------------------------- 1 | package com.intel.streaming_benchmark.utils; 2 | 3 | public class SparkBenchConfig { 4 | // Kafka related 5 | public String zkHost; 6 | public String brokerList; 7 | public String topic; 8 | public String consumerGroup; 9 | public String valueDeserializer; 10 | public String keyDeserializer; 11 | 12 | 13 | // public String offsetReset; 14 | // public String reportTopic; 15 | 16 | // Spark related 17 | public long checkpointDuration; 18 | public String resultLocation; 19 | public String sqlLocation; 20 | public String sqlName; 21 | public String timeType; 22 | 23 | 24 | public int runTime; 25 | 26 | } 27 | -------------------------------------------------------------------------------- /utils/dataGenerator.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curDir=$(cd `dirname $0`;pwd) 4 | #curDir=`dirname $0` 5 | echo $curDir 6 | rootDir=$(dirname $curDir) 7 | echo $rootDir 8 | 9 | DATAGEN_TIME=$1 10 | THREAD_PER_NODE=$2 11 | SQL=$3 12 | ENGINE=$4 13 | 14 | 15 | /opt/Beaver/jdk/bin/java -cp $rootDir/dataGen/target/dataGen-1.0-SNAPSHOT.jar com.intel.streaming_benchmark.Datagen $DATAGEN_TIME $THREAD_PER_NODE $SQL $rootDir/conf/benchmarkConf.yaml >> $rootDir/$ENGINE/log/dataGen_${SQL}.log 2>&1 & 16 | --------------------------------------------------------------------------------