├── bin ├── zookeeper.properties ├── config.properties ├── frontserver_deploy.sh ├── spark_deploy.sh └── kafka_deploy.sh ├── frontend ├── GroupManager │ ├── .DS_Store │ ├── conf │ │ └── gmConfig │ ├── src │ │ └── main │ │ │ └── java │ │ │ └── frontend │ │ │ ├── DecisionCollector.java │ │ │ ├── DecisionCollector_EG.java │ │ │ ├── GroupManager.java │ │ │ ├── GroupTableUpdater.java │ │ │ └── InfoSender.java │ └── pom.xml ├── trace │ ├── tracesort.py │ ├── fault_tolerance │ │ ├── ft.conf │ │ └── sort.py │ ├── trace_parser.py │ └── trace_parser_mulit.py ├── webphp │ ├── player.php │ ├── update_EG.php │ ├── update.php │ └── player_EG.php ├── DecisionMaker │ ├── src │ │ └── main │ │ │ └── java │ │ │ └── frontend │ │ │ ├── HistoryObject.java │ │ │ ├── HistoryData.java │ │ │ └── DecisionMaker.java │ ├── algorithms │ │ ├── HistoryData_EG.java │ │ └── HistoryData_DUCB.java │ └── pom.xml └── Communicator │ ├── pom.xml │ └── src │ └── main │ └── java │ └── frontend │ └── Communicator.java ├── README.md ├── conf └── frontends │ ├── ' │ └── frontend-1 └── sbin ├── setup-all.sh ├── run_kafka.sh ├── run_zookeeper.sh ├── start-service-all.sh ├── run_uploadtrace_EG.sh ├── run_uploadtrace.sh ├── run_groupmanager.sh ├── run_decisionmaker.sh ├── install.sh └── run_communicator.sh /bin/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # add some configurations 2 | tickTime=2000 3 | initLimit=5 4 | syncLimit=2 5 | -------------------------------------------------------------------------------- /frontend/GroupManager/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nsdi2017-ddn/pytheas/HEAD/frontend/GroupManager/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pytheas 2 | 3 | This is a new version of pytheas. 4 | For this version, we want to standardize the project and make it easy to deploy and use. 
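As a rough quick-start sketch (hostnames and paths below are placeholders, and your environment may need different options): list the machines of each frontend cluster in `conf/frontends/frontend-<n>` (one hostname per line, `#` for comments, as in the example files below), then run the setup and start scripts from the repository root.

```bash
# hypothetical quick start; adjust PYTHEAS_HOME and the host files to your deployment
export PYTHEAS_HOME=$(pwd)
bash sbin/setup-all.sh           # installs Spark, Kafka, and the front server on every listed host
bash sbin/start-service-all.sh   # starts zookeeper/kafka, the Communicator, DecisionMaker, and GroupManager
```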
5 | -------------------------------------------------------------------------------- /conf/frontends/': -------------------------------------------------------------------------------- 1 | # This is an example 2 | ms0626.utah.cloudlab.us 3 | #c220g1-030821.wisc.cloudlab.us 4 | #c220g1-030830.wisc.cloudlab.us 5 | #c220g1-030827.wisc.cloudlab.us 6 | -------------------------------------------------------------------------------- /conf/frontends/frontend-1: -------------------------------------------------------------------------------- 1 | # This is an example 2 | ms0626.utah.cloudlab.us 3 | #c220g1-030821.wisc.cloudlab.us 4 | #c220g1-030830.wisc.cloudlab.us 5 | #c220g1-030827.wisc.cloudlab.us 6 | -------------------------------------------------------------------------------- /frontend/GroupManager/conf/gmConfig: -------------------------------------------------------------------------------- 1 | Time BufRate AvgBitrate JoinTime Asn City Country ConnType State Os Liveorvod ObjectId PlayerType InitBitrate InitCdn decision socre 2 | decision socre 3 | -------------------------------------------------------------------------------- /frontend/trace/tracesort.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | fin = open(sys.argv[1]) 6 | records = fin.readlines() 7 | fin.close() 8 | records = sorted(records, key=lambda record:record.split("\t", 1)[0]) 9 | fout = open(sys.argv[2], "w") 10 | fout.writelines(records) 11 | fout.close() 12 | 13 | -------------------------------------------------------------------------------- /frontend/trace/fault_tolerance/ft.conf: -------------------------------------------------------------------------------- 1 | set terminal png truecolor size 600,400 font 'Helvetica,16' 2 | set autoscale 3 | set xrange [0:] 4 | set yrange [0:] 5 | set key right nobox 6 | set style data lines 7 | set output "ft.png" 8 | set ylabel "Average Cost" font ",18" offset 1,0,0 9 | set xlabel "Time (sec)" font ",18" 10 | plot "r1" using 1 title "frontend1", \ 11 | "r1" using 2 title "frontend2" 12 | -------------------------------------------------------------------------------- /bin/config.properties: -------------------------------------------------------------------------------- 1 | #config file of communicator 2 | 3 | managementLabelsNum=2 4 | 5 | updateTopic=internal_groups 6 | uploadTopic=upload 7 | decisionTopic=decision 8 | subscribeTopic=subscribe 9 | forwardTopic=external_groups 10 | sampleTopic=sample 11 | aliveTopic=alive 12 | 13 | clusterID=frontend1 14 | 15 | backendBrokers=10.11.10.2:9092 16 | 17 | frontend1=10.11.10.3:9092 18 | frontend2=10.11.10.4:9092 19 | -------------------------------------------------------------------------------- /frontend/trace/fault_tolerance/sort.py: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/python 2 | import sys 3 | 4 | fin = open(sys.argv[1]) 5 | list1 = [] 6 | list11 = [] 7 | list2 = [] 8 | list22 = [] 9 | RPS = 100 10 | for line in fin: 11 | record = line.split(",") 12 | if record[0] == '0': 13 | list1.append(record[2].strip()) 14 | else: 15 | list2.append(record[2].strip()) 16 | i = 0 17 | s = 0 18 | for record in list1: 19 | i+=1 20 | s+=float(record) 21 | if i == RPS: 22 | i = 0 23 | list11.append(s/RPS) 24 | s = 0 25 | i = 0 26 | s = 0 27 | for record in list2: 28 | i+=1 29 | s+=float(record) 30 | if i == RPS: 31 | i = 0 32 | list22.append(s/RPS) 33 | s = 0 34 | fout = open(sys.argv[2], 'w') 35 | 
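# pair up the per-window averages of the two frontends and write one tab-separated row per window;
# the final print reports how many windows were dropped from the longer of the two traces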
for i in range(min(len(list11), len(list22))): 36 | fout.write("%f\t%f\n"%(list11[i], list22[i])) 37 | print abs(len(list11) - len(list22)) 38 | 39 | -------------------------------------------------------------------------------- /sbin/setup-all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | [ -z $PYTHEAS_HOME ] && export PYTHEAS_HOME=$(cd `dirname $0`/..; pwd) 4 | 5 | PYTHEAS_CONF_DIR=${PYTHEAS_HOME}/conf 6 | PYTHEAS_BIN_DIR=${PYTHEAS_HOME}/bin 7 | PYTHEAS_LOG_DIR=${PYTHEAS_HOME}/log 8 | debug=${DEBUG:-1} 9 | 10 | mkdir -p $PYTHEAS_LOG_DIR 11 | mkdir -p ${PYTHEAS_LOG_DIR}/setup 12 | rm -rf ${PYTHEAS_LOG_DIR}/setup/* 13 | 14 | for frontend in `ls ${PYTHEAS_CONF_DIR}/frontends/frontend-*`; do 15 | echo "**************************************************************************" 16 | echo "* Setting up ${frontend##*/}" 17 | echo "**************************************************************************" 18 | machinelist=$(cat $frontend | sed "s/#.*$//;/^$/d") 19 | for host in $machinelist 20 | do 21 | log_file=${PYTHEAS_LOG_DIR}/setup/${frontend##*/} 22 | echo -e "Setting up $host.\nSee $log_file for log" 23 | if [[ ${debug} -ne 0 ]]; then 24 | bash ${PYTHEAS_HOME}/sbin/install.sh $host ${frontend##*/} $PYTHEAS_BIN_DIR | tee -a $log_file 25 | else 26 | bash ${PYTHEAS_HOME}/sbin/install.sh $host ${frontend##*/} $PYTHEAS_BIN_DIR >> $log_file 27 | fi 28 | done 29 | done 30 | -------------------------------------------------------------------------------- /sbin/run_kafka.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | 4 | 5 | SSH="junchenj@"$1 6 | PRE=-o\ "StrictHostKeyChecking=no" 7 | 8 | FRONT_SERVER="front_server" 9 | SPARK="spark" 10 | KAFKA="kafka" 11 | TRACE="trace" 12 | 13 | REMOTE_USER_ROOT="/users/junchenj" 14 | REMOTE_SYS_ROOT="/usr/share" 15 | REMOTE_LOG_ROOT="/users/junchenj/log" 16 | LOG_FILE=$REMOTE_LOG_ROOT"/log_kafka" 17 | 18 | if [ "$#" -ne 1 ]; then 19 | echo "Error: need exactly one argument" 20 | echo "Format: sh run_kafka.sh host" 21 | exit 22 | fi 23 | 24 | echo "" 25 | echo "" 26 | echo "" 27 | echo "**************************************************************************" 28 | echo "* Running kafka on "$1 29 | echo "**************************************************************************" 30 | 31 | echo log file $LOG_FILE 32 | ssh $PRE $SSH -t 'cd '$REMOTE_SYS_ROOT/$KAFKA'; sudo bin/kafka-server-stop.sh' 33 | ssh $PRE $SSH "sh -c 'cd $REMOTE_SYS_ROOT/$KAFKA && sudo bin/kafka-server-start.sh config/server.properties > $LOG_FILE 2>&1 &'" 34 | 35 | echo "**************************************************************************" 36 | echo "* Done kafka on "$1 37 | echo "**************************************************************************" 38 | echo "" 39 | echo "" 40 | echo "" 41 | 42 | -------------------------------------------------------------------------------- /sbin/run_zookeeper.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | 4 | 5 | SSH="junchenj@"$1 6 | PRE=-o\ "StrictHostKeyChecking=no" 7 | 8 | FRONT_SERVER="front_server" 9 | SPARK="spark" 10 | KAFKA="kafka" 11 | TRACE="trace" 12 | 13 | REMOTE_USER_ROOT="/users/junchenj" 14 | REMOTE_SYS_ROOT="/usr/share" 15 | REMOTE_LOG_ROOT="/users/junchenj/log" 16 | LOG_FILE=$REMOTE_LOG_ROOT"/log_zookeeper" 17 | 18 | if [ "$#" -ne 1 ]; then 19 | echo "Error: need exactly one argument" 20 | echo "Format: sh run_zookeeper.sh host" 21 
| exit 22 | fi 23 | 24 | echo "" 25 | echo "" 26 | echo "" 27 | echo "**************************************************************************" 28 | echo "* Running zookeeper on "$1 29 | echo "**************************************************************************" 30 | 31 | ssh $PRE $SSH -t 'cd '$REMOTE_SYS_ROOT/$KAFKA'; sudo bin/zookeeper-server-stop.sh' 32 | ssh $PRE $SSH "sh -c 'cd $REMOTE_SYS_ROOT/$KAFKA && sudo nohup bin/zookeeper-server-start.sh config/zookeeper.properties > $LOG_FILE 2>&1 &'" 33 | 34 | echo "**************************************************************************" 35 | echo "* Done zookeeper on "$1 36 | echo "**************************************************************************" 37 | echo "" 38 | echo "" 39 | echo "" 40 | 41 | -------------------------------------------------------------------------------- /bin/frontserver_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Auto install httpd and configure the environment 4 | # 5 | # Author: Shijie Sun 6 | # Email: septimus145@gmail.com 7 | # August, 2016 8 | 9 | if [[ $UID != 0 ]]; then 10 | echo "Please run this script with sudo:" 11 | echo "sudo $0 $*" 12 | exit 1 13 | fi 14 | 15 | # Install editor 16 | sudo apt-get update 17 | which vim >&/dev/null || sudo apt-get install -y vim 18 | which tmux >&/dev/null || sudo apt-get install -y tmux 19 | 20 | # Install jdk and maven 21 | which javac >&/dev/null || sudo apt-get install -y default-jdk 22 | which mvn >&/dev/null || sudo apt-get install -y maven 23 | if [ -z $JAVA_HOME ]; then 24 | JAVA_HOME=$(sudo update-java-alternatives -l | head -n 1 | sed -e 's/ \+/ /g' | cut -f3 -d' ') 25 | echo JAVA_HOME=\"$JAVA_HOME\" | sudo tee --append /etc/environment 26 | export JAVA_HOME=$JAVA_HOME 27 | fi 28 | 29 | # Install httpd and php5 30 | sudo apt-get install -y apache2 php libapache2-mod-php 31 | 32 | # Configure the httpd 33 | #sudo cp update.php /var/www/html 34 | #sudo cp player.php /var/www/html 35 | #sudo cp player_EG.php /var/www/html 36 | sudo mkdir /var/www/info 37 | sudo chmod 777 /var/www/info 38 | sudo sed -i -e "s/\(KeepAlive \).*/\1"Off"/" \ 39 | /etc/apache2/apache2.conf 40 | sudo service apache2 reload 41 | 42 | echo Success 43 | exit 0 44 | -------------------------------------------------------------------------------- /frontend/webphp/player.php: -------------------------------------------------------------------------------- 1 | $_POST["payload"], 33 | "group_id" => $group_id 34 | ); 35 | $in = json_encode($info, JSON_UNESCAPED_SLASHES).PHP_EOL; 36 | file_put_contents($path . 
'/info_queue',$in,FILE_APPEND|LOCK_EX); 37 | } 38 | 39 | ?> 40 | -------------------------------------------------------------------------------- /bin/spark_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Auto install Spark Streaming and configure the environment 4 | # 5 | # Author: Shijie Sun 6 | # Email: septimus145@gmail.com 7 | # July, 2016 8 | 9 | if [[ $UID != 0 ]]; then 10 | echo "Please run this script with sudo:" 11 | echo "sudo $0 $*" 12 | exit 1 13 | fi 14 | 15 | # Install editor 16 | sudo apt-get update 17 | which vim >&/dev/null || sudo apt-get install -y vim 18 | which tmux >&/dev/null || sudo apt-get install -y tmux 19 | 20 | # Install jdk and maven 21 | which javac >&/dev/null || sudo apt-get install -y default-jdk 22 | which mvn >&/dev/null || sudo apt-get install -y maven 23 | if [ -z $JAVA_HOME ]; then 24 | JAVA_HOME=$(sudo update-java-alternatives -l | head -n 1 | sed -e 's/ \+/ /g' | cut -f3 -d' ') 25 | echo JAVA_HOME=\"$JAVA_HOME\" | sudo tee --append /etc/environment 26 | export JAVA_HOME=$JAVA_HOME 27 | fi 28 | 29 | # Download the spark 30 | spark_path="/usr/share/spark/" 31 | wget http://www-eu.apache.org/dist/spark/spark-1.6.2/spark-1.6.2-bin-hadoop2.6.tgz 32 | sudo tar -xvzf spark-1.6.2-bin-hadoop2.6.tgz -C /usr/share 33 | sudo mv /usr/share/spark-1.6.2-bin-hadoop2.6 /usr/share/spark 34 | rm spark-1.6.2-bin-hadoop2.6.tgz 35 | echo "spark.io.compression.codec lzf" | sudo tee --append /usr/share/spark/conf/spark-defaults.conf 36 | 37 | #sudo mkdir -p /var/spark_tmp 38 | #sudo cp ./entry.dat /var/spark_tmp/ 39 | echo Success 40 | exit 0 41 | -------------------------------------------------------------------------------- /frontend/webphp/update_EG.php: -------------------------------------------------------------------------------- 1 | 2) { 25 | $epsilon = floatval($decisions[0]); 26 | // get random decision 27 | if (rand(0, 100) < $epsilon * 100) { 28 | $out = $decisions[rand(0,count($decisions)-3)+2]; 29 | } 30 | // get best decision 31 | else { 32 | $out = $decisions[1]; 33 | } 34 | } 35 | } 36 | 37 | // response 38 | if (empty($out)) 39 | echo "Oops"; 40 | else 41 | echo $out; 42 | 43 | // Encode the info with json and write it into file 44 | $info = array( 45 | "update" => $_POST["payload"], 46 | "group_id" => $group_id 47 | ); 48 | 49 | $in = json_encode($info, JSON_UNESCAPED_SLASHES).PHP_EOL; 50 | //echo $in; 51 | file_put_contents('/var/www/info/info_queue',$in,FILE_APPEND|LOCK_EX); 52 | 53 | ?> 54 | -------------------------------------------------------------------------------- /frontend/webphp/update.php: -------------------------------------------------------------------------------- 1 | 0) 35 | $out = $decision[0]; 36 | } 37 | 38 | // response 39 | if (empty($out)) 40 | echo "Oops"; 41 | else 42 | echo $out; 43 | 44 | // Encode the info with json and write it into file 45 | $info = array( 46 | "update" => $_POST["payload"], 47 | "group_id" => $group_id 48 | ); 49 | 50 | $in = json_encode($info, JSON_UNESCAPED_SLASHES).PHP_EOL; 51 | //echo $in; 52 | file_put_contents('/var/www/info/info_queue',$in,FILE_APPEND|LOCK_EX); 53 | 54 | ?> 55 | -------------------------------------------------------------------------------- /sbin/start-service-all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | function start_logic_node { 4 | host=$1 5 | . 
"${PYTHEAS_HOME}/sbin/run_decisionmaker.sh" -f $host -p 6 | } 7 | 8 | function start_comm_node { 9 | host=$1 10 | . "${PYTHEAS_HOME}/sbin/run_communicator.sh" -f $host -b $host -p 11 | } 12 | 13 | function start_pubsub_node { 14 | host=$1 15 | . "${PYTHEAS_HOME}/sbin/run_zookeeper.sh" $host 16 | . "${PYTHEAS_HOME}/sbin/run_kafka.sh" $host 17 | } 18 | 19 | function start_front_node { 20 | host=$1 21 | commnode=$2 22 | . "${PYTHEAS_HOME}/sbin/run_groupmanager.sh" -l UCB -f $host -k $commnode -p 23 | } 24 | 25 | 26 | 27 | if [ -z "${PYTHEAS_HOME}" ]; then 28 | export PYTHEAS_HOME="$(cd "`dirname "$0"`"/..; pwd)" 29 | fi 30 | 31 | PYTHEAS_CONF_DIR=${PYTHEAS_HOME}/conf 32 | PYTHEAS_BIN_DIR=${PYTHEAS_HOME}/bin 33 | 34 | FRONTENDLIST=`cat "${PYTHEAS_CONF_DIR}/frontends"` 35 | 36 | for frontend in `echo "$FRONTENDLIST"|sed "s/#.*$//;/^$/d"`; do 37 | echo "Setting up $frontend" 38 | set -f 39 | MACHINELIST=(${frontend//;/ }) 40 | pubsubnode=${MACHINELIST[0]} 41 | echo "Starting publish/subcribe node $pubsubnode" 42 | start_pubsub_node $pubsubnode 43 | commnode=${MACHINELIST[0]} 44 | echo "Starting communication node $commnode" 45 | start_comm_node $commnode 46 | logicnode=${MACHINELIST[0]} 47 | echo "Starting computing node $logicnode" 48 | start_logic_node $logicnode 49 | for index in "${!MACHINELIST[@]}"; do 50 | if [ $index -ne 0 ]; then 51 | frontnode=${MACHINELIST[$index]} 52 | echo "Starting front node $frontnode" 53 | start_front_node $frontnode $commnode 54 | fi 55 | done 56 | done 57 | 58 | -------------------------------------------------------------------------------- /frontend/webphp/player_EG.php: -------------------------------------------------------------------------------- 1 | 2) { 24 | $epsilon = floatval($decisions[0]); 25 | // get random decision 26 | if (rand(0, 100) < $epsilon * 100) { 27 | $decision = $decisions[rand(0,count($decisions)-3)+2]; 28 | } 29 | // get best decision 30 | else { 31 | $decision = $decisions[1]; 32 | } 33 | } 34 | if (empty($decision) || !in_array($decision, $decision_list)) { 35 | $decision = $decision_list[array_rand($decision_list, 1)]; 36 | } 37 | echo $decision; 38 | } 39 | 40 | // update 41 | if ($_POST['method'] == 'update') { 42 | // Encode the info with json and write it into file 43 | $info = array( 44 | "update" => $_POST["payload"], 45 | "group_id" => $group_id 46 | ); 47 | $in = json_encode($info, JSON_UNESCAPED_SLASHES).PHP_EOL; 48 | file_put_contents($path . 
'/info_queue',$in,FILE_APPEND|LOCK_EX); 49 | } 50 | 51 | ?> 52 | -------------------------------------------------------------------------------- /sbin/run_uploadtrace_EG.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | 4 | 5 | SSH="junchenj@"$1 6 | PRE=-o\ "StrictHostKeyChecking=no" 7 | 8 | LOCAL_DDN="/Users/junchenjiang/Documents/research/ddn-controller/proto/shijie/DDN" 9 | FRONT_SERVER="front_server" 10 | SPARK="spark" 11 | KAFKA="kafka" 12 | TRACE="trace" 13 | 14 | REMOTE_USER_ROOT="/users/junchenj" 15 | REMOTE_SYS_ROOT="/usr/share" 16 | 17 | 18 | if [ "$#" -ne 2 ]; then 19 | echo "Error: need exactly two arguments" 20 | echo "Format: sh run_uploadtrace.sh host tracefile" 21 | exit 22 | fi 23 | 24 | echo "" 25 | echo "" 26 | echo "" 27 | echo "**************************************************************************" 28 | echo "* Starting Uploading trace to "$1 29 | echo "**************************************************************************" 30 | 31 | Tracefile=$2 32 | TraceUnsorted='trace_raw.txt' 33 | scp $PRE -r $LOCAL_DDN/$TRACE $SSH:$REMOTE_USER_ 34 | scp $PRE -r $Tracefile $SSH:$REMOTE_USER_ROOT/$TRACE/$TraceUnsorted 35 | 36 | TraceSorted='trace_sort.txt' 37 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$TRACE/'; ./tracesort.py '$TraceUnsorted' '$TraceSorted 38 | ssh $PRE $SSH /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 39 | FrontendIp=$(cat temp) 40 | rm temp 41 | 42 | scp $PRE httpd_deploy.sh $SSH:$REMOTE_USER_ROOT/$FRONT_SERVER/ 43 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$FRONT_SERVER/'; sudo sh httpd_deploy.sh' 44 | 45 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$TRACE/'; ./trace_parser.py http://'$FrontendIp'/player_EG.php '$TraceSorted 46 | 47 | echo "**************************************************************************" 48 | echo "* Done Uploading trace to "$1 49 | echo "**************************************************************************" 50 | echo "" 51 | echo "" 52 | echo "" 53 | 54 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/src/main/java/frontend/HistoryObject.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.util.*; 4 | import java.io.Serializable; 5 | 6 | import scala.Tuple2; 7 | 8 | import org.apache.spark.api.java.JavaRDD; 9 | import org.apache.spark.api.java.JavaPairRDD; 10 | import org.apache.spark.streaming.api.java.*; 11 | 12 | public abstract class HistoryObject implements Serializable { 13 | 14 | public JavaPairRDD>> pairDData; 15 | public int windowSize; //seconds 16 | 17 | public HistoryObject(JavaStreamingContext jssc) { 18 | List>>> tmpDataList = new ArrayList<>(); 19 | //// for test 20 | //Map> testMap = new HashMap<>(); 21 | //List testList = new ArrayList(); 22 | //testList.add(7000.0); 23 | //testList.add(200.0); 24 | //testMap.put("decision1", testList); 25 | //tmpDataList.add(new Tuple2("group1", testMap)); 26 | JavaRDD>>> dData = 27 | jssc.sparkContext().parallelize(tmpDataList); 28 | this.pairDData = JavaPairRDD.fromJavaRDD(dData); 29 | } 30 | 31 | public void updateData(JavaPairRDD>> newPairDData) { 32 | this.pairDData = newPairDData; 33 | } 34 | 35 | /* 36 | * implement this method for combination of old data and new data 37 | */ 38 | public abstract Tuple2>> combineCall(Tuple2>>, Iterable>> 40 | >> tuple2); 41 | 42 | /* 43 | * implement this method for decision making 44 | */ 45 | public 
abstract String getDecision(Map> decisionStatMap); 46 | } 47 | -------------------------------------------------------------------------------- /sbin/run_uploadtrace.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | 4 | 5 | SSH="junchenj@"$1 6 | PRE=-o\ "StrictHostKeyChecking=no" 7 | if [ -z "${PYTHEAS_HOME}" ]; then 8 | export PYTHEAS_HOME="$(cd "`dirname "$0"`"/..; pwd)" 9 | fi 10 | 11 | FRONTEND_HOME=${PYTHEAS_HOME}/frontend 12 | FRONT_SERVER="front_server" 13 | SPARK="spark" 14 | KAFKA="kafka" 15 | TRACE="trace" 16 | 17 | REMOTE_USER_ROOT="/users/junchenj" 18 | REMOTE_SYS_ROOT="/usr/share" 19 | 20 | 21 | if [ "$#" -ne 2 ]; then 22 | echo "Error: need exactly two arguments" 23 | echo "Format: sh run_uploadtrace.sh host tracefile" 24 | exit 25 | fi 26 | 27 | echo "" 28 | echo "" 29 | echo "" 30 | echo "**************************************************************************" 31 | echo "* Starting Uploading trace to "$1 32 | echo "**************************************************************************" 33 | 34 | Tracefile=$2 35 | TraceUnsorted='trace_raw.txt' 36 | scp $PRE -r $FRONTEND_HOME/$TRACE $SSH:$REMOTE_USER_ 37 | scp $PRE -r $Tracefile $SSH:$REMOTE_USER_ROOT/$TRACE/$TraceUnsorted 38 | 39 | TraceSorted='trace_sort.txt' 40 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$TRACE/'; ./tracesort.py '$TraceUnsorted' '$TraceSorted 41 | ssh $PRE $SSH /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 42 | FrontendIp=$(cat temp) 43 | rm temp 44 | 45 | scp $PRE httpd_deploy.sh $SSH:$REMOTE_USER_ROOT/$FRONT_SERVER/ 46 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$FRONT_SERVER/'; sudo sh httpd_deploy.sh' 47 | 48 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$TRACE/'; ./trace_parser.py http://'$FrontendIp'/player.php '$TraceSorted 49 | 50 | echo "**************************************************************************" 51 | echo "* Done Uploading trace to "$1 52 | echo "**************************************************************************" 53 | echo "" 54 | echo "" 55 | echo "" 56 | 57 | -------------------------------------------------------------------------------- /bin/kafka_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Auto install Kafka and configure the environment 4 | # 5 | # Author: Shijie Sun 6 | # Email: septimus145@gmail.com 7 | # July, 2016 8 | 9 | if [ $# -lt 2 ]; then 10 | echo "Usage: sudo $0 " 11 | echo -e "\n\t is all IP addresses of kafka servers, separated by comma" 12 | echo -e "\t is the sequence number of current host in host_list" 13 | echo -e "\ne.g. if want to run kafka with two hosts(10.1.1.2,10.1.1.3) and IP of current host is 10.1.1.3. 
Then host_list=\"10.1.1.2,10.1.1.3\", host_number=2\n" 14 | exit 1 15 | fi 16 | 17 | if [[ $UID != 0 ]]; then 18 | echo "Please run this script with sudo:" 19 | echo "sudo $0 $*" 20 | exit 1 21 | fi 22 | 23 | host_list=(${1//,/ }) 24 | 25 | # Install editor 26 | sudo apt-get update 27 | which vim >&/dev/null || sudo apt-get install -y vim 28 | which tmux >&/dev/null || sudo apt-get install -y tmux 29 | 30 | # Install jre 31 | which java >&/dev/null || sudo apt-get install -y default-jre 32 | if [ -z $JAVA_HOME ]; then 33 | JAVA_HOME=$(sudo update-java-alternatives -l | head -n 1 | sed -e 's/ \+/ /g' | cut -f3 -d' ') 34 | echo JAVA_HOME=\"$JAVA_HOME\" | sudo tee --append /etc/environment 35 | export JAVA_HOME=$JAVA_HOME 36 | fi 37 | 38 | # Download kafka 39 | kafka_path="/usr/share/kafka/" 40 | wget http://www-eu.apache.org/dist/kafka/0.10.0.0/kafka_2.11-0.10.0.0.tgz 41 | sudo tar -xvzf kafka_2.11-0.10.0.0.tgz -C /usr/share 42 | sudo mv /usr/share/kafka_2.11-0.10.0.0 /usr/share/kafka 43 | rm kafka_2.11-0.10.0.0.tgz 44 | 45 | # Configure the zookeeper 46 | cat zookeeper.properties | sudo tee --append $kafka_path/config/zookeeper.properties 47 | i=0 48 | while [ $i -lt ${#host_list[@]} ] 49 | do 50 | server_info="server."$(( i+1 ))"="${host_list[$i]}":2888:3888" 51 | echo $server_info | sudo tee --append $kafka_path/config/zookeeper.properties 52 | (( i++ )) 53 | done 54 | sudo mkdir -p /tmp/zookeeper 55 | sudo touch /tmp/zookeeper/myid 56 | echo $2 | sudo tee --append /tmp/zookeeper/myid 57 | 58 | # Configure the kafka 59 | znodes=${host_list[0]}":2181" 60 | i=1 61 | while [ $i -lt ${#host_list[@]} ] 62 | do 63 | znodes=$znodes","${host_list[$i]}":2181" 64 | (( i++ )) 65 | done 66 | sudo sed -i -e "s/\(broker.id=\).*/\1$2/" \ 67 | -e "s/\(zookeeper.connect=\).*/\1$znodes/" $kafka_path/config/server.properties 68 | echo "delete.topic.enable=true" | sudo tee --append $kafka_path/config/server.properties 69 | 70 | echo Success 71 | exit 0 72 | -------------------------------------------------------------------------------- /frontend/GroupManager/src/main/java/frontend/DecisionCollector.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.io.*; 4 | import java.util.Arrays; 5 | import java.util.Iterator; 6 | import java.util.Properties; 7 | import org.apache.kafka.clients.consumer.ConsumerRecord; 8 | import org.apache.kafka.clients.consumer.ConsumerRecords; 9 | import org.apache.kafka.clients.consumer.KafkaConsumer; 10 | import org.json.JSONObject; 11 | import org.json.JSONArray; 12 | 13 | /** 14 | * Fetch decisions from Kafka 15 | * 16 | * Author: Shijie Sun 17 | * Email: septimus145@gmail.com 18 | * August, 2016 19 | */ 20 | 21 | 22 | public class DecisionCollector implements Runnable { 23 | 24 | protected String brokerList = ""; // list of broker 25 | protected String hostname = ""; // name of current host 26 | public KafkaConsumer consumer = null; // kafka consumer 27 | 28 | public DecisionCollector( String hostname, String brokerList ) { 29 | this.hostname = hostname; 30 | this.brokerList = brokerList; 31 | // setup consumer 32 | Properties consumerProps = new Properties(); 33 | consumerProps.put("bootstrap.servers", brokerList); 34 | consumerProps.put("group.id", this.hostname); 35 | consumerProps.put("enable.auto.commit", "true"); 36 | consumerProps.put("auto.commit.interval.ms", "1000"); 37 | consumerProps.put("session.timeout.ms", "30000"); 38 | consumerProps.put("key.deserializer", 
"org.apache.kafka.common.serialization.StringDeserializer"); 39 | consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 40 | this.consumer = new KafkaConsumer<>(consumerProps); 41 | consumer.subscribe(Arrays.asList("decision")); 42 | } 43 | 44 | public void run() { 45 | KafkaConsumer tconsumer = consumer; 46 | while (true) { 47 | ConsumerRecords records = tconsumer.poll(1000); 48 | for (ConsumerRecord record : records) { 49 | String[] decision = record.value().split(";"); 50 | try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("/var/www/info/d_" + decision[0]), "utf-8"))) { 51 | writer.write(decision[1]); 52 | } catch (Exception e) { 53 | System.err.println("Caught Exception: " + e.getMessage()); 54 | } 55 | } 56 | try { 57 | Thread.sleep(1000); 58 | } catch(InterruptedException ex) { 59 | Thread.currentThread().interrupt(); 60 | } 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /frontend/GroupManager/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | frontend 5 | GroupManager 6 | jar 7 | 1.0-SNAPSHOT 8 | GroupManager 9 | http://maven.apache.org 10 | 11 | UTF-8 12 | 13 | 14 | 15 | junit 16 | junit 17 | 3.8.1 18 | test 19 | 20 | 21 | org.json 22 | json 23 | 20160212 24 | 25 | 26 | 27 | org.apache.kafka 28 | kafka-clients 29 | 0.10.0.0 30 | 31 | 32 | 33 | org.apache.commons 34 | commons-lang3 35 | 3.0 36 | 37 | 38 | 39 | 40 | 41 | 42 | org.apache.maven.plugins 43 | maven-shade-plugin 44 | 2.4.3 45 | 46 | 47 | ${project.build.directory}/dependency-reduced-pom.xml 48 | 49 | 50 | 51 | package 52 | 53 | shade 54 | 55 | 56 | 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-compiler-plugin 62 | 3.3 63 | 64 | 1.7 65 | 1.7 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/algorithms/HistoryData_EG.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.util.*; 4 | import scala.Tuple2; 5 | import org.json.JSONObject; 6 | import org.json.JSONArray; 7 | import org.apache.spark.streaming.api.java.*; 8 | 9 | public class HistoryData extends HistoryObject { 10 | 11 | // self defined parameters 12 | public double epsilon = 0.9; 13 | // end of self defined 14 | 15 | public HistoryData(JavaStreamingContext jssc) { 16 | super(jssc); 17 | windowSize = 10000; 18 | } 19 | 20 | /* 21 | * implement this method for combination of old data and new data 22 | */ 23 | public Tuple2>> combineCall(Tuple2>>, Iterable>> 25 | >> tuple2) { 26 | Map> newData=null; 27 | Iterator>> iter; 28 | iter = tuple2._2()._1().iterator(); 29 | if (iter.hasNext()) 30 | newData = iter.next(); 31 | if (newData != null) { 32 | // calculate the average for new records 33 | for (Map.Entry> entry : newData.entrySet()) { 34 | List scores = entry.getValue(); 35 | double totalscore = 0; 36 | for (double score : scores) 37 | totalscore += score; 38 | // here becomes 2-elements list: [sum, size] 39 | List countedScore = new ArrayList<>(); 40 | countedScore.add(totalscore); 41 | countedScore.add((double)scores.size()); 42 | entry.setValue(countedScore); 43 | } 44 | } 45 | return new Tuple2(tuple2._1(), newData); 46 | } 47 | 48 | /* 49 | * implement this method for decision making 50 | */ 51 | public String getDecision(Map> decisionStatMap) { 52 | double maxScore = -Double.MAX_VALUE; 53 | 
String bestDecision = null; 54 | JSONArray jArray = new JSONArray(); 55 | for (Map.Entry> entry : decisionStatMap.entrySet()) { 56 | // TODO: what if here is divided by zero? 57 | if (entry.getValue().get(0) / entry.getValue().get(1) > maxScore) { 58 | if (bestDecision != null) 59 | jArray.put(bestDecision); 60 | bestDecision = entry.getKey(); 61 | maxScore = entry.getValue().get(0) / entry.getValue().get(1); 62 | } else { 63 | jArray.put(entry.getKey()); 64 | } 65 | } 66 | JSONObject jObject = new JSONObject(); 67 | jObject.put("random", jArray); 68 | jObject.put("best", bestDecision); 69 | jObject.put("epsilon", epsilon); 70 | return jObject.toString(); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/src/main/java/frontend/HistoryData.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.util.*; 4 | import scala.Tuple2; 5 | import org.json.JSONObject; 6 | import org.json.JSONArray; 7 | import org.apache.spark.streaming.api.java.*; 8 | 9 | public class HistoryData extends HistoryObject { 10 | 11 | // self defined parameters 12 | public double epsilon = 0.9; 13 | // end of self defined 14 | 15 | public HistoryData(JavaStreamingContext jssc) { 16 | super(jssc); 17 | windowSize = 10000; 18 | } 19 | 20 | /* 21 | * implement this method for combination of old data and new data 22 | */ 23 | public Tuple2>> combineCall(Tuple2>>, Iterable>> 25 | >> tuple2) { 26 | Map> newData=null; 27 | Iterator>> iter; 28 | iter = tuple2._2()._1().iterator(); 29 | if (iter.hasNext()) 30 | newData = iter.next(); 31 | if (newData != null) { 32 | // calculate the average for new records 33 | for (Map.Entry> entry : newData.entrySet()) { 34 | List scores = entry.getValue(); 35 | double totalscore = 0; 36 | for (double score : scores) 37 | totalscore += score; 38 | // here becomes 2-elements list: [sum, size] 39 | List countedScore = new ArrayList<>(); 40 | countedScore.add(totalscore); 41 | countedScore.add((double)scores.size()); 42 | entry.setValue(countedScore); 43 | } 44 | } 45 | return new Tuple2(tuple2._1(), newData); 46 | } 47 | 48 | /* 49 | * implement this method for decision making 50 | */ 51 | public String getDecision(Map> decisionStatMap) { 52 | double maxScore = -Double.MAX_VALUE; 53 | String bestDecision = null; 54 | JSONArray jArray = new JSONArray(); 55 | for (Map.Entry> entry : decisionStatMap.entrySet()) { 56 | // TODO: what if here is divided by zero? 
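// (note: get(1) is scores.size() from combineCall, so it should be >= 1 whenever the
// entry exists; still, an explicit count > 0 guard would be the safer fix for this TODO)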
57 | if (entry.getValue().get(0) / entry.getValue().get(1) > maxScore) { 58 | if (bestDecision != null) 59 | jArray.put(bestDecision); 60 | bestDecision = entry.getKey(); 61 | maxScore = entry.getValue().get(0) / entry.getValue().get(1); 62 | } else { 63 | jArray.put(entry.getKey()); 64 | } 65 | } 66 | JSONObject jObject = new JSONObject(); 67 | jObject.put("random", jArray); 68 | jObject.put("best", bestDecision); 69 | jObject.put("epsilon", epsilon); 70 | return jObject.toString(); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /sbin/run_groupmanager.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | Host="" 4 | KafkaNode="" 5 | Logic="" 6 | 7 | if [ "$#" -lt 6 ]; then 8 | echo "Error: need at least six arguments" 9 | echo "Format: sh run_groupmanager.sh [-option] --logic logic --frontend host --kafka kafkanode" 10 | exit 11 | fi 12 | 13 | Rebuild=false 14 | 15 | while test $# -gt 0; do 16 | case "$1" in 17 | -h|--help) 18 | echo "Format: sh run_groupmanager.sh [-option] host" 19 | echo "options:" 20 | echo "-h --help show brief help" 21 | echo "-p --package repackage before running" 22 | echo "-f --frontend set frontend host" 23 | echo "-l --logic set logic type (EG/UCB)" 24 | exit 0 25 | ;; 26 | -p|--package) 27 | Rebuild=true 28 | shift 29 | ;; 30 | -f|--frontend) 31 | Host=$2 32 | shift 33 | ;; 34 | -k|--kafka) 35 | KafkaNode=$2 36 | shift 37 | ;; 38 | -l|--logic) 39 | Logic=$2 40 | shift 41 | ;; 42 | -*) 43 | echo "invalid option "$1 44 | exit 0 45 | ;; 46 | *) 47 | shift 48 | ;; 49 | esac 50 | done 51 | 52 | 53 | SSH="junchenj@"$Host 54 | SSHKAFKA="junchenj"@$KafkaNode 55 | PRE=-o\ "StrictHostKeyChecking=no" 56 | if [ -z "${PYTHEAS_HOME}" ]; then 57 | export PYTHEAS_HOME="$(cd "`dirname "$0"`"/..; pwd)" 58 | fi 59 | 60 | FRONTEND_HOME=${PYTHEAS_HOME}/frontend 61 | FRONT_SERVER="front_server" 62 | SPARK="spark" 63 | KAFKA="kafka" 64 | TRACE="trace" 65 | 66 | REMOTE_USER_ROOT="/users/junchenj" 67 | REMOTE_SYS_ROOT="/usr/share" 68 | REMOTE_LOG_ROOT="/users/junchenj/log" 69 | LOG_FILE=$REMOTE_LOG_ROOT"/log_groupmanager" 70 | 71 | 72 | echo "" 73 | echo "" 74 | echo "" 75 | echo "**************************************************************************" 76 | echo "* Starting GroupManager on "$Host 77 | echo "**************************************************************************" 78 | 79 | if [ "$Rebuild" = true ] ; then 80 | scp $PRE -r $FRONTEND_HOME/$FRONT_SERVER/GroupManager $SSH:$REMOTE_USER_ROOT/$FRONT_SERVER/ 81 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$FRONT_SERVER'/GroupManager; mvn package > '$LOG_FILE 82 | fi 83 | 84 | ssh $PRE $SSHKAFKA /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 85 | KafkaIp=$(cat temp) 86 | echo "Got IP="$KafkaIp 87 | rm temp 88 | ssh $PRE $SSH "sh -c 'cd $REMOTE_USER_ROOT/$FRONT_SERVER/GroupManager && java -cp target/GroupManager-1.0-SNAPSHOT.jar frontend.GroupManager frontend1 $KafkaIp ../gmConfig $Logic > $LOG_FILE 2>&1 &'" 89 | 90 | echo "**************************************************************************" 91 | echo "* Done GroupManager on "$Host 92 | echo "**************************************************************************" 93 | echo "" 94 | echo "" 95 | echo "" 96 | 97 | -------------------------------------------------------------------------------- /sbin/run_decisionmaker.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 
| Host="" 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Error: need at least one argument" 7 | echo "Format: sh run_decisionmaker.sh [-option] --frontend " 8 | exit 9 | fi 10 | 11 | Rebuild=false 12 | 13 | while test $# -gt 0; do 14 | case "$1" in 15 | -h|--help) 16 | echo "Format: sh run_decisionmaker.sh [-option] -f " 17 | echo "options:" 18 | echo "-h --help show brief help" 19 | echo "-p --package repackage before running" 20 | echo "-f --frontend set frontend host" 21 | exit 0 22 | ;; 23 | -p|--package) 24 | Rebuild=true 25 | shift 26 | ;; 27 | -f|--frontend) 28 | Host=$2 29 | shift 30 | ;; 31 | -*) 32 | echo "invalid option "$1 33 | exit 0 34 | ;; 35 | *) 36 | shift 37 | ;; 38 | esac 39 | done 40 | 41 | 42 | SSH="junchenj@"$Host 43 | PRE=-o\ "StrictHostKeyChecking=no" 44 | 45 | if [ -z "${PYTHEAS_HOME}" ]; then 46 | export PYTHEAS_HOME="$(cd "`dirname "$0"`"/..; pwd)" 47 | fi 48 | FRONTEND_HOME=${PYTHEAS_HOME}/frontend 49 | FRONT_SERVER="front_server" 50 | SPARK="spark" 51 | KAFKA="kafka" 52 | TRACE="trace" 53 | 54 | REMOTE_USER_ROOT="/users/junchenj" 55 | REMOTE_SYS_ROOT="/usr/share" 56 | REMOTE_LOG_ROOT="/users/junchenj/log" 57 | LOG_FILE=$REMOTE_LOG_ROOT"/log_decisionmaker" 58 | 59 | 60 | echo "" 61 | echo "" 62 | echo "" 63 | echo "**************************************************************************" 64 | echo "* Starting DecisionMaker on "$Host 65 | echo "**************************************************************************" 66 | 67 | if [ "$Rebuild" = true ] ; then 68 | scp $PRE -r $FRONTEND_HOME/$SPARK/DecisionMaker $SSH:$REMOTE_USER_ROOT/$SPARK/ 69 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$SPARK'/DecisionMaker; mvn package > '$LOG_FILE 70 | fi 71 | 72 | ##### get kafka pointer of the frontend 73 | ssh $PRE $SSH /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 74 | FrontendKafka=$(cat temp)':9092' 75 | FrontendZookeeper=$(cat temp)':2181' 76 | rm temp 77 | echo "Frontend Kafka="$FrontendKafka" / "$FrontendZookeeper 78 | 79 | CONF=$REMOTE_USER_ROOT/$SPARK"/config.properties" 80 | ##### get updateTopic 81 | TopicKey='updateTopic' 82 | ssh $PRE $SSH "cat $CONF | grep $TopicKey'=' | cut -d= -f2 | awk '{ print \$1}'" > temp 83 | TOPIC=$(cat temp) 84 | rm temp 85 | 86 | ssh $PRE $SSH "sh -c 'cd $REMOTE_SYS_ROOT/$SPARK; sudo bin/spark-submit --class frontend.DecisionMaker --master local --executor-memory 30G --total-executor-cores 1 --executor-cores 1 ~/spark/DecisionMaker/target/DecisionMaker-1.0-SNAPSHOT.jar $FrontendKafka $TOPIC decision 0.7 10 > $LOG_FILE 2>&1 &'" 87 | 88 | 89 | echo "**************************************************************************" 90 | echo "* Done DecisionMaker on "$Host 91 | echo "**************************************************************************" 92 | echo "" 93 | echo "" 94 | echo "" 95 | 96 | -------------------------------------------------------------------------------- /frontend/GroupManager/src/main/java/frontend/DecisionCollector_EG.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.io.*; 4 | import java.util.Arrays; 5 | import java.util.Properties; 6 | import org.apache.kafka.clients.consumer.ConsumerRecord; 7 | import org.apache.kafka.clients.consumer.ConsumerRecords; 8 | import org.apache.kafka.clients.consumer.KafkaConsumer; 9 | import org.json.JSONObject; 10 | import org.json.JSONArray; 11 | 12 | /** 13 | * Fetch decisions from Kafka 14 | * 15 | * Author: Shijie Sun 16 | * Email: septimus145@gmail.com 17 
| * August, 2016 18 | */ 19 | 20 | 21 | public class DecisionCollector_EG implements Runnable { 22 | 23 | protected String brokerList = ""; // list of broker 24 | protected String hostname = ""; // name of current host 25 | public KafkaConsumer consumer = null; // kafka consumer 26 | 27 | public DecisionCollector_EG( String hostname, String brokerList ) { 28 | this.hostname = hostname; 29 | this.brokerList = brokerList; 30 | // setup consumer 31 | Properties consumerProps = new Properties(); 32 | consumerProps.put("bootstrap.servers", brokerList); 33 | consumerProps.put("group.id", this.hostname); 34 | consumerProps.put("enable.auto.commit", "true"); 35 | consumerProps.put("auto.commit.interval.ms", "1000"); 36 | consumerProps.put("session.timeout.ms", "30000"); 37 | consumerProps.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 38 | consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 39 | this.consumer = new KafkaConsumer<>(consumerProps); 40 | consumer.subscribe(Arrays.asList("decision")); 41 | } 42 | 43 | public void run() { 44 | KafkaConsumer tconsumer = consumer; 45 | while (true) { 46 | ConsumerRecords records = tconsumer.poll(1000); 47 | for (ConsumerRecord record : records) { 48 | String[] decision = record.value().split(";"); 49 | try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("/var/www/info/d_" + decision[0]), "utf-8"))) { 50 | JSONObject jObject = new JSONObject(decision[1]); 51 | writer.write(String.valueOf(jObject.getDouble("epsilon"))); 52 | writer.newLine(); 53 | writer.write(jObject.getString("best")); 54 | JSONArray jArray = jObject.getJSONArray("random"); 55 | for (int i=0; i < jArray.length(); i++) { 56 | writer.newLine(); 57 | writer.write(jArray.getString(i)); 58 | } 59 | } catch (Exception e) { 60 | System.err.println("Caught Exception: " + e.getMessage()); 61 | } 62 | } 63 | try { 64 | Thread.sleep(1000); 65 | } catch(InterruptedException ex) { 66 | Thread.currentThread().interrupt(); 67 | } 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /frontend/GroupManager/src/main/java/frontend/GroupManager.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.util.concurrent.ConcurrentHashMap; 4 | import java.net.InetAddress; 5 | import java.net.UnknownHostException; 6 | 7 | /** 8 | * Manage the groups of current cluster 9 | * 10 | * Retrive the info of updates from file and send them to Kafka server 11 | * Fetch group table from Kafka and maintain it 12 | * Fetch decisions from Kafka 13 | * 14 | * Author: Shijie Sun 15 | * Email: septimus145@gmail.com 16 | * August, 2016 17 | */ 18 | 19 | public class GroupManager { 20 | 21 | protected Thread decisionCollector = null; 22 | protected Thread groupTableUpdater = null; 23 | protected Thread infoSender = null; 24 | protected String hostname = ""; 25 | protected String kafkaBrokerList = ""; 26 | protected String clusterID = ""; 27 | public ConcurrentHashMap group2ClusterMap = null; 28 | 29 | public GroupManager( String clusterID, String kafkaServerList, String configFile ) { 30 | this.clusterID = clusterID; 31 | try { 32 | this.hostname = InetAddress.getLocalHost().getHostName(); 33 | } catch (UnknownHostException e){ 34 | this.hostname = "HOST"; 35 | } 36 | this.kafkaBrokerList = kafkaServerList.replace(",",":9092,") + ":9092"; 37 | this.group2ClusterMap = new 
ConcurrentHashMap<>(); 38 | 39 | this.groupTableUpdater = new Thread(new GroupTableUpdater(this.hostname, this.clusterID, this.kafkaBrokerList, this.group2ClusterMap)); 40 | this.groupTableUpdater.setDaemon(true); 41 | this.groupTableUpdater.start(); 42 | System.out.println("Group table updater ready."); 43 | 44 | this.decisionCollector = new Thread(new DecisionCollector(this.hostname, this.kafkaBrokerList)); 45 | this.decisionCollector.setDaemon(true); 46 | this.decisionCollector.start(); 47 | System.out.println("Decision collector ready."); 48 | 49 | this.infoSender = new Thread(new InfoSender(this.kafkaBrokerList, this.clusterID, this.group2ClusterMap, configFile)); 50 | this.infoSender.setDaemon(true); 51 | this.infoSender.start(); 52 | System.out.println("Info sender ready."); 53 | } 54 | 55 | public static void main( String[] args ) 56 | { 57 | if (args.length < 3) { 58 | System.out.println("Usage: java frontend.GroupManager cluster_ID kafka_server config_file"); 59 | System.out.println("\n\tcluster_ID is the ID of current cluster"); 60 | System.out.println("\n\tkafka_server is the list of IP of kafka servers, separated by comma"); 61 | System.out.println("\n\tconfig_file contains labels of update info and reduced labels"); 62 | return; 63 | } 64 | 65 | GroupManager gManager = new GroupManager(args[0], args[1], args[2]); 66 | 67 | while (true) 68 | { 69 | try { 70 | Thread.sleep(1000); 71 | } catch (InterruptedException e) { 72 | Thread.currentThread().interrupt(); 73 | // code for stopping current task so thread stops 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /sbin/install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | echo "" 5 | echo "" 6 | echo "" 7 | echo "**************************************************************************" 8 | echo "* Setting up $2 : $HOST" 9 | echo "**************************************************************************" 10 | echo $0 $@ 11 | echo "" 12 | 13 | if [ "$#" -lt 3 ]; then 14 | echo "Error: need at least three arguments" 15 | echo "Format: sh setup_install.sh " 16 | exit 1; 17 | fi 18 | 19 | HOST=$1 20 | SSH=junchenj@$HOST 21 | PRE=-o\ "StrictHostKeyChecking=no" 22 | LOCAL_BIN_DIR=$3 23 | FRONT_SERVER=front_server 24 | SPARK=spark 25 | KAFKA=kafka 26 | TRACE=trace 27 | 28 | REMOTE_USER_ROOT="/users/junchenj" 29 | REMOTE_LOG_ROOT="/users/junchenj/log" 30 | 31 | echo "======== ${HOST}: Creating ${REMOTE_USER_ROOT} ========" 32 | ssh $PRE $SSH 'mkdir -p '$REMOTE_USER_ROOT 33 | ssh $PRE $SSH 'mkdir -p '$REMOTE_LOG_ROOT 34 | 35 | [ -z $SPARK ] || { 36 | echo "======== ${HOST}: Setting up spark ========" 37 | ssh $PRE $SSH 'mkdir -p '$REMOTE_USER_ROOT/$SPARK 38 | scp $PRE $LOCAL_BIN_DIR/'spark_deploy.sh' $SSH:$REMOTE_USER_ROOT/$SPARK 39 | scp $PRE $LOCAL_BIN_DIR/'config.properties' $SSH:$REMOTE_USER_ROOT/$SPARK 40 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$SPARK'; sudo bash spark_deploy.sh' 41 | } 42 | 43 | [ -z $KAFKA ] || { 44 | echo "======== ${HOST}: Setting up kafka ========" 45 | ssh $PRE $SSH 'mkdir -p '$REMOTE_USER_ROOT/$KAFKA 46 | scp $PRE $LOCAL_BIN_DIR/'kafka_deploy.sh' $SSH:$REMOTE_USER_ROOT/$KAFKA 47 | scp $PRE $LOCAL_BIN_DIR/'zookeeper.properties' $SSH:$REMOTE_USER_ROOT/$KAFKA 48 | IP=$(ssh $PRE $SSH getent hosts \$\(hostname\) | awk '{print $1}') 49 | echo "Got IP = $IP" 50 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$KAFKA'; sudo ./kafka_deploy.sh '$IP' 1' 51 | } 52 | 53 | [ -z 
$FRONT_SERVER ] || { 54 | echo "======== "$HOST": Setting up front_server ========" 55 | ssh $PRE $SSH 'mkdir -p '$REMOTE_USER_ROOT/$FRONT_SERVER 56 | scp $PRE $LOCAL_BIN_DIR/'frontserver_deploy.sh' $SSH:$REMOTE_USER_ROOT/$FRONT_SERVER 57 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$FRONT_SERVER'; sudo ./frontserver_deploy.sh' 58 | } 59 | 60 | echo "**************************************************************************" 61 | echo "* Finish setup of $2 : $HOST" 62 | echo "**************************************************************************" 63 | 64 | #echo "**************************************************************************" 65 | #echo "* Done Setting up on "$host 66 | #echo "* NEXT STEP: Open 6 new windws" 67 | #echo "* Run 'sh run_zookeeper.sh "$host"' in 1st window" 68 | #echo "* Run 'sh run_kafka.sh "$host"' in 2nd window" 69 | #echo "* Run 'sh run_groupmanager.sh -f "$host" -p' in 3rd window" 70 | #echo "* Run 'sh run_communicator.sh -f "$host" -b backendhost -p' in 4th window" 71 | #echo "* Run 'sh run_decisionmaker.sh -f "$host" -p' in 5th window" 72 | #echo "* Run 'sh run_uploadtrace.sh "$host" tracefile -p' in 6th window" 73 | #echo "**************************************************************************" 74 | #echo "" 75 | #echo "" 76 | #echo "" 77 | -------------------------------------------------------------------------------- /frontend/Communicator/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | frontend 5 | Communicator 6 | jar 7 | 1.0 8 | Communicator 9 | http://maven.apache.org 10 | 11 | UTF-8 12 | 13 | 14 | 15 | junit 16 | junit 17 | 3.8.1 18 | test 19 | 20 | 21 | org.json 22 | json 23 | 20160212 24 | 25 | 26 | org.apache.spark 27 | spark-core_2.10 28 | 1.6.2 29 | 30 | 31 | org.apache.spark 32 | spark-streaming_2.11 33 | 2.0.0 34 | 35 | 36 | org.apache.spark 37 | spark-streaming-kafka_2.10 38 | 1.6.2 39 | 40 | 41 | org.apache.kafka 42 | kafka-clients 43 | 0.10.0.0 44 | 45 | 46 | 47 | 48 | 49 | 50 | org.apache.maven.plugins 51 | maven-shade-plugin 52 | 2.4.3 53 | 54 | 55 | ${project.build.directory}/dependency-reduced-pom.xml 56 | 57 | 58 | 59 | *:* 60 | 61 | META-INF/*.SF 62 | META-INF/*.DSA 63 | META-INF/*.RSA 64 | 65 | 66 | 67 | 68 | 69 | 70 | package 71 | 72 | shade 73 | 74 | 75 | 76 | 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-compiler-plugin 81 | 3.3 82 | 83 | 1.7 84 | 1.7 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | frontend 5 | DecisionMaker 6 | jar 7 | 1.0-SNAPSHOT 8 | DecisionMaker 9 | http://maven.apache.org 10 | 11 | UTF-8 12 | 13 | 14 | 15 | junit 16 | junit 17 | 3.8.1 18 | test 19 | 20 | 21 | org.json 22 | json 23 | 20160212 24 | 25 | 26 | org.apache.spark 27 | spark-core_2.10 28 | 1.6.2 29 | 30 | 31 | org.apache.spark 32 | spark-streaming_2.11 33 | 2.0.0 34 | 35 | 36 | org.apache.spark 37 | spark-streaming-kafka_2.10 38 | 1.6.2 39 | 40 | 41 | org.apache.kafka 42 | kafka-clients 43 | 0.10.0.0 44 | 45 | 46 | 47 | 48 | 49 | 50 | org.apache.maven.plugins 51 | maven-shade-plugin 52 | 2.4.3 53 | 54 | 55 | ${project.build.directory}/dependency-reduced-pom.xml 56 | 57 | 58 | 59 | *:* 60 | 61 | META-INF/*.SF 62 | META-INF/*.DSA 63 | META-INF/*.RSA 64 | 65 | 66 | 67 | 68 | 69 | 70 | package 71 | 72 | shade 73 | 74 | 75 | 76 | 77 | 78 | 79 | org.apache.maven.plugins 80 | 
maven-compiler-plugin 81 | 3.3 82 | 83 | 1.7 84 | 1.7 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /sbin/run_communicator.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | Host="" 4 | Backend="" 5 | 6 | if [ "$#" -lt 4 ]; then 7 | echo "Error: need at least four argument" 8 | echo "Format: sh run_communicator.sh [-option] --frontend host --backend host" 9 | exit 10 | fi 11 | 12 | Rebuild=false 13 | 14 | while test $# -gt 0; do 15 | case "$1" in 16 | -h|--help) 17 | echo "Format: sh run_communicator.sh [-option] host" 18 | echo "options:" 19 | echo "-h --help show brief help" 20 | echo "-p --package repackage before running" 21 | echo "-f --frontend set frontend host" 22 | echo "-b --backend set backend host" 23 | exit 0 24 | ;; 25 | -p|--package) 26 | Rebuild=true 27 | shift 28 | ;; 29 | -f|--frontend) 30 | Host=$2 31 | shift 32 | ;; 33 | -b|--backend) 34 | Backend=$2 35 | shift 36 | ;; 37 | -*) 38 | echo "invalid option "$1 39 | exit 0 40 | ;; 41 | *) 42 | shift 43 | ;; 44 | esac 45 | done 46 | 47 | SSH="junchenj@"$Host 48 | PRE=-o\ "StrictHostKeyChecking=no" 49 | 50 | if [ -z "${PYTHEAS_HOME}" ]; then 51 | export PYTHEAS_HOME="$(cd "`dirname "$0"`"/..; pwd)" 52 | fi 53 | 54 | FRONTEND_HOME=${PYTHEAS_HOME}/frontend 55 | FRONT_SERVER="front_server" 56 | SPARK="spark" 57 | KAFKA="kafka" 58 | TRACE="trace" 59 | 60 | REMOTE_USER_ROOT="/users/junchenj" 61 | REMOTE_SYS_ROOT="/usr/share" 62 | REMOTE_LOG_ROOT="/users/junchenj/log" 63 | LOG_FILE=$REMOTE_LOG_ROOT"/log_communicator" 64 | 65 | 66 | echo "" 67 | echo "" 68 | echo "" 69 | echo "**************************************************************************" 70 | echo "* Starting Communicator on "$Host 71 | echo "**************************************************************************" 72 | 73 | CONF=$REMOTE_USER_ROOT/$SPARK"/config.properties" 74 | 75 | if [ "$Rebuild" = true ] ; then 76 | scp $PRE -r $FRONTEND_HOME/$SPARK/Communicator $SSH:$REMOTE_USER_ROOT/$SPARK/ 77 | ssh $PRE $SSH 'cd '$REMOTE_USER_ROOT/$SPARK/Communicator'; mvn package > '$LOG_FILE 78 | fi 79 | 80 | ##### get kafka pointer of the frontend 81 | ssh $PRE $SSH /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 82 | FrontendKafka=$(cat temp)':9092' 83 | FrontendZookeeper=$(cat temp)':2181' 84 | rm temp 85 | echo "Frontend Kafka="$FrontendKafka" / "$FrontendZookeeper 86 | 87 | ##### get kafka pointer of the frontend 88 | ssh $PRE junchenj@$Backend /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 89 | BackendKafka=$(cat temp)':9092' 90 | BackendZookeeper=$(cat temp)':2181' 91 | rm temp 92 | echo "Backend Kafka="$BackendKafka" / "$BackendZookeeper 93 | 94 | ##### get and add topics 95 | for TopicKey in 'updateTopic' 'uploadTopic' 'decisionTopic' 'subscribeTopic' 'forwardTopic' 'sampleTopic' 'aliveTopic' 96 | do 97 | ssh $PRE $SSH "cat $CONF | grep $TopicKey'=' | cut -d= -f2 | awk '{ print \$1}'" > temp 98 | TOPIC=$(cat temp) 99 | rm temp 100 | ssh $PRE $SSH -t 'cd '$REMOTE_SYS_ROOT/$KAFKA'; sudo bin/kafka-topics.sh --create --zookeeper '$FrontendZookeeper' --topic '$TOPIC' --partition 1 --replication-factor 1' 101 | echo "Added topic "$TopicKey"="$TOPIC 102 | done 103 | 104 | ssh $PRE $SSH -t "sed -i 's/frontend1=10.11.10.3:9092/frontend1="$FrontendKafka"/g' "$CONF 105 | ssh $PRE $SSH -t "sed -i 's/backendBrokers=10.11.10.2:9092/backendBrokers="$BackendKafka"/g' "$CONF 106 
| ssh $PRE $SSH -t "sed -i 's/frontend2=10.11.10.4:9092//g' "$CONF 107 | 108 | ssh $PRE $SSH "sh -c 'cd $REMOTE_SYS_ROOT/$SPARK && sudo bin/spark-submit --class frontend.Communicator --master local --executor-memory 30G --total-executor-cores 1 --executor-cores 1 ~/spark/Communicator/target/Communicator-1.0.jar $CONF > $LOG_FILE 2>&1 &'" 109 | 110 | echo "**************************************************************************" 111 | echo "* Done Communicator on "$Host 112 | echo "**************************************************************************" 113 | echo "" 114 | echo "" 115 | echo "" 116 | 117 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/algorithms/HistoryData_DUCB.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.util.*; 4 | import scala.Tuple2; 5 | import org.apache.spark.streaming.api.java.*; 6 | 7 | public class HistoryData extends HistoryObject { 8 | 9 | // self defined parameters 10 | public double gamma = 0.8; 11 | public int precisionTime = 10; 12 | // end of self defined 13 | 14 | public HistoryData(JavaStreamingContext jssc) { 15 | super(jssc); 16 | windowSize = -1; 17 | } 18 | 19 | /* 20 | * implement this method for combination of old data and new data 21 | */ 22 | public Tuple2>> combineCall(Tuple2>>, Iterable>> 24 | >> tuple2) { 25 | Map> oldData=null, newData=null; 26 | Iterator>> iter; 27 | iter = tuple2._2()._1().iterator(); 28 | if (iter.hasNext()) 29 | newData = iter.next(); 30 | iter = tuple2._2()._2().iterator(); 31 | if (iter.hasNext()) 32 | oldData = iter.next(); 33 | if (newData != null) { 34 | // calculate the average for new records 35 | for (Map.Entry> entry : newData.entrySet()) { 36 | List scores = entry.getValue(); 37 | double totalscore = 0; 38 | for (double score : scores) 39 | totalscore += score; 40 | // here becomes 2-elements list: [sum, size] 41 | List countedScore = new ArrayList<>(); 42 | countedScore.add(totalscore); 43 | countedScore.add((double)scores.size()); 44 | entry.setValue(countedScore); 45 | } 46 | // combine the old data and new data 47 | if (oldData != null) { 48 | for (Map.Entry> oldEntry : oldData.entrySet()) { 49 | List newValue = newData.get(oldEntry.getKey()); 50 | List oldValue = oldEntry.getValue(); 51 | if (newValue != null) { 52 | newValue.set(0, newValue.get(0) + oldValue.get(0) * this.gamma); 53 | newValue.set(1, newValue.get(1) + oldValue.get(1) * this.gamma); 54 | } else { 55 | newData.put(oldEntry.getKey(), oldValue); 56 | } 57 | } 58 | } 59 | return new Tuple2(tuple2._1(), newData); 60 | } else { 61 | // discount the old data 62 | if (oldData != null) { 63 | for (Map.Entry> oldEntry : oldData.entrySet()) { 64 | List oldValue = oldEntry.getValue(); 65 | oldValue.set(0, oldValue.get(0) * this.gamma); 66 | oldValue.set(1, oldValue.get(1) * this.gamma); 67 | } 68 | } 69 | return new Tuple2(tuple2._1(), oldData); 70 | } 71 | } 72 | 73 | /* 74 | * implement this method for decision making 75 | */ 76 | public String getDecision(Map> decisionStatMap) { 77 | // here is just a convert from List to double[] 78 | // the latter is much easier for process here 79 | Map tmpMap = new HashMap(); 80 | for (Map.Entry> entry : decisionStatMap.entrySet()) { 81 | tmpMap.put(entry.getKey(), new double[]{entry.getValue().get(0), entry.getValue().get(1)}); 82 | } 83 | double N = 0; 84 | for (Map.Entry entry : tmpMap.entrySet()) { 85 | N += entry.getValue()[1]; 86 | if (entry.getValue()[1] > 0) 
87 | entry.getValue()[0] /= entry.getValue()[1]; 88 | else 89 | entry.getValue()[0] = 0; 90 | } 91 | double score, maxScore; 92 | String bestDecision = ""; 93 | String decisions = ""; 94 | double[] bestDecisionInfo; 95 | double Bsqrt2logN = 0; 96 | for (int j = 0; j < precisionTime; j++) { 97 | maxScore = -Double.MAX_VALUE; 98 | Bsqrt2logN = 0; 99 | // if N <= 1, then it will be a negative number or zero. 100 | // in this case, we will not compute the Ct(y,i) 101 | if (N > 1) 102 | Bsqrt2logN = 1000 * Math.sqrt(2 * Math.log(N)); 103 | for (Map.Entry entry : tmpMap.entrySet()) { 104 | if (entry.getValue()[1] > 0) 105 | score = entry.getValue()[0] + Bsqrt2logN / Math.sqrt(entry.getValue()[1]); 106 | else 107 | score = 0; 108 | if (score > maxScore) { 109 | bestDecision = entry.getKey(); 110 | maxScore = score; 111 | } 112 | } 113 | decisions += bestDecision + ":"; 114 | bestDecisionInfo = tmpMap.get(bestDecision); 115 | bestDecisionInfo[1] += 1; 116 | N += 1; 117 | // discount for next precision 118 | N *= this.gamma; 119 | for (Map.Entry entry : tmpMap.entrySet()) { 120 | entry.getValue()[1] *= this.gamma; 121 | } 122 | } 123 | return decisions; 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /frontend/GroupManager/src/main/java/frontend/GroupTableUpdater.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.io.*; 4 | import java.util.Arrays; 5 | import java.util.ArrayList; 6 | import java.util.Iterator; 7 | import java.util.Properties; 8 | import java.util.concurrent.ConcurrentHashMap; 9 | import org.apache.kafka.clients.consumer.ConsumerRecord; 10 | import org.apache.kafka.clients.consumer.ConsumerRecords; 11 | import org.apache.kafka.clients.consumer.KafkaConsumer; 12 | import org.json.JSONObject; 13 | import org.json.JSONArray; 14 | 15 | /** 16 | * Fetch group table from Kafka and maintain it 17 | * 18 | * Author: Shijie Sun 19 | * Email: septimus145@gmail.com 20 | * August, 2016 21 | */ 22 | 23 | 24 | public class GroupTableUpdater implements Runnable { 25 | 26 | protected String brokerList = ""; // list of broker 27 | protected String hostname = ""; // name of current host 28 | public KafkaConsumer consumer = null; // kafka consumer 29 | public ConcurrentHashMap group2ClusterMap = null; 30 | 31 | public GroupTableUpdater( String hostname, String clusterID, String brokerList, ConcurrentHashMap group2ClusterMap ) { 32 | this.hostname = hostname; 33 | this.brokerList = brokerList; 34 | this.group2ClusterMap = group2ClusterMap; 35 | group2ClusterMap.put("null", clusterID); 36 | // setup consumer 37 | Properties consumerProps = new Properties(); 38 | consumerProps.put("bootstrap.servers", brokerList); 39 | consumerProps.put("group.id", this.hostname); 40 | consumerProps.put("enable.auto.commit", "true"); 41 | consumerProps.put("auto.commit.interval.ms", "1000"); 42 | consumerProps.put("session.timeout.ms", "30000"); 43 | consumerProps.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 44 | consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 45 | this.consumer = new KafkaConsumer<>(consumerProps); 46 | consumer.subscribe(Arrays.asList("group_table")); 47 | } 48 | 49 | public void run() { 50 | while (true) { 51 | ConsumerRecords records = this.consumer.poll(1000); 52 | for (ConsumerRecord record : records) { 53 | JSONObject jObject = new JSONObject(record.value()); 54 | 55 | // if 
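/* Annotation (not in the original source): messages on the "group_table" topic are JSON and
 * carry one of two payloads; two purely hypothetical examples:
 *   {"GroupAssignment":[{"GroupName":"g1","Cluster":"frontend1"},
 *                       {"GroupName":"g2","Cluster":"frontend2"}]}
 *   {"GroupingRules":{"Field":"4","Table":[{"Key":"Pittsburgh","Rule":{"GroupName":"g1"}}]}}
 * The first updates group2ClusterMap in place; the second is compiled by phpGenerator()
 * below into nested if-statements on $features[...] that assign $group_id, and the result
 * is written to /var/www/info/match.php.
 */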
it is group to cluster map, save it 56 | if (jObject.has("GroupAssignment")) { 57 | JSONArray jArray = jObject.getJSONArray("GroupAssignment"); 58 | for (int i = 0; i < jArray.length(); i++) { 59 | group2ClusterMap.put(jArray.getJSONObject(i).getString("GroupName"), jArray.getJSONObject(i).getString("Cluster")); 60 | } 61 | } 62 | 63 | // if it is feature values to group map, generate new PHP code 64 | if (jObject.has("GroupingRules")) { 65 | try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("/var/www/info/match.php"), "utf-8"))) { 66 | JSONObject jObjectRules = jObject.getJSONObject("GroupingRules"); 67 | ArrayList code = phpGenerator(jObjectRules); 68 | Iterator codeIter = code.iterator(); 69 | while (codeIter.hasNext()) { 70 | writer.write(codeIter.next()); 71 | writer.newLine(); 72 | } 73 | } catch (Exception e) { 74 | System.err.println("Caught Exception: " + e.getMessage()); 75 | } 76 | } 77 | } 78 | try { 79 | Thread.sleep(1000); 80 | } catch(InterruptedException ex) { 81 | Thread.currentThread().interrupt(); 82 | } 83 | } 84 | } 85 | 86 | private ArrayList phpGenerator (JSONObject jObjectRules) { 87 | ArrayList code = new ArrayList(); 88 | code.add(""); 98 | return code; 99 | } 100 | 101 | private void ruleParser (JSONObject jObjectRules, ArrayList code, int indent) { 102 | String indentStr = new String(new char[indent]).replace('\0', ' '); 103 | if (jObjectRules.has("Field")) { 104 | JSONArray jArrayTable = jObjectRules.getJSONArray("Table"); 105 | String field = jObjectRules.getString("Field"); 106 | for (int i = 0; i < jArrayTable.length(); i++) { 107 | JSONObject jObjectRule = jArrayTable.getJSONObject(i); 108 | code.add(indentStr + "if ($features[" + field + "] == \"" + jObjectRule.getString("Key") + "\") {"); 109 | ruleParser(jObjectRule.getJSONObject("Rule"), code, indent+2); 110 | code.add(indentStr + "}"); 111 | } 112 | } else if (jObjectRules.has("GroupName")) { 113 | code.add(indentStr + "$group_id = \"" + jObjectRules.getString("GroupName") + "\";"); 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /frontend/GroupManager/src/main/java/frontend/InfoSender.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.io.*; 4 | import java.util.List; 5 | import java.util.Arrays; 6 | import java.util.Properties; 7 | import java.util.concurrent.ConcurrentHashMap; 8 | import org.apache.commons.lang3.StringUtils; 9 | import org.apache.kafka.clients.producer.KafkaProducer; 10 | import org.apache.kafka.clients.producer.ProducerRecord; 11 | import org.json.JSONObject; 12 | 13 | /** 14 | * Retrive the info of updates from file and send them to Kafka server 15 | * 16 | * Author: Shijie Sun 17 | * Email: septimus145@gmail.com 18 | * August, 2016 19 | */ 20 | 21 | public class InfoSender implements Runnable { 22 | 23 | protected String brokerList = ""; // list of broker 24 | protected String clusterID = ""; 25 | public KafkaProducer producer = null; // kafka producer 26 | public ConcurrentHashMap group2ClusterMap = null; 27 | public int[] reducedUpdateLabelIndexes = null; 28 | 29 | public InfoSender( String brokerList, String clusterID, ConcurrentHashMap group2ClusterMap, String configFile ) { 30 | // get reduced labels index 31 | try (BufferedReader br = new BufferedReader(new FileReader(configFile))) { 32 | List updateLabelsList = Arrays.asList(br.readLine().split("\t")); 33 | String[] reducedUpdateLabels = 
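/* Annotation (not in the original source): the config file is expected to hold the full
 * tab-separated label list on its first line and the reduced subset on its second line;
 * reducedUpdateLabelIndexes then records where each reduced label sits in the full list.
 * A hedged, purely hypothetical example: a first line "A\tB\tC\tD" and a second line "B\tD"
 * give reducedUpdateLabelIndexes = [1, 3], so run() later keeps only columns 1 and 3 of each
 * update before forwarding it to the internal_groups or external_groups topic.
 */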
br.readLine().split("\t"); 34 | reducedUpdateLabelIndexes = new int[reducedUpdateLabels.length]; 35 | for (int i=0; i < reducedUpdateLabels.length; i++) { 36 | reducedUpdateLabelIndexes[i] = updateLabelsList.indexOf(reducedUpdateLabels[i]); 37 | } 38 | System.out.println(Arrays.toString(this.reducedUpdateLabelIndexes)); 39 | } catch (Exception e) { 40 | System.err.println("Read config file failed: " + e.getMessage()); 41 | } 42 | 43 | this.brokerList = brokerList; 44 | this.clusterID = clusterID; 45 | this.group2ClusterMap = group2ClusterMap; 46 | // setup producer 47 | Properties producerProps = new Properties(); 48 | producerProps.put("bootstrap.servers", brokerList); 49 | producerProps.put("acks", "all"); 50 | producerProps.put("retries", 0); 51 | producerProps.put("batch.size", 16384); 52 | producerProps.put("linger.ms", 1); 53 | producerProps.put("buffer.memory", 33554432); 54 | producerProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 55 | producerProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 56 | this.producer = new KafkaProducer(producerProps); 57 | } 58 | 59 | public void run() { 60 | while(true) { 61 | try { 62 | Thread.sleep(100); 63 | } catch (InterruptedException e) { 64 | Thread.currentThread().interrupt(); 65 | } 66 | try { 67 | File file = new File("/var/www/info/info_queue"); 68 | File file2 = new File("/var/www/info/info_queue2"); 69 | file.renameTo(file2); 70 | } catch (Exception e2) { 71 | //System.err.println("Change file Exception: " + e2.getMessage()); 72 | } 73 | try (BufferedReader br = new BufferedReader(new FileReader("/var/www/info/info_queue2"))) { 74 | String line; 75 | String topic; 76 | int i = 0; 77 | // foreach record 78 | while ((line = br.readLine()) != null) { 79 | i++; 80 | // if record's group belongs to current cluster, topic is "internal_groups", otherwise topic is "external_groups" 81 | JSONObject jObject = new JSONObject(line); 82 | // for uploading 83 | ProducerRecord data = new ProducerRecord<>("upload", jObject.getString("update")); 84 | this.producer.send(data); 85 | // for processing 86 | String cluster; 87 | if (group2ClusterMap.containsKey(jObject.getString("group_id"))) 88 | cluster = group2ClusterMap.get(jObject.getString("group_id")); 89 | else // if no cluster to map to, deal this group wthin current cluster 90 | cluster = this.clusterID; 91 | if (cluster.equals(this.clusterID)) 92 | topic = "internal_groups"; 93 | else 94 | topic = "external_groups"; 95 | jObject.put("cluster_id", cluster); 96 | String[] update = jObject.getString("update").split("\t"); 97 | String[] reducedupdate = new String[this.reducedUpdateLabelIndexes.length]; 98 | for (int j=0; j < reducedUpdateLabelIndexes.length; j++) { 99 | reducedupdate[j] = update[reducedUpdateLabelIndexes[j]]; 100 | } 101 | jObject.put("update", StringUtils.join(reducedupdate, "\t")); 102 | data = new ProducerRecord<>(topic, jObject.toString()); 103 | this.producer.send(data); 104 | } 105 | System.out.printf("Send %d msgs!\n",i); 106 | } catch (Exception e3) { 107 | //System.err.println("Read file Exception: " + e3.getMessage()); 108 | } 109 | try { 110 | File file = new File("/var/www/info/info_queue2"); 111 | file.delete(); 112 | } catch (Exception e4) { 113 | //System.err.println("Deletc file Exception: " + e4.getMessage()); 114 | } 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /frontend/trace/trace_parser.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # parse the trace and test 4 | # 5 | # Author: Shijie Sun 6 | # Email: septimus145@gmail.com 7 | # Sept, 2016 8 | 9 | import time 10 | import threading 11 | import urllib 12 | import urllib2 13 | import Queue 14 | import sys 15 | from itertools import izip 16 | 17 | 18 | TIMEOUT = 3 # timeout restriction for requests 19 | UPDATE_DELAY = 2 # delay time from receiving decision to send update 20 | 21 | 22 | URL = '' 23 | trace_start_time = 0 24 | trace_finish_time = 0 25 | update_queue = Queue.Queue() # message queue for request 26 | request_num = [] # number of requests sent and succeeded [[send1, succeeded1], ... , [send2, succeeded2]] 27 | load_dict_list = [] 28 | cost_list = [] 29 | 30 | 31 | def request_performer(*trace): 32 | global update_queue 33 | global request_num 34 | global load_dict_list 35 | global cost_list 36 | 37 | curr_second = trace[0] - trace_start_time 38 | curr_minute = curr_second / 60 39 | request_num[curr_second][0] += 1 40 | values = {'payload' : trace[1] + '\t'.join(trace[2].keys()), 'method' : 'request'} 41 | decision = '' 42 | try: 43 | con = urllib2.urlopen(URL, urllib.urlencode(values), timeout=TIMEOUT) 44 | decision = con.read().strip() 45 | except Exception as inst: 46 | print(inst) 47 | return 48 | # if decision is not in decision_list 49 | if not trace[2].has_key(decision): 50 | return 51 | request_num[curr_second][1] += 1 52 | # update the load dict 53 | if not load_dict_list[curr_minute].has_key(decision): 54 | load_dict_list[curr_minute][decision] = 1 55 | else: 56 | load_dict_list[curr_minute][decision] += 1 57 | cost_factor = 1 58 | if sum(load_dict_list[curr_minute].values()) > 0: 59 | load = load_dict_list[curr_minute][decision] / float(load_dict_list[curr_minute]['total_sessions']) 60 | for key in sorted(trace[3][decision].keys(), reverse=True): 61 | if load > key: 62 | cost_factor = trace[3][decision][key] 63 | break 64 | cost = cost_factor * float(trace[2][decision]) 65 | cost_list[curr_second] += cost 66 | update_str = trace[1] + decision + '\t' + str(cost) 67 | update_queue.put([time.time() + UPDATE_DELAY, update_str]) 68 | 69 | 70 | def update_performer(): 71 | global update_queue 72 | while True: 73 | while update_queue.empty(): 74 | time.sleep(0.05) 75 | info = update_queue.get() 76 | while time.time() < info[0]: 77 | time.sleep(0.05) 78 | try: 79 | con = urllib2.urlopen(URL, urllib.urlencode({'payload' : info[1], 'method' : 'update'}), timeout=TIMEOUT) 80 | except Exception as inst: 81 | print(inst) 82 | 83 | 84 | if __name__ == '__main__': 85 | #global URL 86 | #global trace_start_time 87 | #global trace_finish_time 88 | #global update_queue 89 | #global request_num 90 | #global load_dict_list 91 | #global cost_list 92 | 93 | if len(sys.argv) < 3: 94 | print "Usage: ", sys.argv[0], "url trace_file" 95 | sys.exit(1) 96 | URL = sys.argv[1] 97 | trace_list = [] 98 | 99 | # load the trace 100 | with open(sys.argv[2]) as fin: 101 | # seek to the beginning of the file and read all traces 102 | fin.seek(0) 103 | for trace in fin.readlines(): 104 | [feature, info] = trace.split('DecisionMap') 105 | trace_time = int(feature.split('\t',1)[0]) / 1000 106 | [decision_str, load_str] = info.strip().split('LoadMap') 107 | decision_map = dict(decision.split(',') for decision in decision_str.strip().split('\t')) 108 | load_map = dict([load.split(',')[0], load.split(',')[1].split(';')] for load in load_str.strip().split('\t')) 109 | for load in 
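# Annotation (not in the original script): each trace line is expected to look like the
# following hypothetical example:
#   <epoch_ms>\t<feature fields...>DecisionMap cdnA,120\tcdnB,95 LoadMap cdnA,0.5;2;0.8;4\tcdnB,0.5;1.5
# i.e. the text before "DecisionMap" is the feature prefix (starting with a millisecond
# timestamp), the middle part maps each decision to its cost, and the part after "LoadMap"
# maps each decision to load-threshold;cost-factor pairs, which the loop around this point
# turns into {threshold: factor} dicts. A hedged example invocation (hypothetical URL):
#   python trace_parser.py http://<frontend-host>/<request-endpoint> sorted_trace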
load_map: 110 | load_map[load] = dict(zip(load_map[load][0::2], load_map[load][1::2])) 111 | trace_list.append([trace_time, feature, decision_map, load_map]) 112 | 113 | # initialize 114 | trace_start_time = trace_list[0][0] 115 | trace_stop_time = trace_list[len(trace_list) - 1][0] 116 | request_num = [[0,0] for i in range(trace_stop_time - trace_start_time + 1)] 117 | load_dict_list = [{} for i in range((trace_stop_time - trace_start_time)/60 + 1)] 118 | cost_list = [0 for i in range(trace_stop_time - trace_start_time + 1)] 119 | 120 | for load_dict in load_dict_list: 121 | load_dict['total_sessions'] = 0 122 | for trace in trace_list: 123 | load_dict_list[(trace[0] - trace_start_time) / 60]['total_sessions'] += 1 124 | 125 | update_thread = threading.Thread(target=update_performer) 126 | update_thread.daemon = True 127 | update_thread.start() 128 | 129 | test_start_time = time.time() 130 | test_second = 0 131 | send_num = 0 132 | 133 | newoutput = 'result-detail.txt' 134 | foutnew = open(newoutput, 'w') 135 | foutnew.write("") 136 | foutnew.close() 137 | 138 | fout = open('result.txt','w') 139 | # start the test 140 | print "------------------------------ %3d sec" % test_second 141 | for trace in trace_list: 142 | while (time.time() - test_start_time) < (trace[0] - trace_start_time): 143 | time.sleep(0.05) 144 | if int(time.time() - test_start_time) > test_second: 145 | test_second = int(time.time() - test_start_time) 146 | print "| send %d, average cost %d" % (send_num, cost_list[test_second-1]/request_num[test_second-1][1]) 147 | send_num = 0 148 | foutnew = open(newoutput, 'a') 149 | foutnew.write(str(cost_list[test_second-1] / request_num[test_second-1][1]) + '\n') 150 | foutnew.close() 151 | fout.write(str(cost_list[test_second-1] / request_num[test_second-1][1]) + '\n') 152 | print "------------------------------ %3d sec" % test_second 153 | #print request_num 154 | #print load_dict_list 155 | #print cost_list 156 | thread = threading.Thread(target=request_performer, args=(trace)) 157 | thread.daemon = True 158 | thread.start() 159 | send_num += 1 160 | 161 | # wait all the requests and updates are finished 162 | time.sleep(TIMEOUT * 2) 163 | 164 | fout.close() 165 | print request_num 166 | print cost_list 167 | #with open('result.txt', 'w') as fout: 168 | # for i in range(len(cost_list)): 169 | # fout.write(str(cost_list[i] / request_num[i][1]) + '\n') 170 | 171 | -------------------------------------------------------------------------------- /frontend/trace/trace_parser_mulit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # parse the trace and test 4 | # 5 | # Author: Shijie Sun 6 | # Email: septimus145@gmail.com 7 | # Sept, 2016 8 | 9 | import time 10 | import threading 11 | import urllib 12 | import urllib2 13 | import Queue 14 | import sys 15 | import random 16 | from itertools import izip 17 | 18 | 19 | TIMEOUT = 3 # timeout restriction for requests 20 | UPDATE_DELAY = 2 # delay time from receiving decision to send update 21 | 22 | 23 | URL = [] 24 | trace_start_time = 0 25 | trace_finish_time = 0 26 | update_queue = Queue.Queue() # message queue for request 27 | request_num = [] # number of requests sent and succeeded [[send1, succeeded1], ... 
, [send2, succeeded2]] 28 | load_dict_list = [] 29 | cost_list = [] 30 | 31 | 32 | def request_performer(*trace): 33 | global update_queue 34 | global request_num 35 | global load_dict_list 36 | global cost_list 37 | 38 | curr_second = trace[0] - trace_start_time 39 | curr_minute = curr_second / 60 40 | request_num[curr_second][0] += 1 41 | values = {'payload' : trace[1] + '\t'.join(trace[2].keys()), 'method' : 'request'} 42 | decision = '' 43 | url_idx = trace[4] % len(URL) 44 | try: 45 | con = urllib2.urlopen(URL[url_idx], urllib.urlencode(values), timeout=TIMEOUT) 46 | decision = con.read().strip() 47 | except Exception as inst: 48 | print(inst) 49 | request_num[curr_second][1] += 1 50 | decision = trace[2].keys()[2] 51 | print "IM in trouble ---" + str(trace[2][decision]) 52 | fout1.write("%d,%s,%s\n"%(url_idx,"local",str(trace[2][decision]))) 53 | cost_list[curr_second] += float(trace[2][decision]) 54 | return 55 | # if decision is not in decision_list 56 | if not trace[2].has_key(decision): 57 | return 58 | request_num[curr_second][1] += 1 59 | # update the load dict 60 | if not load_dict_list[curr_minute].has_key(decision): 61 | load_dict_list[curr_minute][decision] = 1 62 | else: 63 | load_dict_list[curr_minute][decision] += 1 64 | cost_factor = 1 65 | #if sum(load_dict_list[curr_minute].values()) > 0: 66 | # load = load_dict_list[curr_minute][decision] / float(load_dict_list[curr_minute]['total_sessions']) 67 | # for key in sorted(trace[3][decision].keys(), reverse=True): 68 | # if load > key: 69 | # cost_factor = trace[3][decision][key] 70 | # break 71 | cost = cost_factor * float(trace[2][decision]) 72 | fout1.write("%d,%s,%s\n"%(url_idx,"online",str(trace[2][decision]))) 73 | print "IM ok ---" + str(trace[2][decision]) 74 | cost_list[curr_second] += cost 75 | update_str = trace[1] + decision + '\t' + str(cost) 76 | update_queue.put([time.time() + UPDATE_DELAY, update_str, url_idx]) 77 | 78 | 79 | def update_performer(): 80 | global update_queue 81 | while True: 82 | while update_queue.empty(): 83 | time.sleep(0.05) 84 | info = update_queue.get() 85 | while time.time() < info[0]: 86 | time.sleep(0.05) 87 | try: 88 | con = urllib2.urlopen(URL[info[2]], urllib.urlencode({'payload' : info[1], 'method' : 'update'}), timeout=TIMEOUT) 89 | except Exception as inst: 90 | print(inst) 91 | 92 | 93 | if __name__ == '__main__': 94 | #global URL 95 | #global trace_start_time 96 | #global trace_finish_time 97 | #global update_queue 98 | #global request_num 99 | #global load_dict_list 100 | #global cost_list 101 | 102 | if len(sys.argv) < 3: 103 | print "Usage: ", sys.argv[0], "url trace_file" 104 | sys.exit(1) 105 | URL = sys.argv[1].split(",") 106 | trace_list = [] 107 | 108 | # load the trace 109 | with open(sys.argv[2]) as fin: 110 | # seek to the beginning of the file and read all traces 111 | fin.seek(0) 112 | j = 0 113 | for trace in fin.readlines(): 114 | [feature, info] = trace.split('DecisionMap') 115 | trace_time = int(feature.split('\t',1)[0]) / 1000 116 | [decision_str, load_str] = info.strip().split('LoadMap') 117 | decision_map = dict(decision.split(',') for decision in decision_str.strip().split('\t')) 118 | load_map = dict([load.split(',')[0], load.split(',')[1].split(';')] for load in load_str.strip().split('\t')) 119 | for load in load_map: 120 | load_map[load] = dict(zip(load_map[load][0::2], load_map[load][1::2])) 121 | trace_list.append([trace_time, feature, decision_map, load_map, j]) 122 | j+=1 123 | 124 | # initialize 125 | trace_start_time = trace_list[0][0] 126 | 
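# Annotation (not in the original script): this variant takes a comma-separated list of
# frontend URLs and spreads requests over them round-robin (url_idx = trace[4] % len(URL)).
# If a request to the chosen frontend fails, the session falls back to a fixed "local"
# decision whose cost is still charged, and every session is tagged "local" or "online" in
# separa_result so the per-frontend cost can be compared afterwards.
# Hedged example invocation (hypothetical hostnames):
#   python trace_parser_mulit.py http://fe1/<endpoint>,http://fe2/<endpoint> sorted_trace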
trace_stop_time = trace_list[len(trace_list) - 1][0] 127 | request_num = [[0,0] for i in range(trace_stop_time - trace_start_time + 1)] 128 | load_dict_list = [{} for i in range((trace_stop_time - trace_start_time)/60 + 1)] 129 | cost_list = [0 for i in range(trace_stop_time - trace_start_time + 1)] 130 | 131 | for load_dict in load_dict_list: 132 | load_dict['total_sessions'] = 0 133 | for trace in trace_list: 134 | load_dict_list[(trace[0] - trace_start_time) / 60]['total_sessions'] += 1 135 | 136 | update_thread = threading.Thread(target=update_performer) 137 | update_thread.daemon = True 138 | update_thread.start() 139 | 140 | test_start_time = time.time() 141 | test_second = 0 142 | send_num = 0 143 | fout1 = open('separa_result','w') 144 | fout = open('result.txt','w') 145 | # start the test 146 | print "------------------------------ %3d sec" % test_second 147 | for trace in trace_list: 148 | while (time.time() - test_start_time) < (trace[0] - trace_start_time): 149 | time.sleep(0.05) 150 | if int(time.time() - test_start_time) > test_second: 151 | test_second = int(time.time() - test_start_time) 152 | print "| send %d, average cost %d" % (send_num, cost_list[test_second-1]/request_num[test_second-1][1]) 153 | send_num = 0 154 | fout.write(str(cost_list[test_second-1] / request_num[test_second-1][1]) + '\n') 155 | print "------------------------------ %3d sec" % test_second 156 | thread = threading.Thread(target=request_performer, args=(trace)) 157 | thread.daemon = True 158 | thread.start() 159 | send_num += 1 160 | 161 | # wait all the requests and updates are finished 162 | time.sleep(TIMEOUT * 2) 163 | 164 | fout.close() 165 | fout1.close() 166 | print request_num 167 | print cost_list 168 | #with open('result.txt', 'w') as fout: 169 | # for i in range(len(cost_list)): 170 | # fout.write(str(cost_list[i] / request_num[i][1]) + '\n') 171 | 172 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/src/main/java/frontend/DecisionMaker.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Consume messages from one or more topics in Kafka and make decisions. 
3 | * 4 | * Author: Shijie Sun 5 | * Email: septimus145@gmail.com 6 | * August, 2016 7 | */ 8 | 9 | package frontend; 10 | 11 | import java.util.*; 12 | import java.util.regex.Pattern; 13 | import java.util.concurrent.ConcurrentLinkedQueue; 14 | import java.io.*; 15 | 16 | import scala.Tuple2; 17 | 18 | import org.json.JSONObject; 19 | import org.json.JSONArray; 20 | 21 | import kafka.serializer.StringDecoder; 22 | import org.apache.kafka.clients.producer.KafkaProducer; 23 | import org.apache.kafka.clients.producer.ProducerRecord; 24 | 25 | import org.apache.spark.SparkConf; 26 | import org.apache.spark.rdd.RDD; 27 | import org.apache.spark.api.java.function.*; 28 | import org.apache.spark.api.java.JavaRDD; 29 | import org.apache.spark.api.java.JavaPairRDD; 30 | import org.apache.spark.streaming.api.java.*; 31 | import org.apache.spark.streaming.kafka.*; 32 | import org.apache.spark.streaming.Durations; 33 | 34 | // for changing logger config 35 | import org.apache.log4j.Logger; 36 | import org.apache.log4j.Level; 37 | 38 | 39 | 40 | public final class DecisionMaker { 41 | 42 | public final static int processInterval = 2; // seconds 43 | 44 | public static void main(String[] args) throws Exception { 45 | if (args.length < 3) { 46 | System.err.println("Usage: DecisionMaker \n" + 47 | " is a list of one or more Kafka brokers\n" + 48 | " is the kafka topic to consume from\n" + 49 | " is the kafka topic to publish the decision to\n"); 50 | System.exit(1); 51 | } 52 | 53 | Logger.getLogger("org").setLevel(Level.OFF); 54 | Logger.getLogger("akka").setLevel(Level.OFF); 55 | 56 | // parse the arguments 57 | final String brokers = args[0]; 58 | String topicIn = args[1]; 59 | final String topicOut = args[2]; 60 | // final double gamma = Double.parseDouble(args[3]); 61 | // final int precisionTime = Integer.parseInt(args[4]); 62 | 63 | // setup producer 64 | final Properties producerProps = new Properties(); 65 | producerProps.put("bootstrap.servers", brokers); 66 | producerProps.put("acks", "all"); 67 | producerProps.put("retries", 0); 68 | producerProps.put("batch.size", 16384); 69 | producerProps.put("linger.ms", 1); 70 | producerProps.put("buffer.memory", 33554432); 71 | producerProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 72 | producerProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 73 | 74 | // Create context with `processInterval` batch interval 75 | SparkConf sparkConf = new SparkConf().setAppName("DicisionMaker"); 76 | final JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(processInterval)); 77 | 78 | // Create direct kafka stream with brokers and topic 79 | Set topicSet = new HashSet<>(Arrays.asList(topicIn)); 80 | Map kafkaParams = new HashMap<>(); 81 | kafkaParams.put("metadata.broker.list", brokers); 82 | JavaPairInputDStream messages = KafkaUtils.createDirectStream( 83 | jssc, 84 | String.class, 85 | String.class, 86 | StringDecoder.class, 87 | StringDecoder.class, 88 | kafkaParams, 89 | topicSet 90 | ); 91 | 92 | // create a class to restore the history data 93 | final HistoryData historyData = new HistoryData(jssc); 94 | 95 | // map to pair to retrieve the data and group_id 96 | // then reduce by key to combine the performance of each batch 97 | JavaPairDStream>> batchQualitySums = messages.mapToPair( 98 | // extract info of each update 99 | new PairFunction, String, List>() { 100 | @Override 101 | public Tuple2> call(Tuple2 tuple2) { 102 | JSONObject jObject = new 
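/* Annotation (not in the original source): each consumed record is expected to be the JSON
 * produced on the GroupManager side, e.g. the hypothetical
 *   {"group_id":"g1","cluster_id":"frontend1","update":"cdnA\t1.94"}
 * where the first tab-separated field of "update" is the decision and the second its cost;
 * the cost is negated below so that a higher score always means better performance, and the
 * stream is then keyed by "group_id:decision" before being regrouped per group.
 */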
JSONObject(tuple2._2().trim()); 103 | String group_id = jObject.getString("group_id"); 104 | String[] updates = jObject.getString("update").split("\t"); 105 | String decision = updates[0]; 106 | // reverse the score because higher score should represent better performance 107 | double score = 0 - Double.parseDouble(updates[1]); 108 | List scores = new ArrayList(); 109 | scores.add(score); 110 | return new Tuple2<>(group_id + ":" + decision, scores); 111 | } 112 | // count quantity and total score for (group_id:decision) 113 | }).reduceByKey( 114 | new Function2, List, List>() { 115 | @Override 116 | public List call(List m1, List m2) { 117 | m1.addAll(m2); 118 | return m1; 119 | } 120 | // split the group_id and decision 121 | }).mapToPair( 122 | new PairFunction>, String, Map>>() { 123 | @Override 124 | public Tuple2>> 125 | call(Tuple2> tuple2) { 126 | String group_id = tuple2._1().split(":")[0]; 127 | String decision = tuple2._1().split(":")[1]; 128 | Map> info = new HashMap>(); 129 | info.put(decision, tuple2._2()); 130 | return new Tuple2<>(group_id, info); 131 | } 132 | }); 133 | 134 | // reduce the batchQualitySums 135 | JavaPairDStream>> qualitySums; 136 | Function2>, Map>, 137 | Map>> qualitySumsReduceFunction2 = new Function2< 138 | Map>, Map>, Map>>() { 139 | @Override 140 | public Map> call(Map> m1, 141 | Map> m2) { 142 | // Because has reduced once, so here just merge maps by union 143 | for (Map.Entry> m1Entry : m1.entrySet()) { 144 | m2.put(m1Entry.getKey(), m1Entry.getValue()); 145 | } 146 | return m2; 147 | } 148 | }; 149 | if (historyData.windowSize > processInterval) { 150 | qualitySums = batchQualitySums.reduceByKeyAndWindow(qualitySumsReduceFunction2, 151 | Durations.seconds(historyData.windowSize), Durations.seconds(processInterval)); 152 | } else { 153 | qualitySums = batchQualitySums.reduceByKey(qualitySumsReduceFunction2); 154 | } 155 | 156 | // combine the old data with new data and send the decision to kafka 157 | qualitySums.foreachRDD(new VoidFunction>>>() { 158 | // foreachRDD will get RDD of each batch of dstream 159 | @Override 160 | public void call(JavaPairRDD>> groups) throws Exception { 161 | //System.out.println(groups.cogroup(historyPairDResult).collect()); 162 | 163 | // combine old data with new data: cogroup then map 164 | JavaPairRDD>> combinedData = groups.cogroup( 165 | historyData.pairDData).mapToPair( 166 | new PairFunction>>, Iterable>> 168 | >>, String, Map>>() { 169 | @Override 170 | public Tuple2>> call(Tuple2>>, Iterable>> 172 | >> tuple2) { 173 | return historyData.combineCall(tuple2); 174 | } 175 | }); 176 | historyData.updateData(combinedData); 177 | 178 | // to show the combined result clearly 179 | //System.out.println(combinedResult.collect()); 180 | List>>> collectedData = combinedData.collect(); 181 | Tuple2>> tmpTuple2 = null; 182 | Map> tmpMap = null; 183 | for (int i = 0; i < collectedData.size(); i++) { 184 | tmpTuple2 = collectedData.get(i); 185 | System.out.println(tmpTuple2._1() + "----"); 186 | tmpMap = tmpTuple2._2(); 187 | for (Map.Entry> entry : tmpMap.entrySet()) { 188 | System.out.printf("\t%s : (%f, %f) : %f\n", entry.getKey(), entry.getValue().get(0), 189 | entry.getValue().get(1), entry.getValue().get(0)/entry.getValue().get(1)); 190 | } 191 | } 192 | 193 | combinedData.foreachPartition( 194 | new VoidFunction>>>> () { 195 | @Override 196 | public void call(Iterator>>> group_iter) 197 | throws Exception { 198 | KafkaProducer kproducer = new KafkaProducer(producerProps); 199 | Tuple2>> group = null; 200 | while 
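/* Annotation (not in the original source): each partition creates one KafkaProducer and
 * publishes one record per group to the decision topic, of the form
 *   <group_id>;<d1>:<d2>:...:<dN>:;From: <brokers>
 * where the colon-separated list is the ranking produced by getDecision() (one entry per
 * precisionTime round in the D-UCB variant above, best decision first), e.g. a hypothetical
 * "g1;cdnA:cdnA:cdnB:...;From: 10.11.10.3:9092".
 */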
(group_iter.hasNext()) { 201 | group = group_iter.next(); 202 | Map> duplicateMap = new HashMap>(); 203 | for (Map.Entry> entry : group._2().entrySet()) { 204 | List tmpList = new ArrayList(); 205 | for (Double score : entry.getValue()) 206 | tmpList.add(score.doubleValue()); 207 | duplicateMap.put(entry.getKey(), tmpList); 208 | } 209 | String decisions = historyData.getDecision(duplicateMap); 210 | ProducerRecord data = new ProducerRecord<>(topicOut, 211 | group._1() + ";" + decisions + ";From: " + brokers); 212 | kproducer.send(data); 213 | } 214 | }}); 215 | }}); 216 | 217 | // Start the computation 218 | jssc.start(); 219 | jssc.awaitTermination(); 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /frontend/Communicator/src/main/java/frontend/Communicator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Upload updates to backend and communicate with other cluster 3 | * 4 | * 5 | * Author: Shijie Sun 6 | * Email: septimus145@gmail.com 7 | * August, 2016 8 | */ 9 | 10 | package frontend; 11 | 12 | import java.util.*; 13 | import java.util.regex.Pattern; 14 | import java.util.concurrent.ConcurrentLinkedQueue; 15 | import java.util.concurrent.ConcurrentHashMap; 16 | import java.io.*; 17 | 18 | import scala.Tuple2; 19 | 20 | import org.json.JSONObject; 21 | import org.json.JSONArray; 22 | 23 | import kafka.serializer.StringDecoder; 24 | import org.apache.kafka.clients.producer.KafkaProducer; 25 | import org.apache.kafka.clients.producer.ProducerRecord; 26 | 27 | import org.apache.spark.SparkConf; 28 | import org.apache.spark.rdd.RDD; 29 | import org.apache.spark.api.java.function.*; 30 | import org.apache.spark.api.java.JavaRDD; 31 | import org.apache.spark.api.java.JavaPairRDD; 32 | import org.apache.spark.streaming.api.java.*; 33 | import org.apache.spark.streaming.kafka.*; 34 | import org.apache.spark.streaming.Durations; 35 | 36 | // for changing logger config 37 | import org.apache.log4j.Logger; 38 | import org.apache.log4j.Level; 39 | 40 | 41 | 42 | public final class Communicator { 43 | 44 | public final static int processInterval = 1; // seconds 45 | 46 | public static void main(String[] args) throws Exception { 47 | if (args.length < 1) { 48 | System.err.println("Usage: Communicator config_file"); 49 | System.exit(1); 50 | } 51 | 52 | Logger.getLogger("org").setLevel(Level.OFF); 53 | Logger.getLogger("akka").setLevel(Level.OFF); 54 | 55 | // basic configuration 56 | final Properties config = new Properties(); 57 | InputStream iStream = null; 58 | try { 59 | iStream = new FileInputStream(args[0]); 60 | config.load(iStream); 61 | } catch (IOException ex) { 62 | ex.printStackTrace(); 63 | } finally { 64 | if (iStream != null) { 65 | try { 66 | iStream.close(); 67 | } catch (IOException e) { 68 | e.printStackTrace(); 69 | } 70 | } 71 | } 72 | final String currentClusterID = config.getProperty("clusterID"); 73 | final String updateTopic = config.getProperty("updateTopic"); 74 | final String uploadTopic = config.getProperty("uploadTopic"); 75 | final String decisionTopic = config.getProperty("decisionTopic"); 76 | final String subscribeTopic = config.getProperty("subscribeTopic"); 77 | final String forwardTopic = config.getProperty("forwardTopic"); 78 | final String sampleTopic = config.getProperty("sampleTopic"); 79 | final String aliveTopic = config.getProperty("aliveTopic"); 80 | final int managementLabelsNum = Integer.valueOf(config.getProperty("managementLabelsNum")); 81 | 82 | 
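/* Annotation (not in the original source): judging from the getProperty() calls in this
 * class, the config file passed on the command line is expected to define
 * managementLabelsNum, clusterID, the seven topic names (updateTopic, uploadTopic,
 * decisionTopic, subscribeTopic, forwardTopic, sampleTopic, aliveTopic), a backendBrokers
 * broker list, and one <clusterID>=host:port entry per frontend cluster so that producers
 * for remote clusters can be created on demand.
 */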
// setup producer basic config 83 | final Properties producerProps = new Properties(); 84 | producerProps.put("acks", "all"); 85 | producerProps.put("retries", 0); 86 | producerProps.put("batch.size", 16384); 87 | producerProps.put("linger.ms", 1); 88 | producerProps.put("buffer.memory", 33554432); 89 | producerProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 90 | producerProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 91 | 92 | // create streaming context 93 | SparkConf sparkConf = new SparkConf().setAppName("Communicator"); 94 | final JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(processInterval)); 95 | 96 | // create a rdd to store the group-subscribers map 97 | List>> groupSubscriberList = new ArrayList<>(); 98 | JavaRDD>> groupSubscriberRDD = jssc.sparkContext().parallelize(groupSubscriberList); 99 | JavaPairRDD> gsRDD = JavaPairRDD.fromJavaRDD(groupSubscriberRDD); 100 | // using queue to make the map updatable 101 | final ConcurrentLinkedQueue>> gsQueue = new ConcurrentLinkedQueue<>(); 102 | gsQueue.add(gsRDD); 103 | 104 | Map kafkaParams = new HashMap<>(); 105 | kafkaParams.put("metadata.broker.list", config.getProperty(currentClusterID)); 106 | // Create stream of upload topic 107 | Set uploadTopicSet = new HashSet<>(Arrays.asList(uploadTopic)); 108 | JavaPairInputDStream uploadMsgs = KafkaUtils.createDirectStream( 109 | jssc, 110 | String.class, 111 | String.class, 112 | StringDecoder.class, 113 | StringDecoder.class, 114 | kafkaParams, 115 | uploadTopicSet 116 | ); 117 | // Create stream of decision topic 118 | Set decisionTopicSet = new HashSet<>(Arrays.asList(decisionTopic)); 119 | JavaPairInputDStream decisionMsgs = KafkaUtils.createDirectStream( 120 | jssc, 121 | String.class, 122 | String.class, 123 | StringDecoder.class, 124 | StringDecoder.class, 125 | kafkaParams, 126 | decisionTopicSet 127 | ); 128 | // Create stream of subscribe topic 129 | Set subscribeTopicSet = new HashSet<>(Arrays.asList(subscribeTopic)); 130 | JavaPairInputDStream subscribeMsgs = KafkaUtils.createDirectStream( 131 | jssc, 132 | String.class, 133 | String.class, 134 | StringDecoder.class, 135 | StringDecoder.class, 136 | kafkaParams, 137 | subscribeTopicSet 138 | ); 139 | // Create stream of forward topic 140 | Set forwardTopicSet = new HashSet<>(Arrays.asList(forwardTopic)); 141 | JavaPairInputDStream forwardMsgs = KafkaUtils.createDirectStream( 142 | jssc, 143 | String.class, 144 | String.class, 145 | StringDecoder.class, 146 | StringDecoder.class, 147 | kafkaParams, 148 | forwardTopicSet 149 | ); 150 | 151 | // upload all the updates 152 | uploadMsgs.foreachRDD(new VoidFunction>() { 153 | // foreachRDD will get RDD of each batch of dstream 154 | @Override 155 | public void call(JavaPairRDD uploadMsgsRDD) throws Exception { 156 | uploadMsgsRDD.sample(false, 0.01).flatMapToPair(new PairFlatMapFunction, String, Integer>() { 157 | @Override 158 | public Iterable> call(Tuple2 tuple2) { 159 | List> result = new ArrayList<>(); 160 | String[] features = tuple2._2().split("\t"); 161 | for (int i = 0; i < (features.length - managementLabelsNum); i++) { 162 | result.add(new Tuple2<>(String.valueOf(i) + ";" + features[i], 1)); 163 | } 164 | return result; 165 | } 166 | }).reduceByKey(new Function2() { 167 | @Override 168 | public Integer call(Integer i1, Integer i2){ 169 | return i1+i2; 170 | } 171 | }).foreachPartition(new VoidFunction>> () { 172 | @Override 173 | public void call(Iterator> 
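/* Annotation (not in the original source): this branch samples roughly 1% of the uploaded
 * updates, splits each sampled update into its feature columns (all but the trailing
 * managementLabelsNum fields), counts occurrences of each (column-index, value) pair and
 * ships them to the backend sampleTopic as "index;value;count". A hedged example with
 * made-up fields: an update "Pittsburgh\tComcast\tcdnA\t1.94" with managementLabelsNum=2
 * yields the pairs "0;Pittsburgh" and "1;Comcast", and 17 such samples in one batch would
 * be sent as "0;Pittsburgh;17". The un-sampled updates are forwarded unchanged to
 * uploadTopic, followed by one aliveTopic heartbeat per partition.
 */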
samples_iter) throws Exception { 174 | producerProps.put("bootstrap.servers", config.getProperty("backendBrokers")); 175 | KafkaProducer kproducer = new KafkaProducer(producerProps); 176 | ProducerRecord data = null; 177 | Tuple2 sample = null; 178 | while (samples_iter.hasNext()) { 179 | sample = samples_iter.next(); 180 | data = new ProducerRecord<>(sampleTopic, sample._1() + ";" + String.valueOf(sample._2())); 181 | kproducer.send(data); 182 | } 183 | } 184 | }); 185 | uploadMsgsRDD.foreachPartition(new VoidFunction>> () { 186 | @Override 187 | public void call(Iterator> updates_iter) throws Exception { 188 | producerProps.put("bootstrap.servers", config.getProperty("backendBrokers")); 189 | KafkaProducer kproducer = new KafkaProducer(producerProps); 190 | ProducerRecord data = null; 191 | Tuple2 update = null; 192 | while (updates_iter.hasNext()) { 193 | update = updates_iter.next(); 194 | data = new ProducerRecord<>(uploadTopic, update._2()); 195 | kproducer.send(data); 196 | } 197 | data = new ProducerRecord<>(aliveTopic, currentClusterID); 198 | kproducer.send(data); 199 | } 200 | }); 201 | } 202 | }); 203 | 204 | // push all the decision to subscribers 205 | decisionMsgs.foreachRDD(new VoidFunction>() { 206 | // foreachRDD will get RDD of each batch of dstream 207 | @Override 208 | public void call(JavaPairRDD decisionMsgsRDD) throws Exception { 209 | // get the group-subscriber map 210 | // collectAsMap has a bug when used by rdd which is not from jssc.parallel directly 211 | // so here need to use collect and convert it to map manually 212 | List>> gsList = gsQueue.peek().collect(); 213 | final Map> groupSubscriber = new HashMap<>(); 214 | for (Tuple2> gs : gsList) { 215 | groupSubscriber.put(gs._1(), gs._2()); 216 | } 217 | // push the decisions 218 | decisionMsgsRDD.foreachPartition(new VoidFunction>> () { 219 | @Override 220 | public void call(Iterator> decisions_iter) throws Exception { 221 | ConcurrentHashMap> producerMap = new ConcurrentHashMap<>(); 222 | Tuple2 decision = null; 223 | KafkaProducer kproducer = null; 224 | ProducerRecord data = null; 225 | String groupID = null; 226 | String clusterID = null; 227 | while (decisions_iter.hasNext()) { 228 | decision = decisions_iter.next(); 229 | groupID = decision._2().split(";")[0]; 230 | // foreach subscriber 231 | if (groupSubscriber.containsKey(groupID)) { 232 | Iterator it = groupSubscriber.get(groupID).iterator(); 233 | while (it.hasNext()) { 234 | clusterID = it.next(); 235 | // if it is not current cluster 236 | if (clusterID.equals(currentClusterID)) 237 | continue; 238 | // if do not have producer for this cluster, create one 239 | if (! 
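/* Annotation (not in the original source): decisions are fanned out here using the
 * group-to-subscribers map collected from gsQueue above; the group id is the text before
 * the first ';' of the decision record, the local cluster is skipped, and a KafkaProducer
 * per remote cluster is created lazily (pointing at that cluster's broker list from the
 * config) and cached in producerMap for the rest of the partition.
 */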
producerMap.containsKey(clusterID)) { 240 | producerProps.put("bootstrap.servers", config.getProperty(clusterID)); 241 | kproducer = new KafkaProducer(producerProps); 242 | producerMap.put(clusterID, kproducer); 243 | } 244 | // push the decision 245 | data = new ProducerRecord<>(decisionTopic, decision._2()); 246 | producerMap.get(clusterID).send(data); 247 | } 248 | } 249 | } 250 | } 251 | }); 252 | } 253 | }); 254 | 255 | // update the subscriber of groups 256 | subscribeMsgs.mapToPair(new PairFunction, String, String>() { 257 | @Override 258 | public Tuple2 call(Tuple2 tuple2) { 259 | return new Tuple2<>(tuple2._2().split(";")[0], tuple2._2().split(";")[1]); 260 | } 261 | }).foreachRDD(new VoidFunction>() { 262 | // foreachRDD will get RDD of each batch of dstream 263 | @Override 264 | public void call(JavaPairRDD subscribeMsgsRDD) throws Exception { 265 | JavaPairRDD> newgsRDD = subscribeMsgsRDD.cogroup(gsQueue.peek()).mapToPair( 266 | new PairFunction, Iterable>>>, String, HashSet>() { 267 | @Override 268 | public Tuple2> call(Tuple2, Iterable>>> tuple2) { 269 | Iterator iter1 = tuple2._2()._1().iterator(); 270 | Iterator> iter2 = tuple2._2()._2().iterator(); 271 | HashSet subscribers = null; 272 | String subscribe = null; 273 | if (iter2.hasNext()) 274 | subscribers = iter2.next(); 275 | else 276 | subscribers = new HashSet<>(); 277 | while (iter1.hasNext()) { 278 | subscribe = iter1.next(); 279 | if (! subscribers.contains(subscribe)) 280 | subscribers.add(subscribe); 281 | } 282 | return new Tuple2(tuple2._1(), subscribers); 283 | } 284 | } 285 | ); 286 | //System.out.println(newgsRDD.collect()); 287 | 288 | // use peek above and first add then poll here 289 | // this is to make sure there is at least one rdd in the queue 290 | // so other stream can always get the rdd through queue.peek() 291 | gsQueue.add(newgsRDD); 292 | gsQueue.poll(); 293 | } 294 | }); 295 | 296 | 297 | // forward the updates of external groups 298 | forwardMsgs.foreachRDD(new VoidFunction>() { 299 | // foreachRDD will get RDD of each batch of dstream 300 | @Override 301 | public void call(JavaPairRDD forwardMsgsRDD) throws Exception { 302 | forwardMsgsRDD.foreachPartition(new VoidFunction>> () { 303 | @Override 304 | public void call(Iterator> forwards_iter) throws Exception { 305 | ConcurrentHashMap> producerMap = new ConcurrentHashMap<>(); 306 | Tuple2 forward = null; 307 | KafkaProducer kproducer = null; 308 | ProducerRecord data = null; 309 | String groupID = null; 310 | String clusterID = null; 311 | // to store all the group_ids and their cluster_ids 312 | Map groupSubs = new HashMap(); 313 | while (forwards_iter.hasNext()) { 314 | forward = forwards_iter.next(); 315 | JSONObject jObject = new JSONObject(forward._2()); 316 | clusterID = jObject.getString("cluster_id"); 317 | groupID = jObject.getString("group_id"); 318 | // foreach subscriber 319 | if (! groupSubs.containsKey(groupID)) 320 | groupSubs.put(groupID, clusterID); 321 | if (! 
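/* Annotation (not in the original source): updates for groups owned by another cluster
 * (the ones InfoSender tags with a foreign "cluster_id") arrive on the forward topic here;
 * each is re-published to the owning cluster's updateTopic, again through a lazily created,
 * cached producer, and afterwards one subscribe record "<group_id>;<currentClusterID>" per
 * forwarded group is sent so that the owning cluster pushes its decisions back to this
 * frontend.
 */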
producerMap.containsKey(clusterID)) { 322 | producerProps.put("bootstrap.servers", config.getProperty(clusterID)); 323 | kproducer = new KafkaProducer(producerProps); 324 | producerMap.put(clusterID, kproducer); 325 | } 326 | data = new ProducerRecord<>(updateTopic, forward._2()); 327 | producerMap.get(clusterID).send(data); 328 | } 329 | // subscribe all the sent groups 330 | for (Map.Entry gsEntry : groupSubs.entrySet()) { 331 | data = new ProducerRecord<>(subscribeTopic, gsEntry.getKey() + ";" + currentClusterID); 332 | producerMap.get(gsEntry.getValue()).send(data); 333 | } 334 | } 335 | }); 336 | } 337 | }); 338 | 339 | // Start the computation 340 | jssc.start(); 341 | jssc.awaitTermination(); 342 | } 343 | } 344 | --------------------------------------------------------------------------------
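The Communicator, DecisionMaker and GroupManager above exchange simple string/JSON records over Kafka, so a quick way to smoke-test a frontend without going through the PHP layer is to publish one hand-written update straight to its internal-groups topic. The sketch below is not part of the repository: the class name FakeUpdateSender, the broker address, topic, group id, decision name and cost are all assumptions chosen to match the record format DecisionMaker.java parses, and it only needs the same kafka-clients and org.json dependencies the project already uses.

package frontend;

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.json.JSONObject;

/** Minimal sketch: publish one hand-written update so DecisionMaker has something to consume. */
public final class FakeUpdateSender {
    public static void main(String[] args) {
        // Assumed values; replace with the real frontend broker and topic.
        String brokers = args.length > 0 ? args[0] : "10.11.10.3:9092";
        String topic   = args.length > 1 ? args[1] : "internal_groups";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // Same shape as the records DecisionMaker parses: {"group_id": ..., "update": "<decision>\t<cost>"}
        String update = new JSONObject()
                .put("group_id", "g1")          // hypothetical group
                .put("cluster_id", "frontend1") // hypothetical owning cluster
                .put("update", "cdnA\t1.94")    // decision and its cost
                .toString();

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            producer.send(new ProducerRecord<>(topic, update));
        } // close() blocks until the record has been delivered
    }
}

Under those assumptions it can be built against the Communicator's pom and run as `java -cp <jar> frontend.FakeUpdateSender <broker> <topic>`; within a couple of processInterval batches the decision topic should then carry a record like "g1;cdnA:...;From: <broker>".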