├── bin ├── zookeeper.properties ├── config.properties ├── frontserver_deploy.sh ├── spark_deploy.sh └── kafka_deploy.sh ├── frontend ├── GroupManager │ ├── .DS_Store │ ├── conf │ │ └── gmConfig │ ├── src │ │ └── main │ │ │ └── java │ │ │ └── frontend │ │ │ ├── DecisionCollector.java │ │ │ ├── DecisionCollector_EG.java │ │ │ ├── GroupManager.java │ │ │ ├── GroupTableUpdater.java │ │ │ └── InfoSender.java │ └── pom.xml ├── trace │ ├── tracesort.py │ ├── fault_tolerance │ │ ├── ft.conf │ │ └── sort.py │ ├── trace_parser.py │ └── trace_parser_mulit.py ├── webphp │ ├── player.php │ ├── update_EG.php │ ├── update.php │ └── player_EG.php ├── DecisionMaker │ ├── src │ │ └── main │ │ │ └── java │ │ │ └── frontend │ │ │ ├── HistoryObject.java │ │ │ ├── HistoryData.java │ │ │ └── DecisionMaker.java │ ├── algorithms │ │ ├── HistoryData_EG.java │ │ └── HistoryData_DUCB.java │ └── pom.xml └── Communicator │ ├── pom.xml │ └── src │ └── main │ └── java │ └── frontend │ └── Communicator.java ├── README.md ├── conf └── frontends │ ├── ' │ └── frontend-1 └── sbin ├── setup-all.sh ├── run_kafka.sh ├── run_zookeeper.sh ├── start-service-all.sh ├── run_uploadtrace_EG.sh ├── run_uploadtrace.sh ├── run_groupmanager.sh ├── run_decisionmaker.sh ├── install.sh └── run_communicator.sh /bin/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # add some configurations 2 | tickTime=2000 3 | initLimit=5 4 | syncLimit=2 5 | -------------------------------------------------------------------------------- /frontend/GroupManager/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nsdi2017-ddn/pytheas/HEAD/frontend/GroupManager/.DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pytheas 2 | 3 | This is a new version of pytheas. 4 | For this version, we want to standardize the project and make it easy to deploy and use. 
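As a rough quick-start sketch (hostnames and paths below are placeholders, and your environment may need different options): list the machines of each frontend cluster in `conf/frontends/frontend-<n>` (one hostname per line, `#` for comments, as in the example files below), then run the setup and start scripts from the repository root.

```bash
# hypothetical quick start; adjust PYTHEAS_HOME and the host files to your deployment
export PYTHEAS_HOME=$(pwd)
bash sbin/setup-all.sh           # installs Spark, Kafka, and the front server on every listed host
bash sbin/start-service-all.sh   # starts zookeeper/kafka, the Communicator, DecisionMaker, and GroupManager
```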
5 | -------------------------------------------------------------------------------- /conf/frontends/': -------------------------------------------------------------------------------- 1 | # This is an example 2 | ms0626.utah.cloudlab.us 3 | #c220g1-030821.wisc.cloudlab.us 4 | #c220g1-030830.wisc.cloudlab.us 5 | #c220g1-030827.wisc.cloudlab.us 6 | -------------------------------------------------------------------------------- /conf/frontends/frontend-1: -------------------------------------------------------------------------------- 1 | # This is an example 2 | ms0626.utah.cloudlab.us 3 | #c220g1-030821.wisc.cloudlab.us 4 | #c220g1-030830.wisc.cloudlab.us 5 | #c220g1-030827.wisc.cloudlab.us 6 | -------------------------------------------------------------------------------- /frontend/GroupManager/conf/gmConfig: -------------------------------------------------------------------------------- 1 | Time BufRate AvgBitrate JoinTime Asn City Country ConnType State Os Liveorvod ObjectId PlayerType InitBitrate InitCdn decision socre 2 | decision socre 3 | -------------------------------------------------------------------------------- /frontend/trace/tracesort.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | 5 | fin = open(sys.argv[1]) 6 | records = fin.readlines() 7 | fin.close() 8 | records = sorted(records, key=lambda record:record.split("\t", 1)[0]) 9 | fout = open(sys.argv[2], "w") 10 | fout.writelines(records) 11 | fout.close() 12 | 13 | -------------------------------------------------------------------------------- /frontend/trace/fault_tolerance/ft.conf: -------------------------------------------------------------------------------- 1 | set terminal png truecolor size 600,400 font 'Helvetica,16' 2 | set autoscale 3 | set xrange [0:] 4 | set yrange [0:] 5 | set key right nobox 6 | set style data lines 7 | set output "ft.png" 8 | set ylabel "Average Cost" font ",18" offset 1,0,0 9 | set xlabel "Time (sec)" font ",18" 10 | plot "r1" using 1 title "frontend1", \ 11 | "r1" using 2 title "frontend2" 12 | -------------------------------------------------------------------------------- /bin/config.properties: -------------------------------------------------------------------------------- 1 | #config file of communicator 2 | 3 | managementLabelsNum=2 4 | 5 | updateTopic=internal_groups 6 | uploadTopic=upload 7 | decisionTopic=decision 8 | subscribeTopic=subscribe 9 | forwardTopic=external_groups 10 | sampleTopic=sample 11 | aliveTopic=alive 12 | 13 | clusterID=frontend1 14 | 15 | backendBrokers=10.11.10.2:9092 16 | 17 | frontend1=10.11.10.3:9092 18 | frontend2=10.11.10.4:9092 19 | -------------------------------------------------------------------------------- /frontend/trace/fault_tolerance/sort.py: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/python 2 | import sys 3 | 4 | fin = open(sys.argv[1]) 5 | list1 = [] 6 | list11 = [] 7 | list2 = [] 8 | list22 = [] 9 | RPS = 100 10 | for line in fin: 11 | record = line.split(",") 12 | if record[0] == '0': 13 | list1.append(record[2].strip()) 14 | else: 15 | list2.append(record[2].strip()) 16 | i = 0 17 | s = 0 18 | for record in list1: 19 | i+=1 20 | s+=float(record) 21 | if i == RPS: 22 | i = 0 23 | list11.append(s/RPS) 24 | s = 0 25 | i = 0 26 | s = 0 27 | for record in list2: 28 | i+=1 29 | s+=float(record) 30 | if i == RPS: 31 | i = 0 32 | list22.append(s/RPS) 33 | s = 0 34 | fout = open(sys.argv[2], 'w') 35 | 
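# pair up the per-window averages of the two frontends and write one tab-separated row per window;
# the final print reports how many windows were dropped from the longer of the two traces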
for i in range(min(len(list11), len(list22))): 36 | fout.write("%f\t%f\n"%(list11[i], list22[i])) 37 | print abs(len(list11) - len(list22)) 38 | 39 | -------------------------------------------------------------------------------- /sbin/setup-all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | [ -z $PYTHEAS_HOME ] && export PYTHEAS_HOME=$(cd `dirname $0`/..; pwd) 4 | 5 | PYTHEAS_CONF_DIR=${PYTHEAS_HOME}/conf 6 | PYTHEAS_BIN_DIR=${PYTHEAS_HOME}/bin 7 | PYTHEAS_LOG_DIR=${PYTHEAS_HOME}/log 8 | debug=${DEBUG:-1} 9 | 10 | mkdir -p $PYTHEAS_LOG_DIR 11 | mkdir -p ${PYTHEAS_LOG_DIR}/setup 12 | rm -rf ${PYTHEAS_LOG_DIR}/setup/* 13 | 14 | for frontend in `ls ${PYTHEAS_CONF_DIR}/frontends/frontend-*`; do 15 | echo "**************************************************************************" 16 | echo "* Setting up ${frontend##*/}" 17 | echo "**************************************************************************" 18 | machinelist=$(cat $frontend | sed "s/#.*$//;/^$/d") 19 | for host in $machinelist 20 | do 21 | log_file=${PYTHEAS_LOG_DIR}/setup/${frontend##*/} 22 | echo -e "Setting up $host.\nSee $log_file for log" 23 | if [[ ${debug} -ne 0 ]]; then 24 | bash ${PYTHEAS_HOME}/sbin/install.sh $host ${frontend##*/} $PYTHEAS_BIN_DIR | tee -a $log_file 25 | else 26 | bash ${PYTHEAS_HOME}/sbin/install.sh $host ${frontend##*/} $PYTHEAS_BIN_DIR >> $log_file 27 | fi 28 | done 29 | done 30 | -------------------------------------------------------------------------------- /sbin/run_kafka.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | 4 | 5 | SSH="junchenj@"$1 6 | PRE=-o\ "StrictHostKeyChecking=no" 7 | 8 | FRONT_SERVER="front_server" 9 | SPARK="spark" 10 | KAFKA="kafka" 11 | TRACE="trace" 12 | 13 | REMOTE_USER_ROOT="/users/junchenj" 14 | REMOTE_SYS_ROOT="/usr/share" 15 | REMOTE_LOG_ROOT="/users/junchenj/log" 16 | LOG_FILE=$REMOTE_LOG_ROOT"/log_kafka" 17 | 18 | if [ "$#" -ne 1 ]; then 19 | echo "Error: need exactly one argument" 20 | echo "Format: sh run_kafka.sh host" 21 | exit 22 | fi 23 | 24 | echo "" 25 | echo "" 26 | echo "" 27 | echo "**************************************************************************" 28 | echo "* Running kafka on "$1 29 | echo "**************************************************************************" 30 | 31 | echo log file $LOG_FILE 32 | ssh $PRE $SSH -t 'cd '$REMOTE_SYS_ROOT/$KAFKA'; sudo bin/kafka-server-stop.sh' 33 | ssh $PRE $SSH "sh -c 'cd $REMOTE_SYS_ROOT/$KAFKA && sudo bin/kafka-server-start.sh config/server.properties > $LOG_FILE 2>&1 &'" 34 | 35 | echo "**************************************************************************" 36 | echo "* Done kafka on "$1 37 | echo "**************************************************************************" 38 | echo "" 39 | echo "" 40 | echo "" 41 | 42 | -------------------------------------------------------------------------------- /sbin/run_zookeeper.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | 4 | 5 | SSH="junchenj@"$1 6 | PRE=-o\ "StrictHostKeyChecking=no" 7 | 8 | FRONT_SERVER="front_server" 9 | SPARK="spark" 10 | KAFKA="kafka" 11 | TRACE="trace" 12 | 13 | REMOTE_USER_ROOT="/users/junchenj" 14 | REMOTE_SYS_ROOT="/usr/share" 15 | REMOTE_LOG_ROOT="/users/junchenj/log" 16 | LOG_FILE=$REMOTE_LOG_ROOT"/log_zookeeper" 17 | 18 | if [ "$#" -ne 1 ]; then 19 | echo "Error: need exactly one argument" 20 | echo "Format: sh run_zookeeper.sh host" 21 
| exit 22 | fi 23 | 24 | echo "" 25 | echo "" 26 | echo "" 27 | echo "**************************************************************************" 28 | echo "* Running zookeeper on "$1 29 | echo "**************************************************************************" 30 | 31 | ssh $PRE $SSH -t 'cd '$REMOTE_SYS_ROOT/$KAFKA'; sudo bin/zookeeper-server-stop.sh' 32 | ssh $PRE $SSH "sh -c 'cd $REMOTE_SYS_ROOT/$KAFKA && sudo nohup bin/zookeeper-server-start.sh config/zookeeper.properties > $LOG_FILE 2>&1 &'" 33 | 34 | echo "**************************************************************************" 35 | echo "* Done zookeeper on "$1 36 | echo "**************************************************************************" 37 | echo "" 38 | echo "" 39 | echo "" 40 | 41 | -------------------------------------------------------------------------------- /bin/frontserver_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Auto install httpd and configure the environment 4 | # 5 | # Author: Shijie Sun 6 | # Email: septimus145@gmail.com 7 | # August, 2016 8 | 9 | if [[ $UID != 0 ]]; then 10 | echo "Please run this script with sudo:" 11 | echo "sudo $0 $*" 12 | exit 1 13 | fi 14 | 15 | # Install editor 16 | sudo apt-get update 17 | which vim >&/dev/null || sudo apt-get install -y vim 18 | which tmux >&/dev/null || sudo apt-get install -y tmux 19 | 20 | # Install jdk and maven 21 | which javac >&/dev/null || sudo apt-get install -y default-jdk 22 | which mvn >&/dev/null || sudo apt-get install -y maven 23 | if [ -z $JAVA_HOME ]; then 24 | JAVA_HOME=$(sudo update-java-alternatives -l | head -n 1 | sed -e 's/ \+/ /g' | cut -f3 -d' ') 25 | echo JAVA_HOME=\"$JAVA_HOME\" | sudo tee --append /etc/environment 26 | export JAVA_HOME=$JAVA_HOME 27 | fi 28 | 29 | # Install httpd and php5 30 | sudo apt-get install -y apache2 php libapache2-mod-php 31 | 32 | # Configure the httpd 33 | #sudo cp update.php /var/www/html 34 | #sudo cp player.php /var/www/html 35 | #sudo cp player_EG.php /var/www/html 36 | sudo mkdir /var/www/info 37 | sudo chmod 777 /var/www/info 38 | sudo sed -i -e "s/\(KeepAlive \).*/\1"Off"/" \ 39 | /etc/apache2/apache2.conf 40 | sudo service apache2 reload 41 | 42 | echo Success 43 | exit 0 44 | -------------------------------------------------------------------------------- /frontend/webphp/player.php: -------------------------------------------------------------------------------- 1 | $_POST["payload"], 33 | "group_id" => $group_id 34 | ); 35 | $in = json_encode($info, JSON_UNESCAPED_SLASHES).PHP_EOL; 36 | file_put_contents($path . 
'/info_queue',$in,FILE_APPEND|LOCK_EX); 37 | } 38 | 39 | ?> 40 | -------------------------------------------------------------------------------- /bin/spark_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Auto install Spark Streaming and configure the environment 4 | # 5 | # Author: Shijie Sun 6 | # Email: septimus145@gmail.com 7 | # July, 2016 8 | 9 | if [[ $UID != 0 ]]; then 10 | echo "Please run this script with sudo:" 11 | echo "sudo $0 $*" 12 | exit 1 13 | fi 14 | 15 | # Install editor 16 | sudo apt-get update 17 | which vim >&/dev/null || sudo apt-get install -y vim 18 | which tmux >&/dev/null || sudo apt-get install -y tmux 19 | 20 | # Install jdk and maven 21 | which javac >&/dev/null || sudo apt-get install -y default-jdk 22 | which mvn >&/dev/null || sudo apt-get install -y maven 23 | if [ -z $JAVA_HOME ]; then 24 | JAVA_HOME=$(sudo update-java-alternatives -l | head -n 1 | sed -e 's/ \+/ /g' | cut -f3 -d' ') 25 | echo JAVA_HOME=\"$JAVA_HOME\" | sudo tee --append /etc/environment 26 | export JAVA_HOME=$JAVA_HOME 27 | fi 28 | 29 | # Download the spark 30 | spark_path="/usr/share/spark/" 31 | wget http://www-eu.apache.org/dist/spark/spark-1.6.2/spark-1.6.2-bin-hadoop2.6.tgz 32 | sudo tar -xvzf spark-1.6.2-bin-hadoop2.6.tgz -C /usr/share 33 | sudo mv /usr/share/spark-1.6.2-bin-hadoop2.6 /usr/share/spark 34 | rm spark-1.6.2-bin-hadoop2.6.tgz 35 | echo "spark.io.compression.codec lzf" | sudo tee --append /usr/share/spark/conf/spark-defaults.conf 36 | 37 | #sudo mkdir -p /var/spark_tmp 38 | #sudo cp ./entry.dat /var/spark_tmp/ 39 | echo Success 40 | exit 0 41 | -------------------------------------------------------------------------------- /frontend/webphp/update_EG.php: -------------------------------------------------------------------------------- 1 | 2) { 25 | $epsilon = floatval($decisions[0]); 26 | // get random decision 27 | if (rand(0, 100) < $epsilon * 100) { 28 | $out = $decisions[rand(0,count($decisions)-3)+2]; 29 | } 30 | // get best decision 31 | else { 32 | $out = $decisions[1]; 33 | } 34 | } 35 | } 36 | 37 | // response 38 | if (empty($out)) 39 | echo "Oops"; 40 | else 41 | echo $out; 42 | 43 | // Encode the info with json and write it into file 44 | $info = array( 45 | "update" => $_POST["payload"], 46 | "group_id" => $group_id 47 | ); 48 | 49 | $in = json_encode($info, JSON_UNESCAPED_SLASHES).PHP_EOL; 50 | //echo $in; 51 | file_put_contents('/var/www/info/info_queue',$in,FILE_APPEND|LOCK_EX); 52 | 53 | ?> 54 | -------------------------------------------------------------------------------- /frontend/webphp/update.php: -------------------------------------------------------------------------------- 1 | 0) 35 | $out = $decision[0]; 36 | } 37 | 38 | // response 39 | if (empty($out)) 40 | echo "Oops"; 41 | else 42 | echo $out; 43 | 44 | // Encode the info with json and write it into file 45 | $info = array( 46 | "update" => $_POST["payload"], 47 | "group_id" => $group_id 48 | ); 49 | 50 | $in = json_encode($info, JSON_UNESCAPED_SLASHES).PHP_EOL; 51 | //echo $in; 52 | file_put_contents('/var/www/info/info_queue',$in,FILE_APPEND|LOCK_EX); 53 | 54 | ?> 55 | -------------------------------------------------------------------------------- /sbin/start-service-all.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | function start_logic_node { 4 | host=$1 5 | . 
"${PYTHEAS_HOME}/sbin/run_decisionmaker.sh" -f $host -p 6 | } 7 | 8 | function start_comm_node { 9 | host=$1 10 | . "${PYTHEAS_HOME}/sbin/run_communicator.sh" -f $host -b $host -p 11 | } 12 | 13 | function start_pubsub_node { 14 | host=$1 15 | . "${PYTHEAS_HOME}/sbin/run_zookeeper.sh" $host 16 | . "${PYTHEAS_HOME}/sbin/run_kafka.sh" $host 17 | } 18 | 19 | function start_front_node { 20 | host=$1 21 | commnode=$2 22 | . "${PYTHEAS_HOME}/sbin/run_groupmanager.sh" -l UCB -f $host -k $commnode -p 23 | } 24 | 25 | 26 | 27 | if [ -z "${PYTHEAS_HOME}" ]; then 28 | export PYTHEAS_HOME="$(cd "`dirname "$0"`"/..; pwd)" 29 | fi 30 | 31 | PYTHEAS_CONF_DIR=${PYTHEAS_HOME}/conf 32 | PYTHEAS_BIN_DIR=${PYTHEAS_HOME}/bin 33 | 34 | FRONTENDLIST=`cat "${PYTHEAS_CONF_DIR}/frontends"` 35 | 36 | for frontend in `echo "$FRONTENDLIST"|sed "s/#.*$//;/^$/d"`; do 37 | echo "Setting up $frontend" 38 | set -f 39 | MACHINELIST=(${frontend//;/ }) 40 | pubsubnode=${MACHINELIST[0]} 41 | echo "Starting publish/subcribe node $pubsubnode" 42 | start_pubsub_node $pubsubnode 43 | commnode=${MACHINELIST[0]} 44 | echo "Starting communication node $commnode" 45 | start_comm_node $commnode 46 | logicnode=${MACHINELIST[0]} 47 | echo "Starting computing node $logicnode" 48 | start_logic_node $logicnode 49 | for index in "${!MACHINELIST[@]}"; do 50 | if [ $index -ne 0 ]; then 51 | frontnode=${MACHINELIST[$index]} 52 | echo "Starting front node $frontnode" 53 | start_front_node $frontnode $commnode 54 | fi 55 | done 56 | done 57 | 58 | -------------------------------------------------------------------------------- /frontend/webphp/player_EG.php: -------------------------------------------------------------------------------- 1 | 2) { 24 | $epsilon = floatval($decisions[0]); 25 | // get random decision 26 | if (rand(0, 100) < $epsilon * 100) { 27 | $decision = $decisions[rand(0,count($decisions)-3)+2]; 28 | } 29 | // get best decision 30 | else { 31 | $decision = $decisions[1]; 32 | } 33 | } 34 | if (empty($decision) || !in_array($decision, $decision_list)) { 35 | $decision = $decision_list[array_rand($decision_list, 1)]; 36 | } 37 | echo $decision; 38 | } 39 | 40 | // update 41 | if ($_POST['method'] == 'update') { 42 | // Encode the info with json and write it into file 43 | $info = array( 44 | "update" => $_POST["payload"], 45 | "group_id" => $group_id 46 | ); 47 | $in = json_encode($info, JSON_UNESCAPED_SLASHES).PHP_EOL; 48 | file_put_contents($path . 
'/info_queue',$in,FILE_APPEND|LOCK_EX); 49 | } 50 | 51 | ?> 52 | -------------------------------------------------------------------------------- /sbin/run_uploadtrace_EG.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | 4 | 5 | SSH="junchenj@"$1 6 | PRE=-o\ "StrictHostKeyChecking=no" 7 | 8 | LOCAL_DDN="/Users/junchenjiang/Documents/research/ddn-controller/proto/shijie/DDN" 9 | FRONT_SERVER="front_server" 10 | SPARK="spark" 11 | KAFKA="kafka" 12 | TRACE="trace" 13 | 14 | REMOTE_USER_ROOT="/users/junchenj" 15 | REMOTE_SYS_ROOT="/usr/share" 16 | 17 | 18 | if [ "$#" -ne 2 ]; then 19 | echo "Error: need exactly two arguments" 20 | echo "Format: sh run_uploadtrace.sh host tracefile" 21 | exit 22 | fi 23 | 24 | echo "" 25 | echo "" 26 | echo "" 27 | echo "**************************************************************************" 28 | echo "* Starting Uploading trace to "$1 29 | echo "**************************************************************************" 30 | 31 | Tracefile=$2 32 | TraceUnsorted='trace_raw.txt' 33 | scp $PRE -r $LOCAL_DDN/$TRACE $SSH:$REMOTE_USER_ 34 | scp $PRE -r $Tracefile $SSH:$REMOTE_USER_ROOT/$TRACE/$TraceUnsorted 35 | 36 | TraceSorted='trace_sort.txt' 37 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$TRACE/'; ./tracesort.py '$TraceUnsorted' '$TraceSorted 38 | ssh $PRE $SSH /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 39 | FrontendIp=$(cat temp) 40 | rm temp 41 | 42 | scp $PRE httpd_deploy.sh $SSH:$REMOTE_USER_ROOT/$FRONT_SERVER/ 43 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$FRONT_SERVER/'; sudo sh httpd_deploy.sh' 44 | 45 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$TRACE/'; ./trace_parser.py http://'$FrontendIp'/player_EG.php '$TraceSorted 46 | 47 | echo "**************************************************************************" 48 | echo "* Done Uploading trace to "$1 49 | echo "**************************************************************************" 50 | echo "" 51 | echo "" 52 | echo "" 53 | 54 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/src/main/java/frontend/HistoryObject.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.util.*; 4 | import java.io.Serializable; 5 | 6 | import scala.Tuple2; 7 | 8 | import org.apache.spark.api.java.JavaRDD; 9 | import org.apache.spark.api.java.JavaPairRDD; 10 | import org.apache.spark.streaming.api.java.*; 11 | 12 | public abstract class HistoryObject implements Serializable { 13 | 14 | public JavaPairRDD>> pairDData; 15 | public int windowSize; //seconds 16 | 17 | public HistoryObject(JavaStreamingContext jssc) { 18 | List>>> tmpDataList = new ArrayList<>(); 19 | //// for test 20 | //Map> testMap = new HashMap<>(); 21 | //List testList = new ArrayList(); 22 | //testList.add(7000.0); 23 | //testList.add(200.0); 24 | //testMap.put("decision1", testList); 25 | //tmpDataList.add(new Tuple2("group1", testMap)); 26 | JavaRDD>>> dData = 27 | jssc.sparkContext().parallelize(tmpDataList); 28 | this.pairDData = JavaPairRDD.fromJavaRDD(dData); 29 | } 30 | 31 | public void updateData(JavaPairRDD>> newPairDData) { 32 | this.pairDData = newPairDData; 33 | } 34 | 35 | /* 36 | * implement this method for combination of old data and new data 37 | */ 38 | public abstract Tuple2>> combineCall(Tuple2>>, Iterable>> 40 | >> tuple2); 41 | 42 | /* 43 | * implement this method for decision making 44 | */ 45 | public 
abstract String getDecision(Map> decisionStatMap); 46 | } 47 | -------------------------------------------------------------------------------- /sbin/run_uploadtrace.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | 4 | 5 | SSH="junchenj@"$1 6 | PRE=-o\ "StrictHostKeyChecking=no" 7 | if [ -z "${PYTHEAS_HOME}" ]; then 8 | export PYTHEAS_HOME="$(cd "`dirname "$0"`"/..; pwd)" 9 | fi 10 | 11 | FRONTEND_HOME=${PYTHEAS_HOME}/frontend 12 | FRONT_SERVER="front_server" 13 | SPARK="spark" 14 | KAFKA="kafka" 15 | TRACE="trace" 16 | 17 | REMOTE_USER_ROOT="/users/junchenj" 18 | REMOTE_SYS_ROOT="/usr/share" 19 | 20 | 21 | if [ "$#" -ne 2 ]; then 22 | echo "Error: need exactly two arguments" 23 | echo "Format: sh run_uploadtrace.sh host tracefile" 24 | exit 25 | fi 26 | 27 | echo "" 28 | echo "" 29 | echo "" 30 | echo "**************************************************************************" 31 | echo "* Starting Uploading trace to "$1 32 | echo "**************************************************************************" 33 | 34 | Tracefile=$2 35 | TraceUnsorted='trace_raw.txt' 36 | scp $PRE -r $FRONTEND_HOME/$TRACE $SSH:$REMOTE_USER_ 37 | scp $PRE -r $Tracefile $SSH:$REMOTE_USER_ROOT/$TRACE/$TraceUnsorted 38 | 39 | TraceSorted='trace_sort.txt' 40 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$TRACE/'; ./tracesort.py '$TraceUnsorted' '$TraceSorted 41 | ssh $PRE $SSH /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 42 | FrontendIp=$(cat temp) 43 | rm temp 44 | 45 | scp $PRE httpd_deploy.sh $SSH:$REMOTE_USER_ROOT/$FRONT_SERVER/ 46 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$FRONT_SERVER/'; sudo sh httpd_deploy.sh' 47 | 48 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$TRACE/'; ./trace_parser.py http://'$FrontendIp'/player.php '$TraceSorted 49 | 50 | echo "**************************************************************************" 51 | echo "* Done Uploading trace to "$1 52 | echo "**************************************************************************" 53 | echo "" 54 | echo "" 55 | echo "" 56 | 57 | -------------------------------------------------------------------------------- /bin/kafka_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Auto install Kafka and configure the environment 4 | # 5 | # Author: Shijie Sun 6 | # Email: septimus145@gmail.com 7 | # July, 2016 8 | 9 | if [ $# -lt 2 ]; then 10 | echo "Usage: sudo $0 " 11 | echo -e "\n\t is all IP addresses of kafka servers, separated by comma" 12 | echo -e "\t is the sequence number of current host in host_list" 13 | echo -e "\ne.g. if want to run kafka with two hosts(10.1.1.2,10.1.1.3) and IP of current host is 10.1.1.3. 
Then host_list=\"10.1.1.2,10.1.1.3\", host_number=2\n" 14 | exit 1 15 | fi 16 | 17 | if [[ $UID != 0 ]]; then 18 | echo "Please run this script with sudo:" 19 | echo "sudo $0 $*" 20 | exit 1 21 | fi 22 | 23 | host_list=(${1//,/ }) 24 | 25 | # Install editor 26 | sudo apt-get update 27 | which vim >&/dev/null || sudo apt-get install -y vim 28 | which tmux >&/dev/null || sudo apt-get install -y tmux 29 | 30 | # Install jre 31 | which java >&/dev/null || sudo apt-get install -y default-jre 32 | if [ -z $JAVA_HOME ]; then 33 | JAVA_HOME=$(sudo update-java-alternatives -l | head -n 1 | sed -e 's/ \+/ /g' | cut -f3 -d' ') 34 | echo JAVA_HOME=\"$JAVA_HOME\" | sudo tee --append /etc/environment 35 | export JAVA_HOME=$JAVA_HOME 36 | fi 37 | 38 | # Download kafka 39 | kafka_path="/usr/share/kafka/" 40 | wget http://www-eu.apache.org/dist/kafka/0.10.0.0/kafka_2.11-0.10.0.0.tgz 41 | sudo tar -xvzf kafka_2.11-0.10.0.0.tgz -C /usr/share 42 | sudo mv /usr/share/kafka_2.11-0.10.0.0 /usr/share/kafka 43 | rm kafka_2.11-0.10.0.0.tgz 44 | 45 | # Configure the zookeeper 46 | cat zookeeper.properties | sudo tee --append $kafka_path/config/zookeeper.properties 47 | i=0 48 | while [ $i -lt ${#host_list[@]} ] 49 | do 50 | server_info="server."$(( i+1 ))"="${host_list[$i]}":2888:3888" 51 | echo $server_info | sudo tee --append $kafka_path/config/zookeeper.properties 52 | (( i++ )) 53 | done 54 | sudo mkdir -p /tmp/zookeeper 55 | sudo touch /tmp/zookeeper/myid 56 | echo $2 | sudo tee --append /tmp/zookeeper/myid 57 | 58 | # Configure the kafka 59 | znodes=${host_list[0]}":2181" 60 | i=1 61 | while [ $i -lt ${#host_list[@]} ] 62 | do 63 | znodes=$znodes","${host_list[$i]}":2181" 64 | (( i++ )) 65 | done 66 | sudo sed -i -e "s/\(broker.id=\).*/\1$2/" \ 67 | -e "s/\(zookeeper.connect=\).*/\1$znodes/" $kafka_path/config/server.properties 68 | echo "delete.topic.enable=true" | sudo tee --append $kafka_path/config/server.properties 69 | 70 | echo Success 71 | exit 0 72 | -------------------------------------------------------------------------------- /frontend/GroupManager/src/main/java/frontend/DecisionCollector.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.io.*; 4 | import java.util.Arrays; 5 | import java.util.Iterator; 6 | import java.util.Properties; 7 | import org.apache.kafka.clients.consumer.ConsumerRecord; 8 | import org.apache.kafka.clients.consumer.ConsumerRecords; 9 | import org.apache.kafka.clients.consumer.KafkaConsumer; 10 | import org.json.JSONObject; 11 | import org.json.JSONArray; 12 | 13 | /** 14 | * Fetch decisions from Kafka 15 | * 16 | * Author: Shijie Sun 17 | * Email: septimus145@gmail.com 18 | * August, 2016 19 | */ 20 | 21 | 22 | public class DecisionCollector implements Runnable { 23 | 24 | protected String brokerList = ""; // list of broker 25 | protected String hostname = ""; // name of current host 26 | public KafkaConsumer consumer = null; // kafka consumer 27 | 28 | public DecisionCollector( String hostname, String brokerList ) { 29 | this.hostname = hostname; 30 | this.brokerList = brokerList; 31 | // setup consumer 32 | Properties consumerProps = new Properties(); 33 | consumerProps.put("bootstrap.servers", brokerList); 34 | consumerProps.put("group.id", this.hostname); 35 | consumerProps.put("enable.auto.commit", "true"); 36 | consumerProps.put("auto.commit.interval.ms", "1000"); 37 | consumerProps.put("session.timeout.ms", "30000"); 38 | consumerProps.put("key.deserializer", 
"org.apache.kafka.common.serialization.StringDeserializer"); 39 | consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 40 | this.consumer = new KafkaConsumer<>(consumerProps); 41 | consumer.subscribe(Arrays.asList("decision")); 42 | } 43 | 44 | public void run() { 45 | KafkaConsumer tconsumer = consumer; 46 | while (true) { 47 | ConsumerRecords records = tconsumer.poll(1000); 48 | for (ConsumerRecord record : records) { 49 | String[] decision = record.value().split(";"); 50 | try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("/var/www/info/d_" + decision[0]), "utf-8"))) { 51 | writer.write(decision[1]); 52 | } catch (Exception e) { 53 | System.err.println("Caught Exception: " + e.getMessage()); 54 | } 55 | } 56 | try { 57 | Thread.sleep(1000); 58 | } catch(InterruptedException ex) { 59 | Thread.currentThread().interrupt(); 60 | } 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /frontend/GroupManager/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | frontend 5 | GroupManager 6 | jar 7 | 1.0-SNAPSHOT 8 | GroupManager 9 | http://maven.apache.org 10 | 11 | UTF-8 12 | 13 | 14 | 15 | junit 16 | junit 17 | 3.8.1 18 | test 19 | 20 | 21 | org.json 22 | json 23 | 20160212 24 | 25 | 26 | 27 | org.apache.kafka 28 | kafka-clients 29 | 0.10.0.0 30 | 31 | 32 | 33 | org.apache.commons 34 | commons-lang3 35 | 3.0 36 | 37 | 38 | 39 | 40 | 41 | 42 | org.apache.maven.plugins 43 | maven-shade-plugin 44 | 2.4.3 45 | 46 | 47 | ${project.build.directory}/dependency-reduced-pom.xml 48 | 49 | 50 | 51 | package 52 | 53 | shade 54 | 55 | 56 | 57 | 58 | 59 | 60 | org.apache.maven.plugins 61 | maven-compiler-plugin 62 | 3.3 63 | 64 | 1.7 65 | 1.7 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/algorithms/HistoryData_EG.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.util.*; 4 | import scala.Tuple2; 5 | import org.json.JSONObject; 6 | import org.json.JSONArray; 7 | import org.apache.spark.streaming.api.java.*; 8 | 9 | public class HistoryData extends HistoryObject { 10 | 11 | // self defined parameters 12 | public double epsilon = 0.9; 13 | // end of self defined 14 | 15 | public HistoryData(JavaStreamingContext jssc) { 16 | super(jssc); 17 | windowSize = 10000; 18 | } 19 | 20 | /* 21 | * implement this method for combination of old data and new data 22 | */ 23 | public Tuple2>> combineCall(Tuple2>>, Iterable>> 25 | >> tuple2) { 26 | Map> newData=null; 27 | Iterator>> iter; 28 | iter = tuple2._2()._1().iterator(); 29 | if (iter.hasNext()) 30 | newData = iter.next(); 31 | if (newData != null) { 32 | // calculate the average for new records 33 | for (Map.Entry> entry : newData.entrySet()) { 34 | List scores = entry.getValue(); 35 | double totalscore = 0; 36 | for (double score : scores) 37 | totalscore += score; 38 | // here becomes 2-elements list: [sum, size] 39 | List countedScore = new ArrayList<>(); 40 | countedScore.add(totalscore); 41 | countedScore.add((double)scores.size()); 42 | entry.setValue(countedScore); 43 | } 44 | } 45 | return new Tuple2(tuple2._1(), newData); 46 | } 47 | 48 | /* 49 | * implement this method for decision making 50 | */ 51 | public String getDecision(Map> decisionStatMap) { 52 | double maxScore = -Double.MAX_VALUE; 53 | 
String bestDecision = null; 54 | JSONArray jArray = new JSONArray(); 55 | for (Map.Entry> entry : decisionStatMap.entrySet()) { 56 | // TODO: what if here is divided by zero? 57 | if (entry.getValue().get(0) / entry.getValue().get(1) > maxScore) { 58 | if (bestDecision != null) 59 | jArray.put(bestDecision); 60 | bestDecision = entry.getKey(); 61 | maxScore = entry.getValue().get(0) / entry.getValue().get(1); 62 | } else { 63 | jArray.put(entry.getKey()); 64 | } 65 | } 66 | JSONObject jObject = new JSONObject(); 67 | jObject.put("random", jArray); 68 | jObject.put("best", bestDecision); 69 | jObject.put("epsilon", epsilon); 70 | return jObject.toString(); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/src/main/java/frontend/HistoryData.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.util.*; 4 | import scala.Tuple2; 5 | import org.json.JSONObject; 6 | import org.json.JSONArray; 7 | import org.apache.spark.streaming.api.java.*; 8 | 9 | public class HistoryData extends HistoryObject { 10 | 11 | // self defined parameters 12 | public double epsilon = 0.9; 13 | // end of self defined 14 | 15 | public HistoryData(JavaStreamingContext jssc) { 16 | super(jssc); 17 | windowSize = 10000; 18 | } 19 | 20 | /* 21 | * implement this method for combination of old data and new data 22 | */ 23 | public Tuple2>> combineCall(Tuple2>>, Iterable>> 25 | >> tuple2) { 26 | Map> newData=null; 27 | Iterator>> iter; 28 | iter = tuple2._2()._1().iterator(); 29 | if (iter.hasNext()) 30 | newData = iter.next(); 31 | if (newData != null) { 32 | // calculate the average for new records 33 | for (Map.Entry> entry : newData.entrySet()) { 34 | List scores = entry.getValue(); 35 | double totalscore = 0; 36 | for (double score : scores) 37 | totalscore += score; 38 | // here becomes 2-elements list: [sum, size] 39 | List countedScore = new ArrayList<>(); 40 | countedScore.add(totalscore); 41 | countedScore.add((double)scores.size()); 42 | entry.setValue(countedScore); 43 | } 44 | } 45 | return new Tuple2(tuple2._1(), newData); 46 | } 47 | 48 | /* 49 | * implement this method for decision making 50 | */ 51 | public String getDecision(Map> decisionStatMap) { 52 | double maxScore = -Double.MAX_VALUE; 53 | String bestDecision = null; 54 | JSONArray jArray = new JSONArray(); 55 | for (Map.Entry> entry : decisionStatMap.entrySet()) { 56 | // TODO: what if here is divided by zero? 
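// (note: get(1) is scores.size() from combineCall, so it should be >= 1 whenever the
// entry exists; still, an explicit count > 0 guard would be the safer fix for this TODO)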
57 | if (entry.getValue().get(0) / entry.getValue().get(1) > maxScore) { 58 | if (bestDecision != null) 59 | jArray.put(bestDecision); 60 | bestDecision = entry.getKey(); 61 | maxScore = entry.getValue().get(0) / entry.getValue().get(1); 62 | } else { 63 | jArray.put(entry.getKey()); 64 | } 65 | } 66 | JSONObject jObject = new JSONObject(); 67 | jObject.put("random", jArray); 68 | jObject.put("best", bestDecision); 69 | jObject.put("epsilon", epsilon); 70 | return jObject.toString(); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /sbin/run_groupmanager.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | Host="" 4 | KafkaNode="" 5 | Logic="" 6 | 7 | if [ "$#" -lt 6 ]; then 8 | echo "Error: need at least six arguments" 9 | echo "Format: sh run_groupmanager.sh [-option] --logic logic --frontend host --kafka kafkanode" 10 | exit 11 | fi 12 | 13 | Rebuild=false 14 | 15 | while test $# -gt 0; do 16 | case "$1" in 17 | -h|--help) 18 | echo "Format: sh run_groupmanager.sh [-option] host" 19 | echo "options:" 20 | echo "-h --help show brief help" 21 | echo "-p --package repackage before running" 22 | echo "-f --frontend set frontend host" 23 | echo "-l --logic set logic type (EG/UCB)" 24 | exit 0 25 | ;; 26 | -p|--package) 27 | Rebuild=true 28 | shift 29 | ;; 30 | -f|--frontend) 31 | Host=$2 32 | shift 33 | ;; 34 | -k|--kafka) 35 | KafkaNode=$2 36 | shift 37 | ;; 38 | -l|--logic) 39 | Logic=$2 40 | shift 41 | ;; 42 | -*) 43 | echo "invalid option "$1 44 | exit 0 45 | ;; 46 | *) 47 | shift 48 | ;; 49 | esac 50 | done 51 | 52 | 53 | SSH="junchenj@"$Host 54 | SSHKAFKA="junchenj"@$KafkaNode 55 | PRE=-o\ "StrictHostKeyChecking=no" 56 | if [ -z "${PYTHEAS_HOME}" ]; then 57 | export PYTHEAS_HOME="$(cd "`dirname "$0"`"/..; pwd)" 58 | fi 59 | 60 | FRONTEND_HOME=${PYTHEAS_HOME}/frontend 61 | FRONT_SERVER="front_server" 62 | SPARK="spark" 63 | KAFKA="kafka" 64 | TRACE="trace" 65 | 66 | REMOTE_USER_ROOT="/users/junchenj" 67 | REMOTE_SYS_ROOT="/usr/share" 68 | REMOTE_LOG_ROOT="/users/junchenj/log" 69 | LOG_FILE=$REMOTE_LOG_ROOT"/log_groupmanager" 70 | 71 | 72 | echo "" 73 | echo "" 74 | echo "" 75 | echo "**************************************************************************" 76 | echo "* Starting GroupManager on "$Host 77 | echo "**************************************************************************" 78 | 79 | if [ "$Rebuild" = true ] ; then 80 | scp $PRE -r $FRONTEND_HOME/$FRONT_SERVER/GroupManager $SSH:$REMOTE_USER_ROOT/$FRONT_SERVER/ 81 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$FRONT_SERVER'/GroupManager; mvn package > '$LOG_FILE 82 | fi 83 | 84 | ssh $PRE $SSHKAFKA /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 85 | KafkaIp=$(cat temp) 86 | echo "Got IP="$KafkaIp 87 | rm temp 88 | ssh $PRE $SSH "sh -c 'cd $REMOTE_USER_ROOT/$FRONT_SERVER/GroupManager && java -cp target/GroupManager-1.0-SNAPSHOT.jar frontend.GroupManager frontend1 $KafkaIp ../gmConfig $Logic > $LOG_FILE 2>&1 &'" 89 | 90 | echo "**************************************************************************" 91 | echo "* Done GroupManager on "$Host 92 | echo "**************************************************************************" 93 | echo "" 94 | echo "" 95 | echo "" 96 | 97 | -------------------------------------------------------------------------------- /sbin/run_decisionmaker.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 
| Host="" 4 | 5 | if [ "$#" -lt 2 ]; then 6 | echo "Error: need at least one argument" 7 | echo "Format: sh run_decisionmaker.sh [-option] --frontend " 8 | exit 9 | fi 10 | 11 | Rebuild=false 12 | 13 | while test $# -gt 0; do 14 | case "$1" in 15 | -h|--help) 16 | echo "Format: sh run_decisionmaker.sh [-option] -f " 17 | echo "options:" 18 | echo "-h --help show brief help" 19 | echo "-p --package repackage before running" 20 | echo "-f --frontend set frontend host" 21 | exit 0 22 | ;; 23 | -p|--package) 24 | Rebuild=true 25 | shift 26 | ;; 27 | -f|--frontend) 28 | Host=$2 29 | shift 30 | ;; 31 | -*) 32 | echo "invalid option "$1 33 | exit 0 34 | ;; 35 | *) 36 | shift 37 | ;; 38 | esac 39 | done 40 | 41 | 42 | SSH="junchenj@"$Host 43 | PRE=-o\ "StrictHostKeyChecking=no" 44 | 45 | if [ -z "${PYTHEAS_HOME}" ]; then 46 | export PYTHEAS_HOME="$(cd "`dirname "$0"`"/..; pwd)" 47 | fi 48 | FRONTEND_HOME=${PYTHEAS_HOME}/frontend 49 | FRONT_SERVER="front_server" 50 | SPARK="spark" 51 | KAFKA="kafka" 52 | TRACE="trace" 53 | 54 | REMOTE_USER_ROOT="/users/junchenj" 55 | REMOTE_SYS_ROOT="/usr/share" 56 | REMOTE_LOG_ROOT="/users/junchenj/log" 57 | LOG_FILE=$REMOTE_LOG_ROOT"/log_decisionmaker" 58 | 59 | 60 | echo "" 61 | echo "" 62 | echo "" 63 | echo "**************************************************************************" 64 | echo "* Starting DecisionMaker on "$Host 65 | echo "**************************************************************************" 66 | 67 | if [ "$Rebuild" = true ] ; then 68 | scp $PRE -r $FRONTEND_HOME/$SPARK/DecisionMaker $SSH:$REMOTE_USER_ROOT/$SPARK/ 69 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$SPARK'/DecisionMaker; mvn package > '$LOG_FILE 70 | fi 71 | 72 | ##### get kafka pointer of the frontend 73 | ssh $PRE $SSH /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 74 | FrontendKafka=$(cat temp)':9092' 75 | FrontendZookeeper=$(cat temp)':2181' 76 | rm temp 77 | echo "Frontend Kafka="$FrontendKafka" / "$FrontendZookeeper 78 | 79 | CONF=$REMOTE_USER_ROOT/$SPARK"/config.properties" 80 | ##### get updateTopic 81 | TopicKey='updateTopic' 82 | ssh $PRE $SSH "cat $CONF | grep $TopicKey'=' | cut -d= -f2 | awk '{ print \$1}'" > temp 83 | TOPIC=$(cat temp) 84 | rm temp 85 | 86 | ssh $PRE $SSH "sh -c 'cd $REMOTE_SYS_ROOT/$SPARK; sudo bin/spark-submit --class frontend.DecisionMaker --master local --executor-memory 30G --total-executor-cores 1 --executor-cores 1 ~/spark/DecisionMaker/target/DecisionMaker-1.0-SNAPSHOT.jar $FrontendKafka $TOPIC decision 0.7 10 > $LOG_FILE 2>&1 &'" 87 | 88 | 89 | echo "**************************************************************************" 90 | echo "* Done DecisionMaker on "$Host 91 | echo "**************************************************************************" 92 | echo "" 93 | echo "" 94 | echo "" 95 | 96 | -------------------------------------------------------------------------------- /frontend/GroupManager/src/main/java/frontend/DecisionCollector_EG.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.io.*; 4 | import java.util.Arrays; 5 | import java.util.Properties; 6 | import org.apache.kafka.clients.consumer.ConsumerRecord; 7 | import org.apache.kafka.clients.consumer.ConsumerRecords; 8 | import org.apache.kafka.clients.consumer.KafkaConsumer; 9 | import org.json.JSONObject; 10 | import org.json.JSONArray; 11 | 12 | /** 13 | * Fetch decisions from Kafka 14 | * 15 | * Author: Shijie Sun 16 | * Email: septimus145@gmail.com 17 
| * August, 2016 18 | */ 19 | 20 | 21 | public class DecisionCollector_EG implements Runnable { 22 | 23 | protected String brokerList = ""; // list of broker 24 | protected String hostname = ""; // name of current host 25 | public KafkaConsumer consumer = null; // kafka consumer 26 | 27 | public DecisionCollector_EG( String hostname, String brokerList ) { 28 | this.hostname = hostname; 29 | this.brokerList = brokerList; 30 | // setup consumer 31 | Properties consumerProps = new Properties(); 32 | consumerProps.put("bootstrap.servers", brokerList); 33 | consumerProps.put("group.id", this.hostname); 34 | consumerProps.put("enable.auto.commit", "true"); 35 | consumerProps.put("auto.commit.interval.ms", "1000"); 36 | consumerProps.put("session.timeout.ms", "30000"); 37 | consumerProps.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 38 | consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 39 | this.consumer = new KafkaConsumer<>(consumerProps); 40 | consumer.subscribe(Arrays.asList("decision")); 41 | } 42 | 43 | public void run() { 44 | KafkaConsumer tconsumer = consumer; 45 | while (true) { 46 | ConsumerRecords records = tconsumer.poll(1000); 47 | for (ConsumerRecord record : records) { 48 | String[] decision = record.value().split(";"); 49 | try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("/var/www/info/d_" + decision[0]), "utf-8"))) { 50 | JSONObject jObject = new JSONObject(decision[1]); 51 | writer.write(String.valueOf(jObject.getDouble("epsilon"))); 52 | writer.newLine(); 53 | writer.write(jObject.getString("best")); 54 | JSONArray jArray = jObject.getJSONArray("random"); 55 | for (int i=0; i < jArray.length(); i++) { 56 | writer.newLine(); 57 | writer.write(jArray.getString(i)); 58 | } 59 | } catch (Exception e) { 60 | System.err.println("Caught Exception: " + e.getMessage()); 61 | } 62 | } 63 | try { 64 | Thread.sleep(1000); 65 | } catch(InterruptedException ex) { 66 | Thread.currentThread().interrupt(); 67 | } 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /frontend/GroupManager/src/main/java/frontend/GroupManager.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.util.concurrent.ConcurrentHashMap; 4 | import java.net.InetAddress; 5 | import java.net.UnknownHostException; 6 | 7 | /** 8 | * Manage the groups of current cluster 9 | * 10 | * Retrive the info of updates from file and send them to Kafka server 11 | * Fetch group table from Kafka and maintain it 12 | * Fetch decisions from Kafka 13 | * 14 | * Author: Shijie Sun 15 | * Email: septimus145@gmail.com 16 | * August, 2016 17 | */ 18 | 19 | public class GroupManager { 20 | 21 | protected Thread decisionCollector = null; 22 | protected Thread groupTableUpdater = null; 23 | protected Thread infoSender = null; 24 | protected String hostname = ""; 25 | protected String kafkaBrokerList = ""; 26 | protected String clusterID = ""; 27 | public ConcurrentHashMap group2ClusterMap = null; 28 | 29 | public GroupManager( String clusterID, String kafkaServerList, String configFile ) { 30 | this.clusterID = clusterID; 31 | try { 32 | this.hostname = InetAddress.getLocalHost().getHostName(); 33 | } catch (UnknownHostException e){ 34 | this.hostname = "HOST"; 35 | } 36 | this.kafkaBrokerList = kafkaServerList.replace(",",":9092,") + ":9092"; 37 | this.group2ClusterMap = new 
ConcurrentHashMap<>(); 38 | 39 | this.groupTableUpdater = new Thread(new GroupTableUpdater(this.hostname, this.clusterID, this.kafkaBrokerList, this.group2ClusterMap)); 40 | this.groupTableUpdater.setDaemon(true); 41 | this.groupTableUpdater.start(); 42 | System.out.println("Group table updater ready."); 43 | 44 | this.decisionCollector = new Thread(new DecisionCollector(this.hostname, this.kafkaBrokerList)); 45 | this.decisionCollector.setDaemon(true); 46 | this.decisionCollector.start(); 47 | System.out.println("Decision collector ready."); 48 | 49 | this.infoSender = new Thread(new InfoSender(this.kafkaBrokerList, this.clusterID, this.group2ClusterMap, configFile)); 50 | this.infoSender.setDaemon(true); 51 | this.infoSender.start(); 52 | System.out.println("Info sender ready."); 53 | } 54 | 55 | public static void main( String[] args ) 56 | { 57 | if (args.length < 3) { 58 | System.out.println("Usage: java frontend.GroupManager cluster_ID kafka_server config_file"); 59 | System.out.println("\n\tcluster_ID is the ID of current cluster"); 60 | System.out.println("\n\tkafka_server is the list of IP of kafka servers, separated by comma"); 61 | System.out.println("\n\tconfig_file contains labels of update info and reduced labels"); 62 | return; 63 | } 64 | 65 | GroupManager gManager = new GroupManager(args[0], args[1], args[2]); 66 | 67 | while (true) 68 | { 69 | try { 70 | Thread.sleep(1000); 71 | } catch (InterruptedException e) { 72 | Thread.currentThread().interrupt(); 73 | // code for stopping current task so thread stops 74 | } 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /sbin/install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | echo "" 5 | echo "" 6 | echo "" 7 | echo "**************************************************************************" 8 | echo "* Setting up $2 : $HOST" 9 | echo "**************************************************************************" 10 | echo $0 $@ 11 | echo "" 12 | 13 | if [ "$#" -lt 3 ]; then 14 | echo "Error: need at least three arguments" 15 | echo "Format: sh setup_install.sh " 16 | exit 1; 17 | fi 18 | 19 | HOST=$1 20 | SSH=junchenj@$HOST 21 | PRE=-o\ "StrictHostKeyChecking=no" 22 | LOCAL_BIN_DIR=$3 23 | FRONT_SERVER=front_server 24 | SPARK=spark 25 | KAFKA=kafka 26 | TRACE=trace 27 | 28 | REMOTE_USER_ROOT="/users/junchenj" 29 | REMOTE_LOG_ROOT="/users/junchenj/log" 30 | 31 | echo "======== ${HOST}: Creating ${REMOTE_USER_ROOT} ========" 32 | ssh $PRE $SSH 'mkdir -p '$REMOTE_USER_ROOT 33 | ssh $PRE $SSH 'mkdir -p '$REMOTE_LOG_ROOT 34 | 35 | [ -z $SPARK ] || { 36 | echo "======== ${HOST}: Setting up spark ========" 37 | ssh $PRE $SSH 'mkdir -p '$REMOTE_USER_ROOT/$SPARK 38 | scp $PRE $LOCAL_BIN_DIR/'spark_deploy.sh' $SSH:$REMOTE_USER_ROOT/$SPARK 39 | scp $PRE $LOCAL_BIN_DIR/'config.properties' $SSH:$REMOTE_USER_ROOT/$SPARK 40 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$SPARK'; sudo bash spark_deploy.sh' 41 | } 42 | 43 | [ -z $KAFKA ] || { 44 | echo "======== ${HOST}: Setting up kafka ========" 45 | ssh $PRE $SSH 'mkdir -p '$REMOTE_USER_ROOT/$KAFKA 46 | scp $PRE $LOCAL_BIN_DIR/'kafka_deploy.sh' $SSH:$REMOTE_USER_ROOT/$KAFKA 47 | scp $PRE $LOCAL_BIN_DIR/'zookeeper.properties' $SSH:$REMOTE_USER_ROOT/$KAFKA 48 | IP=$(ssh $PRE $SSH getent hosts \$\(hostname\) | awk '{print $1}') 49 | echo "Got IP = $IP" 50 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$KAFKA'; sudo ./kafka_deploy.sh '$IP' 1' 51 | } 52 | 53 | [ -z 
$FRONT_SERVER ] || { 54 | echo "======== "$HOST": Setting up front_server ========" 55 | ssh $PRE $SSH 'mkdir -p '$REMOTE_USER_ROOT/$FRONT_SERVER 56 | scp $PRE $LOCAL_BIN_DIR/'frontserver_deploy.sh' $SSH:$REMOTE_USER_ROOT/$FRONT_SERVER 57 | ssh $PRE $SSH -t 'cd '$REMOTE_USER_ROOT/$FRONT_SERVER'; sudo ./frontserver_deploy.sh' 58 | } 59 | 60 | echo "**************************************************************************" 61 | echo "* Finish setup of $2 : $HOST" 62 | echo "**************************************************************************" 63 | 64 | #echo "**************************************************************************" 65 | #echo "* Done Setting up on "$host 66 | #echo "* NEXT STEP: Open 6 new windws" 67 | #echo "* Run 'sh run_zookeeper.sh "$host"' in 1st window" 68 | #echo "* Run 'sh run_kafka.sh "$host"' in 2nd window" 69 | #echo "* Run 'sh run_groupmanager.sh -f "$host" -p' in 3rd window" 70 | #echo "* Run 'sh run_communicator.sh -f "$host" -b backendhost -p' in 4th window" 71 | #echo "* Run 'sh run_decisionmaker.sh -f "$host" -p' in 5th window" 72 | #echo "* Run 'sh run_uploadtrace.sh "$host" tracefile -p' in 6th window" 73 | #echo "**************************************************************************" 74 | #echo "" 75 | #echo "" 76 | #echo "" 77 | -------------------------------------------------------------------------------- /frontend/Communicator/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | frontend 5 | Communicator 6 | jar 7 | 1.0 8 | Communicator 9 | http://maven.apache.org 10 | 11 | UTF-8 12 | 13 | 14 | 15 | junit 16 | junit 17 | 3.8.1 18 | test 19 | 20 | 21 | org.json 22 | json 23 | 20160212 24 | 25 | 26 | org.apache.spark 27 | spark-core_2.10 28 | 1.6.2 29 | 30 | 31 | org.apache.spark 32 | spark-streaming_2.11 33 | 2.0.0 34 | 35 | 36 | org.apache.spark 37 | spark-streaming-kafka_2.10 38 | 1.6.2 39 | 40 | 41 | org.apache.kafka 42 | kafka-clients 43 | 0.10.0.0 44 | 45 | 46 | 47 | 48 | 49 | 50 | org.apache.maven.plugins 51 | maven-shade-plugin 52 | 2.4.3 53 | 54 | 55 | ${project.build.directory}/dependency-reduced-pom.xml 56 | 57 | 58 | 59 | *:* 60 | 61 | META-INF/*.SF 62 | META-INF/*.DSA 63 | META-INF/*.RSA 64 | 65 | 66 | 67 | 68 | 69 | 70 | package 71 | 72 | shade 73 | 74 | 75 | 76 | 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-compiler-plugin 81 | 3.3 82 | 83 | 1.7 84 | 1.7 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | frontend 5 | DecisionMaker 6 | jar 7 | 1.0-SNAPSHOT 8 | DecisionMaker 9 | http://maven.apache.org 10 | 11 | UTF-8 12 | 13 | 14 | 15 | junit 16 | junit 17 | 3.8.1 18 | test 19 | 20 | 21 | org.json 22 | json 23 | 20160212 24 | 25 | 26 | org.apache.spark 27 | spark-core_2.10 28 | 1.6.2 29 | 30 | 31 | org.apache.spark 32 | spark-streaming_2.11 33 | 2.0.0 34 | 35 | 36 | org.apache.spark 37 | spark-streaming-kafka_2.10 38 | 1.6.2 39 | 40 | 41 | org.apache.kafka 42 | kafka-clients 43 | 0.10.0.0 44 | 45 | 46 | 47 | 48 | 49 | 50 | org.apache.maven.plugins 51 | maven-shade-plugin 52 | 2.4.3 53 | 54 | 55 | ${project.build.directory}/dependency-reduced-pom.xml 56 | 57 | 58 | 59 | *:* 60 | 61 | META-INF/*.SF 62 | META-INF/*.DSA 63 | META-INF/*.RSA 64 | 65 | 66 | 67 | 68 | 69 | 70 | package 71 | 72 | shade 73 | 74 | 75 | 76 | 77 | 78 | 79 | org.apache.maven.plugins 80 | 
maven-compiler-plugin 81 | 3.3 82 | 83 | 1.7 84 | 1.7 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /sbin/run_communicator.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | Host="" 4 | Backend="" 5 | 6 | if [ "$#" -lt 4 ]; then 7 | echo "Error: need at least four argument" 8 | echo "Format: sh run_communicator.sh [-option] --frontend host --backend host" 9 | exit 10 | fi 11 | 12 | Rebuild=false 13 | 14 | while test $# -gt 0; do 15 | case "$1" in 16 | -h|--help) 17 | echo "Format: sh run_communicator.sh [-option] host" 18 | echo "options:" 19 | echo "-h --help show brief help" 20 | echo "-p --package repackage before running" 21 | echo "-f --frontend set frontend host" 22 | echo "-b --backend set backend host" 23 | exit 0 24 | ;; 25 | -p|--package) 26 | Rebuild=true 27 | shift 28 | ;; 29 | -f|--frontend) 30 | Host=$2 31 | shift 32 | ;; 33 | -b|--backend) 34 | Backend=$2 35 | shift 36 | ;; 37 | -*) 38 | echo "invalid option "$1 39 | exit 0 40 | ;; 41 | *) 42 | shift 43 | ;; 44 | esac 45 | done 46 | 47 | SSH="junchenj@"$Host 48 | PRE=-o\ "StrictHostKeyChecking=no" 49 | 50 | if [ -z "${PYTHEAS_HOME}" ]; then 51 | export PYTHEAS_HOME="$(cd "`dirname "$0"`"/..; pwd)" 52 | fi 53 | 54 | FRONTEND_HOME=${PYTHEAS_HOME}/frontend 55 | FRONT_SERVER="front_server" 56 | SPARK="spark" 57 | KAFKA="kafka" 58 | TRACE="trace" 59 | 60 | REMOTE_USER_ROOT="/users/junchenj" 61 | REMOTE_SYS_ROOT="/usr/share" 62 | REMOTE_LOG_ROOT="/users/junchenj/log" 63 | LOG_FILE=$REMOTE_LOG_ROOT"/log_communicator" 64 | 65 | 66 | echo "" 67 | echo "" 68 | echo "" 69 | echo "**************************************************************************" 70 | echo "* Starting Communicator on "$Host 71 | echo "**************************************************************************" 72 | 73 | CONF=$REMOTE_USER_ROOT/$SPARK"/config.properties" 74 | 75 | if [ "$Rebuild" = true ] ; then 76 | scp $PRE -r $FRONTEND_HOME/$SPARK/Communicator $SSH:$REMOTE_USER_ROOT/$SPARK/ 77 | ssh $PRE $SSH 'cd '$REMOTE_USER_ROOT/$SPARK/Communicator'; mvn package > '$LOG_FILE 78 | fi 79 | 80 | ##### get kafka pointer of the frontend 81 | ssh $PRE $SSH /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 82 | FrontendKafka=$(cat temp)':9092' 83 | FrontendZookeeper=$(cat temp)':2181' 84 | rm temp 85 | echo "Frontend Kafka="$FrontendKafka" / "$FrontendZookeeper 86 | 87 | ##### get kafka pointer of the frontend 88 | ssh $PRE junchenj@$Backend /sbin/ifconfig br-flat-lan-1 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}' > temp 89 | BackendKafka=$(cat temp)':9092' 90 | BackendZookeeper=$(cat temp)':2181' 91 | rm temp 92 | echo "Backend Kafka="$BackendKafka" / "$BackendZookeeper 93 | 94 | ##### get and add topics 95 | for TopicKey in 'updateTopic' 'uploadTopic' 'decisionTopic' 'subscribeTopic' 'forwardTopic' 'sampleTopic' 'aliveTopic' 96 | do 97 | ssh $PRE $SSH "cat $CONF | grep $TopicKey'=' | cut -d= -f2 | awk '{ print \$1}'" > temp 98 | TOPIC=$(cat temp) 99 | rm temp 100 | ssh $PRE $SSH -t 'cd '$REMOTE_SYS_ROOT/$KAFKA'; sudo bin/kafka-topics.sh --create --zookeeper '$FrontendZookeeper' --topic '$TOPIC' --partition 1 --replication-factor 1' 101 | echo "Added topic "$TopicKey"="$TOPIC 102 | done 103 | 104 | ssh $PRE $SSH -t "sed -i 's/frontend1=10.11.10.3:9092/frontend1="$FrontendKafka"/g' "$CONF 105 | ssh $PRE $SSH -t "sed -i 's/backendBrokers=10.11.10.2:9092/backendBrokers="$BackendKafka"/g' "$CONF 106 
| ssh $PRE $SSH -t "sed -i 's/frontend2=10.11.10.4:9092//g' "$CONF 107 | 108 | ssh $PRE $SSH "sh -c 'cd $REMOTE_SYS_ROOT/$SPARK && sudo bin/spark-submit --class frontend.Communicator --master local --executor-memory 30G --total-executor-cores 1 --executor-cores 1 ~/spark/Communicator/target/Communicator-1.0.jar $CONF > $LOG_FILE 2>&1 &'" 109 | 110 | echo "**************************************************************************" 111 | echo "* Done Communicator on "$Host 112 | echo "**************************************************************************" 113 | echo "" 114 | echo "" 115 | echo "" 116 | 117 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/algorithms/HistoryData_DUCB.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.util.*; 4 | import scala.Tuple2; 5 | import org.apache.spark.streaming.api.java.*; 6 | 7 | public class HistoryData extends HistoryObject { 8 | 9 | // self defined parameters 10 | public double gamma = 0.8; 11 | public int precisionTime = 10; 12 | // end of self defined 13 | 14 | public HistoryData(JavaStreamingContext jssc) { 15 | super(jssc); 16 | windowSize = -1; 17 | } 18 | 19 | /* 20 | * implement this method for combination of old data and new data 21 | */ 22 | public Tuple2>> combineCall(Tuple2>>, Iterable>> 24 | >> tuple2) { 25 | Map> oldData=null, newData=null; 26 | Iterator>> iter; 27 | iter = tuple2._2()._1().iterator(); 28 | if (iter.hasNext()) 29 | newData = iter.next(); 30 | iter = tuple2._2()._2().iterator(); 31 | if (iter.hasNext()) 32 | oldData = iter.next(); 33 | if (newData != null) { 34 | // calculate the average for new records 35 | for (Map.Entry> entry : newData.entrySet()) { 36 | List scores = entry.getValue(); 37 | double totalscore = 0; 38 | for (double score : scores) 39 | totalscore += score; 40 | // here becomes 2-elements list: [sum, size] 41 | List countedScore = new ArrayList<>(); 42 | countedScore.add(totalscore); 43 | countedScore.add((double)scores.size()); 44 | entry.setValue(countedScore); 45 | } 46 | // combine the old data and new data 47 | if (oldData != null) { 48 | for (Map.Entry> oldEntry : oldData.entrySet()) { 49 | List newValue = newData.get(oldEntry.getKey()); 50 | List oldValue = oldEntry.getValue(); 51 | if (newValue != null) { 52 | newValue.set(0, newValue.get(0) + oldValue.get(0) * this.gamma); 53 | newValue.set(1, newValue.get(1) + oldValue.get(1) * this.gamma); 54 | } else { 55 | newData.put(oldEntry.getKey(), oldValue); 56 | } 57 | } 58 | } 59 | return new Tuple2(tuple2._1(), newData); 60 | } else { 61 | // discount the old data 62 | if (oldData != null) { 63 | for (Map.Entry> oldEntry : oldData.entrySet()) { 64 | List oldValue = oldEntry.getValue(); 65 | oldValue.set(0, oldValue.get(0) * this.gamma); 66 | oldValue.set(1, oldValue.get(1) * this.gamma); 67 | } 68 | } 69 | return new Tuple2(tuple2._1(), oldData); 70 | } 71 | } 72 | 73 | /* 74 | * implement this method for decision making 75 | */ 76 | public String getDecision(Map> decisionStatMap) { 77 | // here is just a convert from List to double[] 78 | // the latter is much easier for process here 79 | Map tmpMap = new HashMap(); 80 | for (Map.Entry> entry : decisionStatMap.entrySet()) { 81 | tmpMap.put(entry.getKey(), new double[]{entry.getValue().get(0), entry.getValue().get(1)}); 82 | } 83 | double N = 0; 84 | for (Map.Entry entry : tmpMap.entrySet()) { 85 | N += entry.getValue()[1]; 86 | if (entry.getValue()[1] > 0) 
87 | entry.getValue()[0] /= entry.getValue()[1]; 88 | else 89 | entry.getValue()[0] = 0; 90 | } 91 | double score, maxScore; 92 | String bestDecision = ""; 93 | String decisions = ""; 94 | double[] bestDecisionInfo; 95 | double Bsqrt2logN = 0; 96 | for (int j = 0; j < precisionTime; j++) { 97 | maxScore = -Double.MAX_VALUE; 98 | Bsqrt2logN = 0; 99 | // if N <= 1, then it will be a negative number or zero. 100 | // in this case, we will not compute the Ct(y,i) 101 | if (N > 1) 102 | Bsqrt2logN = 1000 * Math.sqrt(2 * Math.log(N)); 103 | for (Map.Entry entry : tmpMap.entrySet()) { 104 | if (entry.getValue()[1] > 0) 105 | score = entry.getValue()[0] + Bsqrt2logN / Math.sqrt(entry.getValue()[1]); 106 | else 107 | score = 0; 108 | if (score > maxScore) { 109 | bestDecision = entry.getKey(); 110 | maxScore = score; 111 | } 112 | } 113 | decisions += bestDecision + ":"; 114 | bestDecisionInfo = tmpMap.get(bestDecision); 115 | bestDecisionInfo[1] += 1; 116 | N += 1; 117 | // discount for next precision 118 | N *= this.gamma; 119 | for (Map.Entry entry : tmpMap.entrySet()) { 120 | entry.getValue()[1] *= this.gamma; 121 | } 122 | } 123 | return decisions; 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /frontend/GroupManager/src/main/java/frontend/GroupTableUpdater.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.io.*; 4 | import java.util.Arrays; 5 | import java.util.ArrayList; 6 | import java.util.Iterator; 7 | import java.util.Properties; 8 | import java.util.concurrent.ConcurrentHashMap; 9 | import org.apache.kafka.clients.consumer.ConsumerRecord; 10 | import org.apache.kafka.clients.consumer.ConsumerRecords; 11 | import org.apache.kafka.clients.consumer.KafkaConsumer; 12 | import org.json.JSONObject; 13 | import org.json.JSONArray; 14 | 15 | /** 16 | * Fetch group table from Kafka and maintain it 17 | * 18 | * Author: Shijie Sun 19 | * Email: septimus145@gmail.com 20 | * August, 2016 21 | */ 22 | 23 | 24 | public class GroupTableUpdater implements Runnable { 25 | 26 | protected String brokerList = ""; // list of broker 27 | protected String hostname = ""; // name of current host 28 | public KafkaConsumer consumer = null; // kafka consumer 29 | public ConcurrentHashMap group2ClusterMap = null; 30 | 31 | public GroupTableUpdater( String hostname, String clusterID, String brokerList, ConcurrentHashMap group2ClusterMap ) { 32 | this.hostname = hostname; 33 | this.brokerList = brokerList; 34 | this.group2ClusterMap = group2ClusterMap; 35 | group2ClusterMap.put("null", clusterID); 36 | // setup consumer 37 | Properties consumerProps = new Properties(); 38 | consumerProps.put("bootstrap.servers", brokerList); 39 | consumerProps.put("group.id", this.hostname); 40 | consumerProps.put("enable.auto.commit", "true"); 41 | consumerProps.put("auto.commit.interval.ms", "1000"); 42 | consumerProps.put("session.timeout.ms", "30000"); 43 | consumerProps.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 44 | consumerProps.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 45 | this.consumer = new KafkaConsumer<>(consumerProps); 46 | consumer.subscribe(Arrays.asList("group_table")); 47 | } 48 | 49 | public void run() { 50 | while (true) { 51 | ConsumerRecords records = this.consumer.poll(1000); 52 | for (ConsumerRecord record : records) { 53 | JSONObject jObject = new JSONObject(record.value()); 54 | 55 | // if 
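/* Annotation (not in the original source): messages on the "group_table" topic are JSON and
 * carry one of two payloads; two purely hypothetical examples:
 *   {"GroupAssignment":[{"GroupName":"g1","Cluster":"frontend1"},
 *                       {"GroupName":"g2","Cluster":"frontend2"}]}
 *   {"GroupingRules":{"Field":"4","Table":[{"Key":"Pittsburgh","Rule":{"GroupName":"g1"}}]}}
 * The first updates group2ClusterMap in place; the second is compiled by phpGenerator()
 * below into nested if-statements on $features[...] that assign $group_id, and the result
 * is written to /var/www/info/match.php.
 */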
it is group to cluster map, save it 56 | if (jObject.has("GroupAssignment")) { 57 | JSONArray jArray = jObject.getJSONArray("GroupAssignment"); 58 | for (int i = 0; i < jArray.length(); i++) { 59 | group2ClusterMap.put(jArray.getJSONObject(i).getString("GroupName"), jArray.getJSONObject(i).getString("Cluster")); 60 | } 61 | } 62 | 63 | // if it is feature values to group map, generate new PHP code 64 | if (jObject.has("GroupingRules")) { 65 | try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("/var/www/info/match.php"), "utf-8"))) { 66 | JSONObject jObjectRules = jObject.getJSONObject("GroupingRules"); 67 | ArrayList code = phpGenerator(jObjectRules); 68 | Iterator codeIter = code.iterator(); 69 | while (codeIter.hasNext()) { 70 | writer.write(codeIter.next()); 71 | writer.newLine(); 72 | } 73 | } catch (Exception e) { 74 | System.err.println("Caught Exception: " + e.getMessage()); 75 | } 76 | } 77 | } 78 | try { 79 | Thread.sleep(1000); 80 | } catch(InterruptedException ex) { 81 | Thread.currentThread().interrupt(); 82 | } 83 | } 84 | } 85 | 86 | private ArrayList phpGenerator (JSONObject jObjectRules) { 87 | ArrayList code = new ArrayList(); 88 | code.add(""); 98 | return code; 99 | } 100 | 101 | private void ruleParser (JSONObject jObjectRules, ArrayList code, int indent) { 102 | String indentStr = new String(new char[indent]).replace('\0', ' '); 103 | if (jObjectRules.has("Field")) { 104 | JSONArray jArrayTable = jObjectRules.getJSONArray("Table"); 105 | String field = jObjectRules.getString("Field"); 106 | for (int i = 0; i < jArrayTable.length(); i++) { 107 | JSONObject jObjectRule = jArrayTable.getJSONObject(i); 108 | code.add(indentStr + "if ($features[" + field + "] == \"" + jObjectRule.getString("Key") + "\") {"); 109 | ruleParser(jObjectRule.getJSONObject("Rule"), code, indent+2); 110 | code.add(indentStr + "}"); 111 | } 112 | } else if (jObjectRules.has("GroupName")) { 113 | code.add(indentStr + "$group_id = \"" + jObjectRules.getString("GroupName") + "\";"); 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /frontend/GroupManager/src/main/java/frontend/InfoSender.java: -------------------------------------------------------------------------------- 1 | package frontend; 2 | 3 | import java.io.*; 4 | import java.util.List; 5 | import java.util.Arrays; 6 | import java.util.Properties; 7 | import java.util.concurrent.ConcurrentHashMap; 8 | import org.apache.commons.lang3.StringUtils; 9 | import org.apache.kafka.clients.producer.KafkaProducer; 10 | import org.apache.kafka.clients.producer.ProducerRecord; 11 | import org.json.JSONObject; 12 | 13 | /** 14 | * Retrive the info of updates from file and send them to Kafka server 15 | * 16 | * Author: Shijie Sun 17 | * Email: septimus145@gmail.com 18 | * August, 2016 19 | */ 20 | 21 | public class InfoSender implements Runnable { 22 | 23 | protected String brokerList = ""; // list of broker 24 | protected String clusterID = ""; 25 | public KafkaProducer producer = null; // kafka producer 26 | public ConcurrentHashMap group2ClusterMap = null; 27 | public int[] reducedUpdateLabelIndexes = null; 28 | 29 | public InfoSender( String brokerList, String clusterID, ConcurrentHashMap group2ClusterMap, String configFile ) { 30 | // get reduced labels index 31 | try (BufferedReader br = new BufferedReader(new FileReader(configFile))) { 32 | List updateLabelsList = Arrays.asList(br.readLine().split("\t")); 33 | String[] reducedUpdateLabels = 
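/* Annotation (not in the original source): the config file is expected to hold the full
 * tab-separated label list on its first line and the reduced subset on its second line;
 * reducedUpdateLabelIndexes then records where each reduced label sits in the full list.
 * A hedged, purely hypothetical example: a first line "A\tB\tC\tD" and a second line "B\tD"
 * give reducedUpdateLabelIndexes = [1, 3], so run() later keeps only columns 1 and 3 of each
 * update before forwarding it to the internal_groups or external_groups topic.
 */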
br.readLine().split("\t"); 34 | reducedUpdateLabelIndexes = new int[reducedUpdateLabels.length]; 35 | for (int i=0; i < reducedUpdateLabels.length; i++) { 36 | reducedUpdateLabelIndexes[i] = updateLabelsList.indexOf(reducedUpdateLabels[i]); 37 | } 38 | System.out.println(Arrays.toString(this.reducedUpdateLabelIndexes)); 39 | } catch (Exception e) { 40 | System.err.println("Read config file failed: " + e.getMessage()); 41 | } 42 | 43 | this.brokerList = brokerList; 44 | this.clusterID = clusterID; 45 | this.group2ClusterMap = group2ClusterMap; 46 | // setup producer 47 | Properties producerProps = new Properties(); 48 | producerProps.put("bootstrap.servers", brokerList); 49 | producerProps.put("acks", "all"); 50 | producerProps.put("retries", 0); 51 | producerProps.put("batch.size", 16384); 52 | producerProps.put("linger.ms", 1); 53 | producerProps.put("buffer.memory", 33554432); 54 | producerProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 55 | producerProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 56 | this.producer = new KafkaProducer(producerProps); 57 | } 58 | 59 | public void run() { 60 | while(true) { 61 | try { 62 | Thread.sleep(100); 63 | } catch (InterruptedException e) { 64 | Thread.currentThread().interrupt(); 65 | } 66 | try { 67 | File file = new File("/var/www/info/info_queue"); 68 | File file2 = new File("/var/www/info/info_queue2"); 69 | file.renameTo(file2); 70 | } catch (Exception e2) { 71 | //System.err.println("Change file Exception: " + e2.getMessage()); 72 | } 73 | try (BufferedReader br = new BufferedReader(new FileReader("/var/www/info/info_queue2"))) { 74 | String line; 75 | String topic; 76 | int i = 0; 77 | // foreach record 78 | while ((line = br.readLine()) != null) { 79 | i++; 80 | // if record's group belongs to current cluster, topic is "internal_groups", otherwise topic is "external_groups" 81 | JSONObject jObject = new JSONObject(line); 82 | // for uploading 83 | ProducerRecord data = new ProducerRecord<>("upload", jObject.getString("update")); 84 | this.producer.send(data); 85 | // for processing 86 | String cluster; 87 | if (group2ClusterMap.containsKey(jObject.getString("group_id"))) 88 | cluster = group2ClusterMap.get(jObject.getString("group_id")); 89 | else // if no cluster to map to, deal this group wthin current cluster 90 | cluster = this.clusterID; 91 | if (cluster.equals(this.clusterID)) 92 | topic = "internal_groups"; 93 | else 94 | topic = "external_groups"; 95 | jObject.put("cluster_id", cluster); 96 | String[] update = jObject.getString("update").split("\t"); 97 | String[] reducedupdate = new String[this.reducedUpdateLabelIndexes.length]; 98 | for (int j=0; j < reducedUpdateLabelIndexes.length; j++) { 99 | reducedupdate[j] = update[reducedUpdateLabelIndexes[j]]; 100 | } 101 | jObject.put("update", StringUtils.join(reducedupdate, "\t")); 102 | data = new ProducerRecord<>(topic, jObject.toString()); 103 | this.producer.send(data); 104 | } 105 | System.out.printf("Send %d msgs!\n",i); 106 | } catch (Exception e3) { 107 | //System.err.println("Read file Exception: " + e3.getMessage()); 108 | } 109 | try { 110 | File file = new File("/var/www/info/info_queue2"); 111 | file.delete(); 112 | } catch (Exception e4) { 113 | //System.err.println("Deletc file Exception: " + e4.getMessage()); 114 | } 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /frontend/trace/trace_parser.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # parse the trace and test 4 | # 5 | # Author: Shijie Sun 6 | # Email: septimus145@gmail.com 7 | # Sept, 2016 8 | 9 | import time 10 | import threading 11 | import urllib 12 | import urllib2 13 | import Queue 14 | import sys 15 | from itertools import izip 16 | 17 | 18 | TIMEOUT = 3 # timeout restriction for requests 19 | UPDATE_DELAY = 2 # delay time from receiving decision to send update 20 | 21 | 22 | URL = '' 23 | trace_start_time = 0 24 | trace_finish_time = 0 25 | update_queue = Queue.Queue() # message queue for request 26 | request_num = [] # number of requests sent and succeeded [[send1, succeeded1], ... , [send2, succeeded2]] 27 | load_dict_list = [] 28 | cost_list = [] 29 | 30 | 31 | def request_performer(*trace): 32 | global update_queue 33 | global request_num 34 | global load_dict_list 35 | global cost_list 36 | 37 | curr_second = trace[0] - trace_start_time 38 | curr_minute = curr_second / 60 39 | request_num[curr_second][0] += 1 40 | values = {'payload' : trace[1] + '\t'.join(trace[2].keys()), 'method' : 'request'} 41 | decision = '' 42 | try: 43 | con = urllib2.urlopen(URL, urllib.urlencode(values), timeout=TIMEOUT) 44 | decision = con.read().strip() 45 | except Exception as inst: 46 | print(inst) 47 | return 48 | # if decision is not in decision_list 49 | if not trace[2].has_key(decision): 50 | return 51 | request_num[curr_second][1] += 1 52 | # update the load dict 53 | if not load_dict_list[curr_minute].has_key(decision): 54 | load_dict_list[curr_minute][decision] = 1 55 | else: 56 | load_dict_list[curr_minute][decision] += 1 57 | cost_factor = 1 58 | if sum(load_dict_list[curr_minute].values()) > 0: 59 | load = load_dict_list[curr_minute][decision] / float(load_dict_list[curr_minute]['total_sessions']) 60 | for key in sorted(trace[3][decision].keys(), reverse=True): 61 | if load > key: 62 | cost_factor = trace[3][decision][key] 63 | break 64 | cost = cost_factor * float(trace[2][decision]) 65 | cost_list[curr_second] += cost 66 | update_str = trace[1] + decision + '\t' + str(cost) 67 | update_queue.put([time.time() + UPDATE_DELAY, update_str]) 68 | 69 | 70 | def update_performer(): 71 | global update_queue 72 | while True: 73 | while update_queue.empty(): 74 | time.sleep(0.05) 75 | info = update_queue.get() 76 | while time.time() < info[0]: 77 | time.sleep(0.05) 78 | try: 79 | con = urllib2.urlopen(URL, urllib.urlencode({'payload' : info[1], 'method' : 'update'}), timeout=TIMEOUT) 80 | except Exception as inst: 81 | print(inst) 82 | 83 | 84 | if __name__ == '__main__': 85 | #global URL 86 | #global trace_start_time 87 | #global trace_finish_time 88 | #global update_queue 89 | #global request_num 90 | #global load_dict_list 91 | #global cost_list 92 | 93 | if len(sys.argv) < 3: 94 | print "Usage: ", sys.argv[0], "url trace_file" 95 | sys.exit(1) 96 | URL = sys.argv[1] 97 | trace_list = [] 98 | 99 | # load the trace 100 | with open(sys.argv[2]) as fin: 101 | # seek to the beginning of the file and read all traces 102 | fin.seek(0) 103 | for trace in fin.readlines(): 104 | [feature, info] = trace.split('DecisionMap') 105 | trace_time = int(feature.split('\t',1)[0]) / 1000 106 | [decision_str, load_str] = info.strip().split('LoadMap') 107 | decision_map = dict(decision.split(',') for decision in decision_str.strip().split('\t')) 108 | load_map = dict([load.split(',')[0], load.split(',')[1].split(';')] for load in load_str.strip().split('\t')) 109 | for load in 
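# Annotation (not in the original script): each trace line is expected to look like the
# following hypothetical example:
#   <epoch_ms>\t<feature fields...>DecisionMap cdnA,120\tcdnB,95 LoadMap cdnA,0.5;2;0.8;4\tcdnB,0.5;1.5
# i.e. the text before "DecisionMap" is the feature prefix (starting with a millisecond
# timestamp), the middle part maps each decision to its cost, and the part after "LoadMap"
# maps each decision to load-threshold;cost-factor pairs, which the loop around this point
# turns into {threshold: factor} dicts. A hedged example invocation (hypothetical URL):
#   python trace_parser.py http://<frontend-host>/<request-endpoint> sorted_trace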
load_map: 110 | load_map[load] = dict(zip(load_map[load][0::2], load_map[load][1::2])) 111 | trace_list.append([trace_time, feature, decision_map, load_map]) 112 | 113 | # initialize 114 | trace_start_time = trace_list[0][0] 115 | trace_stop_time = trace_list[len(trace_list) - 1][0] 116 | request_num = [[0,0] for i in range(trace_stop_time - trace_start_time + 1)] 117 | load_dict_list = [{} for i in range((trace_stop_time - trace_start_time)/60 + 1)] 118 | cost_list = [0 for i in range(trace_stop_time - trace_start_time + 1)] 119 | 120 | for load_dict in load_dict_list: 121 | load_dict['total_sessions'] = 0 122 | for trace in trace_list: 123 | load_dict_list[(trace[0] - trace_start_time) / 60]['total_sessions'] += 1 124 | 125 | update_thread = threading.Thread(target=update_performer) 126 | update_thread.daemon = True 127 | update_thread.start() 128 | 129 | test_start_time = time.time() 130 | test_second = 0 131 | send_num = 0 132 | 133 | newoutput = 'result-detail.txt' 134 | foutnew = open(newoutput, 'w') 135 | foutnew.write("") 136 | foutnew.close() 137 | 138 | fout = open('result.txt','w') 139 | # start the test 140 | print "------------------------------ %3d sec" % test_second 141 | for trace in trace_list: 142 | while (time.time() - test_start_time) < (trace[0] - trace_start_time): 143 | time.sleep(0.05) 144 | if int(time.time() - test_start_time) > test_second: 145 | test_second = int(time.time() - test_start_time) 146 | print "| send %d, average cost %d" % (send_num, cost_list[test_second-1]/request_num[test_second-1][1]) 147 | send_num = 0 148 | foutnew = open(newoutput, 'a') 149 | foutnew.write(str(cost_list[test_second-1] / request_num[test_second-1][1]) + '\n') 150 | foutnew.close() 151 | fout.write(str(cost_list[test_second-1] / request_num[test_second-1][1]) + '\n') 152 | print "------------------------------ %3d sec" % test_second 153 | #print request_num 154 | #print load_dict_list 155 | #print cost_list 156 | thread = threading.Thread(target=request_performer, args=(trace)) 157 | thread.daemon = True 158 | thread.start() 159 | send_num += 1 160 | 161 | # wait all the requests and updates are finished 162 | time.sleep(TIMEOUT * 2) 163 | 164 | fout.close() 165 | print request_num 166 | print cost_list 167 | #with open('result.txt', 'w') as fout: 168 | # for i in range(len(cost_list)): 169 | # fout.write(str(cost_list[i] / request_num[i][1]) + '\n') 170 | 171 | -------------------------------------------------------------------------------- /frontend/trace/trace_parser_mulit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # parse the trace and test 4 | # 5 | # Author: Shijie Sun 6 | # Email: septimus145@gmail.com 7 | # Sept, 2016 8 | 9 | import time 10 | import threading 11 | import urllib 12 | import urllib2 13 | import Queue 14 | import sys 15 | import random 16 | from itertools import izip 17 | 18 | 19 | TIMEOUT = 3 # timeout restriction for requests 20 | UPDATE_DELAY = 2 # delay time from receiving decision to send update 21 | 22 | 23 | URL = [] 24 | trace_start_time = 0 25 | trace_finish_time = 0 26 | update_queue = Queue.Queue() # message queue for request 27 | request_num = [] # number of requests sent and succeeded [[send1, succeeded1], ... 
, [send2, succeeded2]] 28 | load_dict_list = [] 29 | cost_list = [] 30 | 31 | 32 | def request_performer(*trace): 33 | global update_queue 34 | global request_num 35 | global load_dict_list 36 | global cost_list 37 | 38 | curr_second = trace[0] - trace_start_time 39 | curr_minute = curr_second / 60 40 | request_num[curr_second][0] += 1 41 | values = {'payload' : trace[1] + '\t'.join(trace[2].keys()), 'method' : 'request'} 42 | decision = '' 43 | url_idx = trace[4] % len(URL) 44 | try: 45 | con = urllib2.urlopen(URL[url_idx], urllib.urlencode(values), timeout=TIMEOUT) 46 | decision = con.read().strip() 47 | except Exception as inst: 48 | print(inst) 49 | request_num[curr_second][1] += 1 50 | decision = trace[2].keys()[2] 51 | print "IM in trouble ---" + str(trace[2][decision]) 52 | fout1.write("%d,%s,%s\n"%(url_idx,"local",str(trace[2][decision]))) 53 | cost_list[curr_second] += float(trace[2][decision]) 54 | return 55 | # if decision is not in decision_list 56 | if not trace[2].has_key(decision): 57 | return 58 | request_num[curr_second][1] += 1 59 | # update the load dict 60 | if not load_dict_list[curr_minute].has_key(decision): 61 | load_dict_list[curr_minute][decision] = 1 62 | else: 63 | load_dict_list[curr_minute][decision] += 1 64 | cost_factor = 1 65 | #if sum(load_dict_list[curr_minute].values()) > 0: 66 | # load = load_dict_list[curr_minute][decision] / float(load_dict_list[curr_minute]['total_sessions']) 67 | # for key in sorted(trace[3][decision].keys(), reverse=True): 68 | # if load > key: 69 | # cost_factor = trace[3][decision][key] 70 | # break 71 | cost = cost_factor * float(trace[2][decision]) 72 | fout1.write("%d,%s,%s\n"%(url_idx,"online",str(trace[2][decision]))) 73 | print "IM ok ---" + str(trace[2][decision]) 74 | cost_list[curr_second] += cost 75 | update_str = trace[1] + decision + '\t' + str(cost) 76 | update_queue.put([time.time() + UPDATE_DELAY, update_str, url_idx]) 77 | 78 | 79 | def update_performer(): 80 | global update_queue 81 | while True: 82 | while update_queue.empty(): 83 | time.sleep(0.05) 84 | info = update_queue.get() 85 | while time.time() < info[0]: 86 | time.sleep(0.05) 87 | try: 88 | con = urllib2.urlopen(URL[info[2]], urllib.urlencode({'payload' : info[1], 'method' : 'update'}), timeout=TIMEOUT) 89 | except Exception as inst: 90 | print(inst) 91 | 92 | 93 | if __name__ == '__main__': 94 | #global URL 95 | #global trace_start_time 96 | #global trace_finish_time 97 | #global update_queue 98 | #global request_num 99 | #global load_dict_list 100 | #global cost_list 101 | 102 | if len(sys.argv) < 3: 103 | print "Usage: ", sys.argv[0], "url trace_file" 104 | sys.exit(1) 105 | URL = sys.argv[1].split(",") 106 | trace_list = [] 107 | 108 | # load the trace 109 | with open(sys.argv[2]) as fin: 110 | # seek to the beginning of the file and read all traces 111 | fin.seek(0) 112 | j = 0 113 | for trace in fin.readlines(): 114 | [feature, info] = trace.split('DecisionMap') 115 | trace_time = int(feature.split('\t',1)[0]) / 1000 116 | [decision_str, load_str] = info.strip().split('LoadMap') 117 | decision_map = dict(decision.split(',') for decision in decision_str.strip().split('\t')) 118 | load_map = dict([load.split(',')[0], load.split(',')[1].split(';')] for load in load_str.strip().split('\t')) 119 | for load in load_map: 120 | load_map[load] = dict(zip(load_map[load][0::2], load_map[load][1::2])) 121 | trace_list.append([trace_time, feature, decision_map, load_map, j]) 122 | j+=1 123 | 124 | # initialize 125 | trace_start_time = trace_list[0][0] 126 | 
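# Annotation (not in the original script): this variant takes a comma-separated list of
# frontend URLs and spreads requests over them round-robin (url_idx = trace[4] % len(URL)).
# If a request to the chosen frontend fails, the session falls back to a fixed "local"
# decision whose cost is still charged, and every session is tagged "local" or "online" in
# separa_result so the per-frontend cost can be compared afterwards.
# Hedged example invocation (hypothetical hostnames):
#   python trace_parser_mulit.py http://fe1/<endpoint>,http://fe2/<endpoint> sorted_trace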
trace_stop_time = trace_list[len(trace_list) - 1][0] 127 | request_num = [[0,0] for i in range(trace_stop_time - trace_start_time + 1)] 128 | load_dict_list = [{} for i in range((trace_stop_time - trace_start_time)/60 + 1)] 129 | cost_list = [0 for i in range(trace_stop_time - trace_start_time + 1)] 130 | 131 | for load_dict in load_dict_list: 132 | load_dict['total_sessions'] = 0 133 | for trace in trace_list: 134 | load_dict_list[(trace[0] - trace_start_time) / 60]['total_sessions'] += 1 135 | 136 | update_thread = threading.Thread(target=update_performer) 137 | update_thread.daemon = True 138 | update_thread.start() 139 | 140 | test_start_time = time.time() 141 | test_second = 0 142 | send_num = 0 143 | fout1 = open('separa_result','w') 144 | fout = open('result.txt','w') 145 | # start the test 146 | print "------------------------------ %3d sec" % test_second 147 | for trace in trace_list: 148 | while (time.time() - test_start_time) < (trace[0] - trace_start_time): 149 | time.sleep(0.05) 150 | if int(time.time() - test_start_time) > test_second: 151 | test_second = int(time.time() - test_start_time) 152 | print "| send %d, average cost %d" % (send_num, cost_list[test_second-1]/request_num[test_second-1][1]) 153 | send_num = 0 154 | fout.write(str(cost_list[test_second-1] / request_num[test_second-1][1]) + '\n') 155 | print "------------------------------ %3d sec" % test_second 156 | thread = threading.Thread(target=request_performer, args=(trace)) 157 | thread.daemon = True 158 | thread.start() 159 | send_num += 1 160 | 161 | # wait all the requests and updates are finished 162 | time.sleep(TIMEOUT * 2) 163 | 164 | fout.close() 165 | fout1.close() 166 | print request_num 167 | print cost_list 168 | #with open('result.txt', 'w') as fout: 169 | # for i in range(len(cost_list)): 170 | # fout.write(str(cost_list[i] / request_num[i][1]) + '\n') 171 | 172 | -------------------------------------------------------------------------------- /frontend/DecisionMaker/src/main/java/frontend/DecisionMaker.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Consume messages from one or more topics in Kafka and make decisions. 
3 | * 4 | * Author: Shijie Sun 5 | * Email: septimus145@gmail.com 6 | * August, 2016 7 | */ 8 | 9 | package frontend; 10 | 11 | import java.util.*; 12 | import java.util.regex.Pattern; 13 | import java.util.concurrent.ConcurrentLinkedQueue; 14 | import java.io.*; 15 | 16 | import scala.Tuple2; 17 | 18 | import org.json.JSONObject; 19 | import org.json.JSONArray; 20 | 21 | import kafka.serializer.StringDecoder; 22 | import org.apache.kafka.clients.producer.KafkaProducer; 23 | import org.apache.kafka.clients.producer.ProducerRecord; 24 | 25 | import org.apache.spark.SparkConf; 26 | import org.apache.spark.rdd.RDD; 27 | import org.apache.spark.api.java.function.*; 28 | import org.apache.spark.api.java.JavaRDD; 29 | import org.apache.spark.api.java.JavaPairRDD; 30 | import org.apache.spark.streaming.api.java.*; 31 | import org.apache.spark.streaming.kafka.*; 32 | import org.apache.spark.streaming.Durations; 33 | 34 | // for changing logger config 35 | import org.apache.log4j.Logger; 36 | import org.apache.log4j.Level; 37 | 38 | 39 | 40 | public final class DecisionMaker { 41 | 42 | public final static int processInterval = 2; // seconds 43 | 44 | public static void main(String[] args) throws Exception { 45 | if (args.length < 3) { 46 | System.err.println("Usage: DecisionMaker \n" + 47 | " is a list of one or more Kafka brokers\n" + 48 | " is the kafka topic to consume from\n" + 49 | " is the kafka topic to publish the decision to\n"); 50 | System.exit(1); 51 | } 52 | 53 | Logger.getLogger("org").setLevel(Level.OFF); 54 | Logger.getLogger("akka").setLevel(Level.OFF); 55 | 56 | // parse the arguments 57 | final String brokers = args[0]; 58 | String topicIn = args[1]; 59 | final String topicOut = args[2]; 60 | // final double gamma = Double.parseDouble(args[3]); 61 | // final int precisionTime = Integer.parseInt(args[4]); 62 | 63 | // setup producer 64 | final Properties producerProps = new Properties(); 65 | producerProps.put("bootstrap.servers", brokers); 66 | producerProps.put("acks", "all"); 67 | producerProps.put("retries", 0); 68 | producerProps.put("batch.size", 16384); 69 | producerProps.put("linger.ms", 1); 70 | producerProps.put("buffer.memory", 33554432); 71 | producerProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 72 | producerProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 73 | 74 | // Create context with `processInterval` batch interval 75 | SparkConf sparkConf = new SparkConf().setAppName("DicisionMaker"); 76 | final JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(processInterval)); 77 | 78 | // Create direct kafka stream with brokers and topic 79 | Set topicSet = new HashSet<>(Arrays.asList(topicIn)); 80 | Map kafkaParams = new HashMap<>(); 81 | kafkaParams.put("metadata.broker.list", brokers); 82 | JavaPairInputDStream messages = KafkaUtils.createDirectStream( 83 | jssc, 84 | String.class, 85 | String.class, 86 | StringDecoder.class, 87 | StringDecoder.class, 88 | kafkaParams, 89 | topicSet 90 | ); 91 | 92 | // create a class to restore the history data 93 | final HistoryData historyData = new HistoryData(jssc); 94 | 95 | // map to pair to retrieve the data and group_id 96 | // then reduce by key to combine the performance of each batch 97 | JavaPairDStream>> batchQualitySums = messages.mapToPair( 98 | // extract info of each update 99 | new PairFunction, String, List>() { 100 | @Override 101 | public Tuple2> call(Tuple2 tuple2) { 102 | JSONObject jObject = new 
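/* Annotation (not in the original source): each consumed record is expected to be the JSON
 * produced on the GroupManager side, e.g. the hypothetical
 *   {"group_id":"g1","cluster_id":"frontend1","update":"cdnA\t1.94"}
 * where the first tab-separated field of "update" is the decision and the second its cost;
 * the cost is negated below so that a higher score always means better performance, and the
 * stream is then keyed by "group_id:decision" before being regrouped per group.
 */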
JSONObject(tuple2._2().trim()); 103 | String group_id = jObject.getString("group_id"); 104 | String[] updates = jObject.getString("update").split("\t"); 105 | String decision = updates[0]; 106 | // reverse the score because higher score should represent better performance 107 | double score = 0 - Double.parseDouble(updates[1]); 108 | List scores = new ArrayList(); 109 | scores.add(score); 110 | return new Tuple2<>(group_id + ":" + decision, scores); 111 | } 112 | // count quantity and total score for (group_id:decision) 113 | }).reduceByKey( 114 | new Function2, List, List>() { 115 | @Override 116 | public List call(List m1, List m2) { 117 | m1.addAll(m2); 118 | return m1; 119 | } 120 | // split the group_id and decision 121 | }).mapToPair( 122 | new PairFunction>, String, Map>>() { 123 | @Override 124 | public Tuple2>> 125 | call(Tuple2> tuple2) { 126 | String group_id = tuple2._1().split(":")[0]; 127 | String decision = tuple2._1().split(":")[1]; 128 | Map> info = new HashMap>(); 129 | info.put(decision, tuple2._2()); 130 | return new Tuple2<>(group_id, info); 131 | } 132 | }); 133 | 134 | // reduce the batchQualitySums 135 | JavaPairDStream>> qualitySums; 136 | Function2>, Map>, 137 | Map>> qualitySumsReduceFunction2 = new Function2< 138 | Map>, Map>, Map>>() { 139 | @Override 140 | public Map> call(Map> m1, 141 | Map> m2) { 142 | // Because has reduced once, so here just merge maps by union 143 | for (Map.Entry> m1Entry : m1.entrySet()) { 144 | m2.put(m1Entry.getKey(), m1Entry.getValue()); 145 | } 146 | return m2; 147 | } 148 | }; 149 | if (historyData.windowSize > processInterval) { 150 | qualitySums = batchQualitySums.reduceByKeyAndWindow(qualitySumsReduceFunction2, 151 | Durations.seconds(historyData.windowSize), Durations.seconds(processInterval)); 152 | } else { 153 | qualitySums = batchQualitySums.reduceByKey(qualitySumsReduceFunction2); 154 | } 155 | 156 | // combine the old data with new data and send the decision to kafka 157 | qualitySums.foreachRDD(new VoidFunction>>>() { 158 | // foreachRDD will get RDD of each batch of dstream 159 | @Override 160 | public void call(JavaPairRDD>> groups) throws Exception { 161 | //System.out.println(groups.cogroup(historyPairDResult).collect()); 162 | 163 | // combine old data with new data: cogroup then map 164 | JavaPairRDD>> combinedData = groups.cogroup( 165 | historyData.pairDData).mapToPair( 166 | new PairFunction>>, Iterable>> 168 | >>, String, Map>>() { 169 | @Override 170 | public Tuple2>> call(Tuple2>>, Iterable>> 172 | >> tuple2) { 173 | return historyData.combineCall(tuple2); 174 | } 175 | }); 176 | historyData.updateData(combinedData); 177 | 178 | // to show the combined result clearly 179 | //System.out.println(combinedResult.collect()); 180 | List>>> collectedData = combinedData.collect(); 181 | Tuple2>> tmpTuple2 = null; 182 | Map> tmpMap = null; 183 | for (int i = 0; i < collectedData.size(); i++) { 184 | tmpTuple2 = collectedData.get(i); 185 | System.out.println(tmpTuple2._1() + "----"); 186 | tmpMap = tmpTuple2._2(); 187 | for (Map.Entry> entry : tmpMap.entrySet()) { 188 | System.out.printf("\t%s : (%f, %f) : %f\n", entry.getKey(), entry.getValue().get(0), 189 | entry.getValue().get(1), entry.getValue().get(0)/entry.getValue().get(1)); 190 | } 191 | } 192 | 193 | combinedData.foreachPartition( 194 | new VoidFunction>>>> () { 195 | @Override 196 | public void call(Iterator>>> group_iter) 197 | throws Exception { 198 | KafkaProducer kproducer = new KafkaProducer(producerProps); 199 | Tuple2>> group = null; 200 | while 
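/* Annotation (not in the original source): each partition creates one KafkaProducer and
 * publishes one record per group to the decision topic, of the form
 *   <group_id>;<d1>:<d2>:...:<dN>:;From: <brokers>
 * where the colon-separated list is the ranking produced by getDecision() (one entry per
 * precisionTime round in the D-UCB variant above, best decision first), e.g. a hypothetical
 * "g1;cdnA:cdnA:cdnB:...;From: 10.11.10.3:9092".
 */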
(group_iter.hasNext()) { 201 | group = group_iter.next(); 202 | Map> duplicateMap = new HashMap>(); 203 | for (Map.Entry> entry : group._2().entrySet()) { 204 | List tmpList = new ArrayList(); 205 | for (Double score : entry.getValue()) 206 | tmpList.add(score.doubleValue()); 207 | duplicateMap.put(entry.getKey(), tmpList); 208 | } 209 | String decisions = historyData.getDecision(duplicateMap); 210 | ProducerRecord data = new ProducerRecord<>(topicOut, 211 | group._1() + ";" + decisions + ";From: " + brokers); 212 | kproducer.send(data); 213 | } 214 | }}); 215 | }}); 216 | 217 | // Start the computation 218 | jssc.start(); 219 | jssc.awaitTermination(); 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /frontend/Communicator/src/main/java/frontend/Communicator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Upload updates to backend and communicate with other cluster 3 | * 4 | * 5 | * Author: Shijie Sun 6 | * Email: septimus145@gmail.com 7 | * August, 2016 8 | */ 9 | 10 | package frontend; 11 | 12 | import java.util.*; 13 | import java.util.regex.Pattern; 14 | import java.util.concurrent.ConcurrentLinkedQueue; 15 | import java.util.concurrent.ConcurrentHashMap; 16 | import java.io.*; 17 | 18 | import scala.Tuple2; 19 | 20 | import org.json.JSONObject; 21 | import org.json.JSONArray; 22 | 23 | import kafka.serializer.StringDecoder; 24 | import org.apache.kafka.clients.producer.KafkaProducer; 25 | import org.apache.kafka.clients.producer.ProducerRecord; 26 | 27 | import org.apache.spark.SparkConf; 28 | import org.apache.spark.rdd.RDD; 29 | import org.apache.spark.api.java.function.*; 30 | import org.apache.spark.api.java.JavaRDD; 31 | import org.apache.spark.api.java.JavaPairRDD; 32 | import org.apache.spark.streaming.api.java.*; 33 | import org.apache.spark.streaming.kafka.*; 34 | import org.apache.spark.streaming.Durations; 35 | 36 | // for changing logger config 37 | import org.apache.log4j.Logger; 38 | import org.apache.log4j.Level; 39 | 40 | 41 | 42 | public final class Communicator { 43 | 44 | public final static int processInterval = 1; // seconds 45 | 46 | public static void main(String[] args) throws Exception { 47 | if (args.length < 1) { 48 | System.err.println("Usage: Communicator config_file"); 49 | System.exit(1); 50 | } 51 | 52 | Logger.getLogger("org").setLevel(Level.OFF); 53 | Logger.getLogger("akka").setLevel(Level.OFF); 54 | 55 | // basic configuration 56 | final Properties config = new Properties(); 57 | InputStream iStream = null; 58 | try { 59 | iStream = new FileInputStream(args[0]); 60 | config.load(iStream); 61 | } catch (IOException ex) { 62 | ex.printStackTrace(); 63 | } finally { 64 | if (iStream != null) { 65 | try { 66 | iStream.close(); 67 | } catch (IOException e) { 68 | e.printStackTrace(); 69 | } 70 | } 71 | } 72 | final String currentClusterID = config.getProperty("clusterID"); 73 | final String updateTopic = config.getProperty("updateTopic"); 74 | final String uploadTopic = config.getProperty("uploadTopic"); 75 | final String decisionTopic = config.getProperty("decisionTopic"); 76 | final String subscribeTopic = config.getProperty("subscribeTopic"); 77 | final String forwardTopic = config.getProperty("forwardTopic"); 78 | final String sampleTopic = config.getProperty("sampleTopic"); 79 | final String aliveTopic = config.getProperty("aliveTopic"); 80 | final int managementLabelsNum = Integer.valueOf(config.getProperty("managementLabelsNum")); 81 | 82 | 
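/* Annotation (not in the original source): judging from the getProperty() calls in this
 * class, the config file passed on the command line is expected to define
 * managementLabelsNum, clusterID, the seven topic names (updateTopic, uploadTopic,
 * decisionTopic, subscribeTopic, forwardTopic, sampleTopic, aliveTopic), a backendBrokers
 * broker list, and one <clusterID>=host:port entry per frontend cluster so that producers
 * for remote clusters can be created on demand.
 */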
// setup producer basic config 83 | final Properties producerProps = new Properties(); 84 | producerProps.put("acks", "all"); 85 | producerProps.put("retries", 0); 86 | producerProps.put("batch.size", 16384); 87 | producerProps.put("linger.ms", 1); 88 | producerProps.put("buffer.memory", 33554432); 89 | producerProps.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 90 | producerProps.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 91 | 92 | // create streaming context 93 | SparkConf sparkConf = new SparkConf().setAppName("Communicator"); 94 | final JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(processInterval)); 95 | 96 | // create a rdd to store the group-subscribers map 97 | List>> groupSubscriberList = new ArrayList<>(); 98 | JavaRDD>> groupSubscriberRDD = jssc.sparkContext().parallelize(groupSubscriberList); 99 | JavaPairRDD> gsRDD = JavaPairRDD.fromJavaRDD(groupSubscriberRDD); 100 | // using queue to make the map updatable 101 | final ConcurrentLinkedQueue>> gsQueue = new ConcurrentLinkedQueue<>(); 102 | gsQueue.add(gsRDD); 103 | 104 | Map kafkaParams = new HashMap<>(); 105 | kafkaParams.put("metadata.broker.list", config.getProperty(currentClusterID)); 106 | // Create stream of upload topic 107 | Set uploadTopicSet = new HashSet<>(Arrays.asList(uploadTopic)); 108 | JavaPairInputDStream uploadMsgs = KafkaUtils.createDirectStream( 109 | jssc, 110 | String.class, 111 | String.class, 112 | StringDecoder.class, 113 | StringDecoder.class, 114 | kafkaParams, 115 | uploadTopicSet 116 | ); 117 | // Create stream of decision topic 118 | Set decisionTopicSet = new HashSet<>(Arrays.asList(decisionTopic)); 119 | JavaPairInputDStream decisionMsgs = KafkaUtils.createDirectStream( 120 | jssc, 121 | String.class, 122 | String.class, 123 | StringDecoder.class, 124 | StringDecoder.class, 125 | kafkaParams, 126 | decisionTopicSet 127 | ); 128 | // Create stream of subscribe topic 129 | Set subscribeTopicSet = new HashSet<>(Arrays.asList(subscribeTopic)); 130 | JavaPairInputDStream subscribeMsgs = KafkaUtils.createDirectStream( 131 | jssc, 132 | String.class, 133 | String.class, 134 | StringDecoder.class, 135 | StringDecoder.class, 136 | kafkaParams, 137 | subscribeTopicSet 138 | ); 139 | // Create stream of forward topic 140 | Set forwardTopicSet = new HashSet<>(Arrays.asList(forwardTopic)); 141 | JavaPairInputDStream forwardMsgs = KafkaUtils.createDirectStream( 142 | jssc, 143 | String.class, 144 | String.class, 145 | StringDecoder.class, 146 | StringDecoder.class, 147 | kafkaParams, 148 | forwardTopicSet 149 | ); 150 | 151 | // upload all the updates 152 | uploadMsgs.foreachRDD(new VoidFunction>() { 153 | // foreachRDD will get RDD of each batch of dstream 154 | @Override 155 | public void call(JavaPairRDD uploadMsgsRDD) throws Exception { 156 | uploadMsgsRDD.sample(false, 0.01).flatMapToPair(new PairFlatMapFunction, String, Integer>() { 157 | @Override 158 | public Iterable> call(Tuple2 tuple2) { 159 | List> result = new ArrayList<>(); 160 | String[] features = tuple2._2().split("\t"); 161 | for (int i = 0; i < (features.length - managementLabelsNum); i++) { 162 | result.add(new Tuple2<>(String.valueOf(i) + ";" + features[i], 1)); 163 | } 164 | return result; 165 | } 166 | }).reduceByKey(new Function2() { 167 | @Override 168 | public Integer call(Integer i1, Integer i2){ 169 | return i1+i2; 170 | } 171 | }).foreachPartition(new VoidFunction>> () { 172 | @Override 173 | public void call(Iterator> 
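/* Annotation (not in the original source): this branch samples roughly 1% of the uploaded
 * updates, splits each sampled update into its feature columns (all but the trailing
 * managementLabelsNum fields), counts occurrences of each (column-index, value) pair and
 * ships them to the backend sampleTopic as "index;value;count". A hedged example with
 * made-up fields: an update "Pittsburgh\tComcast\tcdnA\t1.94" with managementLabelsNum=2
 * yields the pairs "0;Pittsburgh" and "1;Comcast", and 17 such samples in one batch would
 * be sent as "0;Pittsburgh;17". The un-sampled updates are forwarded unchanged to
 * uploadTopic, followed by one aliveTopic heartbeat per partition.
 */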
samples_iter) throws Exception { 174 | producerProps.put("bootstrap.servers", config.getProperty("backendBrokers")); 175 | KafkaProducer kproducer = new KafkaProducer(producerProps); 176 | ProducerRecord data = null; 177 | Tuple2 sample = null; 178 | while (samples_iter.hasNext()) { 179 | sample = samples_iter.next(); 180 | data = new ProducerRecord<>(sampleTopic, sample._1() + ";" + String.valueOf(sample._2())); 181 | kproducer.send(data); 182 | } 183 | } 184 | }); 185 | uploadMsgsRDD.foreachPartition(new VoidFunction>> () { 186 | @Override 187 | public void call(Iterator> updates_iter) throws Exception { 188 | producerProps.put("bootstrap.servers", config.getProperty("backendBrokers")); 189 | KafkaProducer kproducer = new KafkaProducer(producerProps); 190 | ProducerRecord data = null; 191 | Tuple2 update = null; 192 | while (updates_iter.hasNext()) { 193 | update = updates_iter.next(); 194 | data = new ProducerRecord<>(uploadTopic, update._2()); 195 | kproducer.send(data); 196 | } 197 | data = new ProducerRecord<>(aliveTopic, currentClusterID); 198 | kproducer.send(data); 199 | } 200 | }); 201 | } 202 | }); 203 | 204 | // push all the decision to subscribers 205 | decisionMsgs.foreachRDD(new VoidFunction>() { 206 | // foreachRDD will get RDD of each batch of dstream 207 | @Override 208 | public void call(JavaPairRDD decisionMsgsRDD) throws Exception { 209 | // get the group-subscriber map 210 | // collectAsMap has a bug when used by rdd which is not from jssc.parallel directly 211 | // so here need to use collect and convert it to map manually 212 | List>> gsList = gsQueue.peek().collect(); 213 | final Map> groupSubscriber = new HashMap<>(); 214 | for (Tuple2> gs : gsList) { 215 | groupSubscriber.put(gs._1(), gs._2()); 216 | } 217 | // push the decisions 218 | decisionMsgsRDD.foreachPartition(new VoidFunction>> () { 219 | @Override 220 | public void call(Iterator> decisions_iter) throws Exception { 221 | ConcurrentHashMap> producerMap = new ConcurrentHashMap<>(); 222 | Tuple2 decision = null; 223 | KafkaProducer kproducer = null; 224 | ProducerRecord data = null; 225 | String groupID = null; 226 | String clusterID = null; 227 | while (decisions_iter.hasNext()) { 228 | decision = decisions_iter.next(); 229 | groupID = decision._2().split(";")[0]; 230 | // foreach subscriber 231 | if (groupSubscriber.containsKey(groupID)) { 232 | Iterator it = groupSubscriber.get(groupID).iterator(); 233 | while (it.hasNext()) { 234 | clusterID = it.next(); 235 | // if it is not current cluster 236 | if (clusterID.equals(currentClusterID)) 237 | continue; 238 | // if do not have producer for this cluster, create one 239 | if (! 
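/* Annotation (not in the original source): decisions are fanned out here using the
 * group-to-subscribers map collected from gsQueue above; the group id is the text before
 * the first ';' of the decision record, the local cluster is skipped, and a KafkaProducer
 * per remote cluster is created lazily (pointing at that cluster's broker list from the
 * config) and cached in producerMap for the rest of the partition.
 */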
producerMap.containsKey(clusterID)) { 240 | producerProps.put("bootstrap.servers", config.getProperty(clusterID)); 241 | kproducer = new KafkaProducer(producerProps); 242 | producerMap.put(clusterID, kproducer); 243 | } 244 | // push the decision 245 | data = new ProducerRecord<>(decisionTopic, decision._2()); 246 | producerMap.get(clusterID).send(data); 247 | } 248 | } 249 | } 250 | } 251 | }); 252 | } 253 | }); 254 | 255 | // update the subscriber of groups 256 | subscribeMsgs.mapToPair(new PairFunction, String, String>() { 257 | @Override 258 | public Tuple2 call(Tuple2 tuple2) { 259 | return new Tuple2<>(tuple2._2().split(";")[0], tuple2._2().split(";")[1]); 260 | } 261 | }).foreachRDD(new VoidFunction>() { 262 | // foreachRDD will get RDD of each batch of dstream 263 | @Override 264 | public void call(JavaPairRDD subscribeMsgsRDD) throws Exception { 265 | JavaPairRDD> newgsRDD = subscribeMsgsRDD.cogroup(gsQueue.peek()).mapToPair( 266 | new PairFunction, Iterable>>>, String, HashSet>() { 267 | @Override 268 | public Tuple2> call(Tuple2, Iterable>>> tuple2) { 269 | Iterator iter1 = tuple2._2()._1().iterator(); 270 | Iterator> iter2 = tuple2._2()._2().iterator(); 271 | HashSet subscribers = null; 272 | String subscribe = null; 273 | if (iter2.hasNext()) 274 | subscribers = iter2.next(); 275 | else 276 | subscribers = new HashSet<>(); 277 | while (iter1.hasNext()) { 278 | subscribe = iter1.next(); 279 | if (! subscribers.contains(subscribe)) 280 | subscribers.add(subscribe); 281 | } 282 | return new Tuple2(tuple2._1(), subscribers); 283 | } 284 | } 285 | ); 286 | //System.out.println(newgsRDD.collect()); 287 | 288 | // use peek above and first add then poll here 289 | // this is to make sure there is at least one rdd in the queue 290 | // so other stream can always get the rdd through queue.peek() 291 | gsQueue.add(newgsRDD); 292 | gsQueue.poll(); 293 | } 294 | }); 295 | 296 | 297 | // forward the updates of external groups 298 | forwardMsgs.foreachRDD(new VoidFunction>() { 299 | // foreachRDD will get RDD of each batch of dstream 300 | @Override 301 | public void call(JavaPairRDD forwardMsgsRDD) throws Exception { 302 | forwardMsgsRDD.foreachPartition(new VoidFunction>> () { 303 | @Override 304 | public void call(Iterator> forwards_iter) throws Exception { 305 | ConcurrentHashMap> producerMap = new ConcurrentHashMap<>(); 306 | Tuple2 forward = null; 307 | KafkaProducer kproducer = null; 308 | ProducerRecord data = null; 309 | String groupID = null; 310 | String clusterID = null; 311 | // to store all the group_ids and their cluster_ids 312 | Map groupSubs = new HashMap(); 313 | while (forwards_iter.hasNext()) { 314 | forward = forwards_iter.next(); 315 | JSONObject jObject = new JSONObject(forward._2()); 316 | clusterID = jObject.getString("cluster_id"); 317 | groupID = jObject.getString("group_id"); 318 | // foreach subscriber 319 | if (! groupSubs.containsKey(groupID)) 320 | groupSubs.put(groupID, clusterID); 321 | if (! 
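/* Annotation (not in the original source): updates for groups owned by another cluster
 * (the ones InfoSender tags with a foreign "cluster_id") arrive on the forward topic here;
 * each is re-published to the owning cluster's updateTopic, again through a lazily created,
 * cached producer, and afterwards one subscribe record "<group_id>;<currentClusterID>" per
 * forwarded group is sent so that the owning cluster pushes its decisions back to this
 * frontend.
 */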
producerMap.containsKey(clusterID)) { 322 | producerProps.put("bootstrap.servers", config.getProperty(clusterID)); 323 | kproducer = new KafkaProducer(producerProps); 324 | producerMap.put(clusterID, kproducer); 325 | } 326 | data = new ProducerRecord<>(updateTopic, forward._2()); 327 | producerMap.get(clusterID).send(data); 328 | } 329 | // subscribe all the sent groups 330 | for (Map.Entry gsEntry : groupSubs.entrySet()) { 331 | data = new ProducerRecord<>(subscribeTopic, gsEntry.getKey() + ";" + currentClusterID); 332 | producerMap.get(gsEntry.getValue()).send(data); 333 | } 334 | } 335 | }); 336 | } 337 | }); 338 | 339 | // Start the computation 340 | jssc.start(); 341 | jssc.awaitTermination(); 342 | } 343 | } 344 | --------------------------------------------------------------------------------
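The Communicator, DecisionMaker and GroupManager above exchange simple string/JSON records over Kafka, so a quick way to smoke-test a frontend without going through the PHP layer is to publish one hand-written update straight to its internal-groups topic. The sketch below is not part of the repository: the class name FakeUpdateSender, the broker address, topic, group id, decision name and cost are all assumptions chosen to match the record format DecisionMaker.java parses, and it only needs the same kafka-clients and org.json dependencies the project already uses.

package frontend;

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.json.JSONObject;

/** Minimal sketch: publish one hand-written update so DecisionMaker has something to consume. */
public final class FakeUpdateSender {
    public static void main(String[] args) {
        // Assumed values; replace with the real frontend broker and topic.
        String brokers = args.length > 0 ? args[0] : "10.11.10.3:9092";
        String topic   = args.length > 1 ? args[1] : "internal_groups";

        Properties props = new Properties();
        props.put("bootstrap.servers", brokers);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // Same shape as the records DecisionMaker parses: {"group_id": ..., "update": "<decision>\t<cost>"}
        String update = new JSONObject()
                .put("group_id", "g1")          // hypothetical group
                .put("cluster_id", "frontend1") // hypothetical owning cluster
                .put("update", "cdnA\t1.94")    // decision and its cost
                .toString();

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            producer.send(new ProducerRecord<>(topic, update));
        } // close() blocks until the record has been delivered
    }
}

Under those assumptions it can be built against the Communicator's pom and run as `java -cp <jar> frontend.FakeUpdateSender <broker> <topic>`; within a couple of processInterval batches the decision topic should then carry a record like "g1;cdnA:...;From: <broker>".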