├── README.md
├── RL_brain.py
├── ResearchReport.pdf
├── citation.txt
├── fog_env.py
├── plot.py
├── train.py
└── utils.py

/README.md:
--------------------------------------------------------------------------------
1 | # Deep-Q-learning-for-mobile-edge-computing
2 | 
3 | More details can be found at https://ieeexplore.ieee.org/document/9253665
4 | 
5 | To run the code, please install TensorFlow 1.4.0. train.py is the main training script, fog_env.py implements the mobile edge computing environment, and RL_brain.py implements the deep reinforcement learning agent.
6 | 
7 | If you use this code for research, please cite the following paper:
8 | Ming Tang and Vincent W.S. Wong, “Deep Reinforcement Learning for Task Offloading in Mobile Edge Computing Systems,” IEEE Transactions on Mobile Computing, 2020 (Early Access).
9 | 
--------------------------------------------------------------------------------
/RL_brain.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from collections import deque
4 | 
5 | 
6 | class DeepQNetwork:
7 | 
8 |     def __init__(self,
9 |                  n_actions,  # the number of actions
10 |                  n_features,
11 |                  n_lstm_features,
12 |                  n_time,
13 |                  learning_rate=0.01,
14 |                  reward_decay=0.9,
15 |                  e_greedy=0.99,
16 |                  replace_target_iter=200,  # each 200 steps, update target net
17 |                  memory_size=500,  # maximum of memory
18 |                  batch_size=32,
19 |                  e_greedy_increment=0.00025,
20 |                  n_lstm_step=10,
21 |                  dueling=True,
22 |                  double_q=True,
23 |                  N_L1=20,
24 |                  N_lstm=20,
25 |                  optimizer='rms_prop',
26 |                  seed=0):
27 | 
28 |         self.n_actions = n_actions
29 |         self.n_features = n_features
30 |         self.n_time = n_time
31 |         self.lr = learning_rate
32 |         self.gamma = reward_decay
33 |         self.epsilon_max = e_greedy
34 |         self.replace_target_iter = replace_target_iter
35 |         self.memory_size = memory_size
36 |         self.batch_size = batch_size  # select self.batch_size time sequences for learning
37 |         self.epsilon_increment = e_greedy_increment
38 |         self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
39 |         self.dueling = dueling
40 |         self.double_q = double_q
41 |         self.learn_step_counter = 0
42 |         self.N_L1 = N_L1
43 |         self.seed = seed
44 | 
45 |         if optimizer not in ['adam', 'gd', 'rms_prop']:
46 |             raise SystemExit(
47 |                 f"Invalid optimizer: {optimizer}.\nChoose one of " +
48 |                 "['adam', 'rms_prop', 'gd'], via CLI with flag --optimizer")
49 |         else:
50 |             self.optimizer = optimizer
51 | 
52 |         # lstm
53 |         self.N_lstm = N_lstm
54 |         self.n_lstm_step = n_lstm_step  # step_size in lstm
55 |         self.n_lstm_state = n_lstm_features  # [fog1, fog2, ...., fogn, M_n(t)]
56 | 
57 |         # initialize zero memory np.hstack((s, [a, r], s_, lstm_s, lstm_s_))
58 |         self.memory = np.zeros((self.memory_size, self.n_features + 1 + 1
59 |                                 + self.n_features + self.n_lstm_state + self.n_lstm_state))
60 | 
61 |         # consists of [target_net, evaluate_net]
62 |         self._build_net(optimizer=self.optimizer, seed=self.seed)
63 | 
64 |         # replace the parameters in target net
65 |         t_params = tf.get_collection('target_net_params')  # obtain the parameters in target_net
66 |         e_params = tf.get_collection('eval_net_params')  # obtain the parameters in eval_net
67 |         self.replace_target_op = [tf.assign(t, e) for t, e in
68 |                                   zip(t_params, e_params)]  # update the parameters in target_net
69 | 
70 |         self.sess = tf.Session()
71 | 
72 |         self.sess.run(tf.global_variables_initializer())
73 |         self.reward_store = list()
74 |         self.action_store = list()
75 |         self.delay_store = list()
76 | 
77 | 
self.lstm_history = deque(maxlen=self.n_lstm_step) 78 | for ii in range(self.n_lstm_step): 79 | self.lstm_history.append(np.zeros([self.n_lstm_state])) 80 | 81 | self.store_q_value = list() 82 | 83 | def _build_net(self, optimizer='rms_prop', seed=0): 84 | 85 | tf.reset_default_graph() 86 | tf.set_random_seed(seed) 87 | 88 | def build_layers(s,lstm_s,c_names, n_l1, n_lstm, w_initializer, b_initializer): 89 | 90 | # lstm for load levels 91 | with tf.variable_scope('l0'): 92 | lstm_dnn = tf.contrib.rnn.BasicLSTMCell(n_lstm) 93 | lstm_dnn.zero_state(self.batch_size, tf.float32) 94 | lstm_output,lstm_state = tf.nn.dynamic_rnn(lstm_dnn, lstm_s, dtype=tf.float32) 95 | lstm_output_reduced = tf.reshape(lstm_output[:, -1, :], shape=[-1, n_lstm]) 96 | 97 | # first layer 98 | with tf.variable_scope('l1'): 99 | w1 = tf.get_variable('w1',[n_lstm + self.n_features, n_l1], initializer=w_initializer, 100 | collections=c_names) 101 | b1 = tf.get_variable('b1',[1,n_l1],initializer=b_initializer, collections=c_names) 102 | l1 = tf.nn.relu(tf.matmul(tf.concat([lstm_output_reduced, s],1), w1) + b1) 103 | 104 | # second layer 105 | with tf.variable_scope('l12'): 106 | w12 = tf.get_variable('w12', [n_l1, n_l1], initializer=w_initializer, 107 | collections=c_names) 108 | b12 = tf.get_variable('b12', [1, n_l1], initializer=b_initializer, collections=c_names) 109 | l12 = tf.nn.relu(tf.matmul(l1, w12) + b12) 110 | 111 | # the second layer is different 112 | if self.dueling: 113 | # Dueling DQN 114 | # a single output n_l1 -> 1 115 | with tf.variable_scope('Value'): 116 | w2 = tf.get_variable('w2',[n_l1,1],initializer=w_initializer,collections=c_names) 117 | b2 = tf.get_variable('b2',[1,1],initializer=b_initializer,collections=c_names) 118 | self.V = tf.matmul(l12,w2) + b2 119 | # n_l1 -> n_actions 120 | with tf.variable_scope('Advantage'): 121 | w2 = tf.get_variable('w2',[n_l1,self.n_actions],initializer=w_initializer,collections=c_names) 122 | b2 = tf.get_variable('b2',[1,self.n_actions],initializer=b_initializer,collections=c_names) 123 | self.A = tf.matmul(l12,w2) + b2 124 | 125 | with tf.variable_scope('Q'): 126 | out = self.V + (self.A - tf.reduce_mean(self.A,axis=1,keep_dims=True)) # Q = V(s) +A(s,a) 127 | 128 | else: 129 | with tf.variable_scope('Q'): 130 | w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names) 131 | b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names) 132 | out = tf.matmul(l1, w2) + b2 133 | 134 | return out 135 | 136 | # input for eval_net 137 | self.s = tf.placeholder(tf.float32,[None,self.n_features], name = 's') # state (observation) 138 | self.lstm_s = tf.placeholder(tf.float32,[None,self.n_lstm_step,self.n_lstm_state], name='lstm1_s') 139 | 140 | self.q_target = tf.placeholder(tf.float32,[None,self.n_actions], name = 'Q_target') # q_target 141 | 142 | # input for target_net 143 | self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_') 144 | self.lstm_s_ = tf.placeholder(tf.float32,[None,self.n_lstm_step,self.n_lstm_state], name='lstm1_s_') 145 | 146 | # generate EVAL_NET, update parameters 147 | with tf.variable_scope('eval_net'): 148 | 149 | # c_names(collections_names), will be used when update target_net 150 | # tf.random_normal_initializer(mean=0.0, stddev=1.0, seed=None, dtype=tf.float32), return a initializer 151 | c_names, n_l1, n_lstm, w_initializer, b_initializer = \ 152 | ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], self.N_L1, self.N_lstm,\ 153 | 
tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1) # config of layers 154 | 155 | # input (n_feature) -> l1 (n_l1) -> l2 (n_actions) 156 | self.q_eval = build_layers(self.s, self.lstm_s, c_names, n_l1, n_lstm, w_initializer, b_initializer) 157 | 158 | # generate TARGET_NET 159 | with tf.variable_scope('target_net'): 160 | c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES] 161 | 162 | self.q_next = build_layers(self.s_, self.lstm_s_, c_names, n_l1, n_lstm, w_initializer, b_initializer) 163 | 164 | # loss and train 165 | with tf.variable_scope('loss'): 166 | self.loss = tf.reduce_mean(tf.squared_difference(self.q_target,self.q_eval)) 167 | with tf.variable_scope('train'): 168 | if optimizer == 'rms_prop': 169 | self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss) 170 | elif optimizer == 'adam': 171 | self._train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) 172 | elif optimizer == 'gd': 173 | self._train_op = \ 174 | tf.train.GradientDescentOptimizer(self.lr).minimize(self.loss) 175 | 176 | def store_transition(self, s, lstm_s, a, r, s_, lstm_s_): 177 | # RL.store_transition(observation,action,reward,observation_) 178 | # hasattr(object, name), if object has name attribute 179 | if not hasattr(self, 'memory_counter'): 180 | self.memory_counter = 0 181 | 182 | # store np.hstack((s, [a, r], s_, lstm_s, lstm_s_)) 183 | transition = np.hstack((s, [a, r], s_, lstm_s, lstm_s_)) # stack in horizontal direction 184 | 185 | # if memory overflows, replace old memory with new one 186 | index = self.memory_counter % self.memory_size 187 | # print(transition) 188 | self.memory[index, :] = transition 189 | self.memory_counter += 1 190 | 191 | def update_lstm(self, lstm_s): 192 | 193 | self.lstm_history.append(lstm_s) 194 | 195 | def choose_action(self, observation, inference=False): 196 | # the shape of the observation (1, size_of_observation) 197 | # x1 = np.array([1, 2, 3, 4, 5]), x1_new = x1[np.newaxis, :], now, the shape of x1_new is (1, 5) 198 | observation = observation[np.newaxis, :] 199 | 200 | if inference or np.random.uniform() < self.epsilon: 201 | 202 | # lstm only contains history, there is no current observation 203 | lstm_observation = np.array(self.lstm_history) 204 | 205 | actions_value = self.sess.run(self.q_eval, 206 | feed_dict={self.s: observation, 207 | self.lstm_s: lstm_observation.reshape(1, self.n_lstm_step, 208 | self.n_lstm_state), 209 | }) 210 | 211 | self.store_q_value.append({'observation': observation, 'q_value': actions_value}) 212 | 213 | action = np.argmax(actions_value) 214 | 215 | else: 216 | 217 | action = np.random.randint(0, self.n_actions) 218 | 219 | return action 220 | 221 | def learn(self): 222 | 223 | # check if replace target_net parameters 224 | if self.learn_step_counter % self.replace_target_iter == 0: 225 | # run the self.replace_target_op in __int__ 226 | self.sess.run(self.replace_target_op) 227 | # print(f"{self.learn_step_counter}: target_params_replaced\n") 228 | 229 | # randomly pick [batch_size] memory from memory np.hstack((s, [a, r], s_, lstm_s, lstm_s_)) 230 | if self.memory_counter > self.memory_size: 231 | sample_index = np.random.choice(self.memory_size - self.n_lstm_step, size=self.batch_size) 232 | else: 233 | sample_index = np.random.choice(self.memory_counter - self.n_lstm_step, size=self.batch_size)\ 234 | 235 | # transition = np.hstack(s, [a, r], s_, lstm_s, lstm_s_) 236 | batch_memory = self.memory[sample_index, :self.n_features+1+1+self.n_features] 237 | lstm_batch_memory = 
np.zeros([self.batch_size, self.n_lstm_step, self.n_lstm_state * 2]) 238 | for ii in range(len(sample_index)): 239 | for jj in range(self.n_lstm_step): 240 | lstm_batch_memory[ii,jj,:] = self.memory[sample_index[ii]+jj, 241 | self.n_features+1+1+self.n_features:] 242 | 243 | # obtain q_next (from target_net) (to q_target) and q_eval (from eval_net) 244 | # minimize(target_q - q_eval)^2 245 | # q_target = reward + gamma * q_next 246 | # in the size of bacth_memory 247 | # q_next, given the next state from batch, what will be the q_next from q_next 248 | # q_eval4next, given the next state from batch, what will be the q_eval4next from q_eval 249 | q_next, q_eval4next = self.sess.run( 250 | [self.q_next, self.q_eval], # output 251 | feed_dict={ 252 | # [s, a, r, s_] 253 | # input for target_q (last) 254 | self.s_: batch_memory[:, -self.n_features:], self.lstm_s_: lstm_batch_memory[:,:,self.n_lstm_state:], 255 | # input for eval_q (last) 256 | self.s: batch_memory[:, -self.n_features:], self.lstm_s: lstm_batch_memory[:,:,self.n_lstm_state:], 257 | } 258 | ) 259 | # q_eval, given the current state from batch, what will be the q_eval from q_eval 260 | q_eval = self.sess.run(self.q_eval, {self.s: batch_memory[:, :self.n_features], 261 | self.lstm_s: lstm_batch_memory[:,:,:self.n_lstm_state]}) 262 | q_target = q_eval.copy() 263 | batch_index = np.arange(self.batch_size, dtype=np.int32) 264 | eval_act_index = batch_memory[:, self.n_features].astype(int) # action with a single value (int action) 265 | reward = batch_memory[:, self.n_features + 1] # reward with a single value 266 | 267 | # update the q_target at the particular batch at the correponding action 268 | if self.double_q: 269 | max_act4next = np.argmax(q_eval4next, axis=1) 270 | selected_q_next = q_next[batch_index, max_act4next] 271 | else: 272 | selected_q_next = np.max(q_next, axis=1) 273 | 274 | q_target[batch_index, eval_act_index] = reward + self.gamma * selected_q_next 275 | 276 | # both self.s and self.q_target belong to eval_q 277 | # input self.s and self.q_target, output self._train_op, self.loss (to minimize the gap) 278 | # self.sess.run: given input (feed), output the required element 279 | _, self.cost = self.sess.run([self._train_op, self.loss], 280 | feed_dict={self.s: batch_memory[:, :self.n_features], 281 | self.lstm_s: lstm_batch_memory[:, :, :self.n_lstm_state], 282 | self.q_target: q_target}) 283 | 284 | # gradually increase epsilon 285 | self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max 286 | self.learn_step_counter += 1 287 | 288 | def do_store_reward(self, episode, time, reward): 289 | while episode >= len(self.reward_store): 290 | self.reward_store.append(np.zeros([self.n_time])) 291 | self.reward_store[episode][time] = reward 292 | 293 | def do_store_action(self,episode,time, action): 294 | while episode >= len(self.action_store): 295 | self.action_store.append(- np.ones([self.n_time])) 296 | self.action_store[episode][time] = action 297 | 298 | def do_store_delay(self, episode, time, delay): 299 | while episode >= len(self.delay_store): 300 | self.delay_store.append(np.zeros([self.n_time])) 301 | self.delay_store[episode][time] = delay -------------------------------------------------------------------------------- /ResearchReport.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SZU-AdvTech-2024/059-Deep-Reinforcement-Learning-for-Task-Offloading-in-Mobile-Edge-Computing-Systems/c63a038ecd7254a0a7bb2ca7c787984e9a5ae2b3/ResearchReport.pdf -------------------------------------------------------------------------------- /citation.txt: -------------------------------------------------------------------------------- 1 | @article{REPO059, 2 | author = "Tang, Ming and Wong, Vincent WS", 3 | journal = "IEEE Transactions on Mobile Computing", 4 | number = "6", 5 | pages = "1985--1997", 6 | publisher = "IEEE", 7 | title = "{Deep reinforcement learning for task offloading in mobile edge computing systems}", 8 | volume = "21", 9 | year = "2020" 10 | } 11 | -------------------------------------------------------------------------------- /fog_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import math 4 | import queue 5 | 6 | class Offload: 7 | 8 | def __init__(self, num_iot, num_fog, num_time, max_delay, task_arrive_prob): 9 | 10 | # INPUT DATA 11 | self.n_iot = num_iot 12 | self.n_fog = num_fog 13 | self.n_time = num_time 14 | self.duration = 0.1 15 | 16 | # test 17 | self.drop_trans_count = 0 18 | self.drop_fog_count = 0 19 | self.drop_iot_count = 0 20 | 21 | # CONSIDER A SCENARIO RANDOM IS NOT GOOD 22 | # LOCAL CAP SHOULD NOT BE TOO SMALL, OTHERWISE, THE STATE MATRIX IS TOO LARGE (EXCEED THE MAXIMUM) 23 | # SHOULD NOT BE LESS THAN ONE 24 | self.comp_cap_iot = 2.5 * np.ones(self.n_iot) * self.duration # 2.5 Gigacycles per second * duration 25 | self.comp_cap_fog = 41.8 * np.ones([self.n_fog]) * self.duration # Gigacycles per second * duration 26 | self.tran_cap_iot = 14 * np.ones([self.n_iot, self.n_fog]) * self.duration # Mbps * duration 27 | self.comp_density = 0.297 * np.ones([self.n_iot]) # 0.297 Gigacycles per Mbits 28 | self.max_delay = max_delay # time slots 29 | 30 | # BITARRIVE_SET (MARKOVIAN) 31 | self.task_arrive_prob = task_arrive_prob # 0.3 32 | self.max_bit_arrive = 5 # Mbits 33 | self.min_bit_arrive = 2 # Mbits 34 | self.bitArrive_set = np.arange(self.min_bit_arrive, self.max_bit_arrive, 0.1) 35 | self.bitArrive = np.zeros([self.n_time, self.n_iot]) 36 | 37 | # ACTION: 0, local; 1, fog 0; 2, fog 1; ...; n, fog n - 1 38 | self.n_actions = 1 + num_fog 39 | # STATE: [A, t^{comp}, t^{tran}, [B^{fog}]] 40 | self.n_features = 1 + 1 + 1 + num_fog 41 | # LSTM STATE 42 | self.n_lstm_state = self.n_fog # [fog1, fog2, ...., fogn] 43 | 44 | # TIME COUNT 45 | self.time_count = int(0) 46 | 47 | # QUEUE INITIALIZATION: size -> task size; time -> arrive time 48 | self.Queue_iot_comp = list() 49 | self.Queue_iot_tran = list() 50 | self.Queue_fog_comp = list() 51 | 52 | for iot in range(self.n_iot): 53 | self.Queue_iot_comp.append(queue.Queue()) 54 | self.Queue_iot_tran.append(queue.Queue()) 55 | self.Queue_fog_comp.append(list()) 56 | for fog in range(self.n_fog): 57 | self.Queue_fog_comp[iot].append(queue.Queue()) 58 | 59 | # QUEUE INFO INITIALIZATION 60 | self.t_iot_comp = - np.ones([self.n_iot]) 61 | self.t_iot_tran = - np.ones([self.n_iot]) 62 | self.b_fog_comp = np.zeros([self.n_iot, self.n_fog]) 63 | 64 | # TASK INDICATOR 65 | self.task_on_process_local = list() 66 | self.task_on_transmit_local = list() 67 | self.task_on_process_fog = list() 68 | self.fog_iot_m = np.zeros(self.n_fog) 69 | self.fog_iot_m_observe = np.zeros(self.n_fog) 70 | 71 | for iot in range(self.n_iot): 72 | self.task_on_process_local.append({'size': np.nan, 'time': np.nan, 'remain': 
np.nan}) 73 | self.task_on_transmit_local.append({'size': np.nan, 'time': np.nan, 74 | 'fog': np.nan, 'remain': np.nan}) 75 | self.task_on_process_fog.append(list()) 76 | for fog in range(self.n_fog): 77 | self.task_on_process_fog[iot].append({'size': np.nan, 'time': np.nan, 'remain': np.nan}) 78 | 79 | # TASK DELAY 80 | self.process_delay = np.zeros([self.n_time, self.n_iot]) # total delay 81 | self.process_delay_unfinish_ind = np.zeros([self.n_time, self.n_iot]) # unfinished indicator 82 | self.process_delay_trans = np.zeros([self.n_time, self.n_iot]) # transmission delay (if applied) 83 | 84 | self.fog_drop = np.zeros([self.n_iot, self.n_fog]) 85 | 86 | # reset the network scenario 87 | def reset(self, bitArrive): 88 | 89 | # test 90 | self.drop_trans_count = 0 91 | self.drop_fog_count = 0 92 | self.drop_iot_count = 0 93 | 94 | # BITRATE 95 | self.bitArrive = bitArrive 96 | 97 | # TIME COUNT 98 | self.time_count = int(0) 99 | 100 | # QUEUE INITIALIZATION 101 | self.Queue_iot_comp = list() 102 | self.Queue_iot_tran = list() 103 | self.Queue_fog_comp = list() 104 | 105 | for iot in range(self.n_iot): 106 | self.Queue_iot_comp.append(queue.Queue()) 107 | self.Queue_iot_tran.append(queue.Queue()) 108 | self.Queue_fog_comp.append(list()) 109 | for fog in range(self.n_fog): 110 | self.Queue_fog_comp[iot].append(queue.Queue()) 111 | 112 | # QUEUE INFO INITIALIZATION 113 | self.t_iot_comp = - np.ones([self.n_iot]) 114 | self.t_iot_tran = - np.ones([self.n_iot]) 115 | self.b_fog_comp = np.zeros([self.n_iot, self.n_fog]) 116 | 117 | # TASK INDICATOR 118 | self.task_on_process_local = list() 119 | self.task_on_transmit_local = list() 120 | self.task_on_process_fog = list() 121 | 122 | for iot in range(self.n_iot): 123 | self.task_on_process_local.append({'size': np.nan, 'time': np.nan, 'remain': np.nan}) 124 | self.task_on_transmit_local.append({'size': np.nan, 'time': np.nan, 125 | 'fog': np.nan, 'remain': np.nan}) 126 | self.task_on_process_fog.append(list()) 127 | for fog in range(self.n_fog): 128 | self.task_on_process_fog[iot].append({'size': np.nan, 'time': np.nan, 'remain': np.nan}) 129 | 130 | # TASK DELAY 131 | self.process_delay = np.zeros([self.n_time, self.n_iot]) 132 | self.process_delay_unfinish_ind = np.zeros([self.n_time, self.n_iot]) # unfinished indicator 133 | self.process_delay_trans = np.zeros([self.n_time, self.n_iot]) # transmission delay (if applied) 134 | 135 | self.fog_drop = np.zeros([self.n_iot, self.n_fog]) 136 | 137 | # INITIAL 138 | observation_all = np.zeros([self.n_iot, self.n_features]) 139 | for iot_index in range(self.n_iot): 140 | # observation is zero if there is no task arrival 141 | if self.bitArrive[self.time_count, iot_index] != 0: 142 | # state [A, B^{comp}, B^{tran}, [B^{fog}]] 143 | observation_all[iot_index, :] = np.hstack([ 144 | self.bitArrive[self.time_count, iot_index], self.t_iot_comp[iot_index], 145 | self.t_iot_tran[iot_index], 146 | np.squeeze(self.b_fog_comp[iot_index, :])]) 147 | 148 | lstm_state_all = np.zeros([self.n_iot, self.n_lstm_state]) 149 | 150 | return observation_all, lstm_state_all 151 | 152 | # perform action, observe state and delay (several steps later) 153 | def step(self, action): 154 | 155 | # EXTRACT ACTION FOR EACH IOT 156 | iot_action_local = np.zeros([self.n_iot], np.int32) 157 | iot_action_fog = np.zeros([self.n_iot], np.int32) 158 | for iot_index in range(self.n_iot): 159 | iot_action = action[iot_index] 160 | iot_action_fog[iot_index] = int(iot_action - 1) 161 | if iot_action == 0: 162 | iot_action_local[iot_index] = 
1 163 | 164 | # COMPUTATION QUEUE UPDATE =================== 165 | for iot_index in range(self.n_iot): 166 | 167 | iot_bitarrive = np.squeeze(self.bitArrive[self.time_count, iot_index]) 168 | iot_comp_cap = np.squeeze(self.comp_cap_iot[iot_index]) 169 | iot_comp_density = self.comp_density[iot_index] 170 | 171 | # INPUT 172 | if iot_action_local[iot_index] == 1: 173 | tmp_dict = {'size': iot_bitarrive, 'time': self.time_count} 174 | self.Queue_iot_comp[iot_index].put(tmp_dict) 175 | 176 | # TASK ON PROCESS 177 | if math.isnan(self.task_on_process_local[iot_index]['remain']) \ 178 | and (not self.Queue_iot_comp[iot_index].empty()): 179 | while not self.Queue_iot_comp[iot_index].empty(): 180 | # only put the non-zero task to the processor 181 | get_task = self.Queue_iot_comp[iot_index].get() 182 | # since it is at the beginning of the time slot, = self.max_delay is acceptable 183 | if get_task['size'] != 0: 184 | if self.time_count - get_task['time'] + 1 <= self.max_delay: 185 | self.task_on_process_local[iot_index]['size'] = get_task['size'] 186 | self.task_on_process_local[iot_index]['time'] = get_task['time'] 187 | self.task_on_process_local[iot_index]['remain'] \ 188 | = self.task_on_process_local[iot_index]['size'] 189 | break 190 | else: 191 | self.process_delay[get_task['time'], iot_index] = self.max_delay 192 | self.process_delay_unfinish_ind[get_task['time'], iot_index] = 1 193 | 194 | # PROCESS 195 | if self.task_on_process_local[iot_index]['remain'] > 0: 196 | self.task_on_process_local[iot_index]['remain'] = \ 197 | self.task_on_process_local[iot_index]['remain'] - iot_comp_cap / iot_comp_density 198 | # if no remain, compute processing delay 199 | if self.task_on_process_local[iot_index]['remain'] <= 0: 200 | self.process_delay[self.task_on_process_local[iot_index]['time'], iot_index] \ 201 | = self.time_count - self.task_on_process_local[iot_index]['time'] + 1 202 | self.task_on_process_local[iot_index]['remain'] = np.nan 203 | elif self.time_count - self.task_on_process_local[iot_index]['time'] + 1 == self.max_delay: 204 | self.process_delay[self.task_on_process_local[iot_index]['time'], iot_index] = self.max_delay 205 | self.process_delay_unfinish_ind[self.task_on_process_local[iot_index]['time'], iot_index] = 1 206 | self.task_on_process_local[iot_index]['remain'] = np.nan 207 | 208 | self.drop_iot_count = self.drop_iot_count + 1 209 | 210 | # OTHER INFO self.t_iot_comp[iot_index] 211 | # update self.t_iot_comp[iot_index] only when iot_bitrate != 0 212 | if iot_bitarrive != 0: 213 | tmp_tilde_t_iot_comp = np.max([self.t_iot_comp[iot_index] + 1, self.time_count]) 214 | self.t_iot_comp[iot_index] = np.min([tmp_tilde_t_iot_comp 215 | + math.ceil(iot_bitarrive * iot_action_local[iot_index] 216 | / (iot_comp_cap / iot_comp_density)) - 1, 217 | self.time_count + self.max_delay - 1]) 218 | 219 | # FOG QUEUE UPDATE ========================= 220 | for iot_index in range(self.n_iot): 221 | 222 | iot_comp_density = self.comp_density[iot_index] 223 | 224 | for fog_index in range(self.n_fog): 225 | 226 | # TASK ON PROCESS 227 | if math.isnan(self.task_on_process_fog[iot_index][fog_index]['remain']) \ 228 | and (not self.Queue_fog_comp[iot_index][fog_index].empty()): 229 | while not self.Queue_fog_comp[iot_index][fog_index].empty(): 230 | get_task = self.Queue_fog_comp[iot_index][fog_index].get() 231 | if self.time_count - get_task['time'] + 1 <= self.max_delay: 232 | self.task_on_process_fog[iot_index][fog_index]['size'] \ 233 | = get_task['size'] 234 | 
self.task_on_process_fog[iot_index][fog_index]['time'] \ 235 | = get_task['time'] 236 | self.task_on_process_fog[iot_index][fog_index]['remain'] \ 237 | = self.task_on_process_fog[iot_index][fog_index]['size'] 238 | break 239 | else: 240 | self.process_delay[get_task['time'], iot_index] = self.max_delay 241 | self.process_delay_unfinish_ind[get_task['time'], iot_index] = 1 242 | 243 | # PROCESS 244 | self.fog_drop[iot_index, fog_index] = 0 245 | if self.task_on_process_fog[iot_index][fog_index]['remain'] > 0: 246 | self.task_on_process_fog[iot_index][fog_index]['remain'] = \ 247 | self.task_on_process_fog[iot_index][fog_index]['remain'] \ 248 | - self.comp_cap_fog[fog_index] / iot_comp_density / self.fog_iot_m[fog_index] 249 | # if no remain, compute processing delay 250 | if self.task_on_process_fog[iot_index][fog_index]['remain'] <= 0: 251 | self.process_delay[self.task_on_process_fog[iot_index][fog_index]['time'],iot_index] \ 252 | = self.time_count - self.task_on_process_fog[iot_index][fog_index]['time'] + 1 253 | self.task_on_process_fog[iot_index][fog_index]['remain'] = np.nan 254 | elif self.time_count - self.task_on_process_fog[iot_index][fog_index]['time'] + 1 == self.max_delay: 255 | self.process_delay[self.task_on_process_fog[iot_index][fog_index]['time'], iot_index] = \ 256 | self.max_delay 257 | self.process_delay_unfinish_ind[self.task_on_process_fog[iot_index][fog_index]['time'], 258 | iot_index] = 1 259 | self.fog_drop[iot_index, fog_index] = self.task_on_process_fog[iot_index][fog_index]['remain'] 260 | self.task_on_process_fog[iot_index][fog_index]['remain'] = np.nan 261 | 262 | self.drop_fog_count = self.drop_fog_count + 1 263 | 264 | # OTHER INFO 265 | if self.fog_iot_m[fog_index] != 0: 266 | self.b_fog_comp[iot_index, fog_index] \ 267 | = np.max([self.b_fog_comp[iot_index, fog_index] 268 | - self.comp_cap_fog[fog_index] / iot_comp_density / self.fog_iot_m[fog_index] 269 | - self.fog_drop[iot_index, fog_index], 0]) 270 | 271 | # TRANSMISSION QUEUE UPDATE =================== 272 | for iot_index in range(self.n_iot): 273 | 274 | iot_tran_cap = np.squeeze(self.tran_cap_iot[iot_index,:]) 275 | iot_bitarrive = np.squeeze(self.bitArrive[self.time_count, iot_index]) 276 | 277 | # INPUT 278 | if iot_action_local[iot_index] == 0: 279 | tmp_dict = {'size': self.bitArrive[self.time_count, iot_index], 'time': self.time_count, 280 | 'fog': iot_action_fog[iot_index]} 281 | self.Queue_iot_tran[iot_index].put(tmp_dict) 282 | 283 | # TASK ON PROCESS 284 | if math.isnan(self.task_on_transmit_local[iot_index]['remain']) \ 285 | and (not self.Queue_iot_tran[iot_index].empty()): 286 | while not self.Queue_iot_tran[iot_index].empty(): 287 | get_task = self.Queue_iot_tran[iot_index].get() 288 | if get_task['size'] != 0: 289 | if self.time_count - get_task['time'] + 1 <= self.max_delay: 290 | self.task_on_transmit_local[iot_index]['size'] = get_task['size'] 291 | self.task_on_transmit_local[iot_index]['time'] = get_task['time'] 292 | self.task_on_transmit_local[iot_index]['fog'] = int(get_task['fog']) 293 | self.task_on_transmit_local[iot_index]['remain'] = \ 294 | self.task_on_transmit_local[iot_index]['size'] 295 | break 296 | else: 297 | self.process_delay[get_task['time'], iot_index] = self.max_delay 298 | self.process_delay_unfinish_ind[get_task['time'], iot_index] = 1 299 | 300 | # PROCESS 301 | if self.task_on_transmit_local[iot_index]['remain'] > 0: 302 | self.task_on_transmit_local[iot_index]['remain'] = \ 303 | self.task_on_transmit_local[iot_index]['remain'] \ 304 | - 
iot_tran_cap[self.task_on_transmit_local[iot_index]['fog']]
305 | 
306 |                 # UPDATE FOG QUEUE
307 |                 if self.task_on_transmit_local[iot_index]['remain'] <= 0:
308 |                     tmp_dict = {'size': self.task_on_transmit_local[iot_index]['size'],
309 |                                 'time': self.task_on_transmit_local[iot_index]['time']}
310 |                     self.Queue_fog_comp[iot_index][self.task_on_transmit_local[iot_index]['fog']].put(tmp_dict)
311 | 
312 |                     # OTHER INFO
313 |                     fog_index = self.task_on_transmit_local[iot_index]['fog']
314 |                     self.b_fog_comp[iot_index, fog_index] \
315 |                         = self.b_fog_comp[iot_index, fog_index] + self.task_on_transmit_local[iot_index]['size']
316 |                     self.process_delay_trans[self.task_on_transmit_local[iot_index]['time'], iot_index] \
317 |                         = self.time_count - self.task_on_transmit_local[iot_index]['time'] + 1
318 |                     self.task_on_transmit_local[iot_index]['remain'] = np.nan
319 | 
320 |                 elif self.time_count - self.task_on_transmit_local[iot_index]['time'] + 1 == self.max_delay:
321 |                     self.process_delay[self.task_on_transmit_local[iot_index]['time'], iot_index] = self.max_delay
322 |                     self.process_delay_trans[self.task_on_transmit_local[iot_index]['time'], iot_index] \
323 |                         = self.max_delay
324 |                     self.process_delay_unfinish_ind[self.task_on_transmit_local[iot_index]['time'], iot_index] = 1
325 |                     self.task_on_transmit_local[iot_index]['remain'] = np.nan
326 | 
327 |                     self.drop_trans_count = self.drop_trans_count + 1
328 | 
329 |             # OTHER INFO: update the transmission queue estimate self.t_iot_tran[iot_index]
330 |             if iot_bitarrive != 0:
331 |                 tmp_tilde_t_iot_tran = np.max([self.t_iot_tran[iot_index] + 1, self.time_count])
332 |                 self.t_iot_tran[iot_index] = np.min([tmp_tilde_t_iot_tran
333 |                                                      + math.ceil(iot_bitarrive * (1 - iot_action_local[iot_index])
334 |                                                                  / iot_tran_cap[iot_action_fog[iot_index]]) - 1,
335 |                                                      self.time_count + self.max_delay - 1])
336 | 
337 |         # COMPUTE CONGESTION (FOR NEXT TIME SLOT)
338 |         self.fog_iot_m_observe = self.fog_iot_m
339 |         self.fog_iot_m = np.zeros(self.n_fog)
340 |         for fog_index in range(self.n_fog):
341 |             for iot_index in range(self.n_iot):
342 |                 if (not self.Queue_fog_comp[iot_index][fog_index].empty()) \
343 |                         or self.task_on_process_fog[iot_index][fog_index]['remain'] > 0:
344 |                     self.fog_iot_m[fog_index] += 1
345 | 
346 |         # TIME UPDATE
347 |         self.time_count = self.time_count + 1
348 |         done = False
349 |         if self.time_count >= self.n_time:
350 |             done = True
351 |             # set all the tasks' processing delay and unfinished indicator
352 |             for time_index in range(self.n_time):
353 |                 for iot_index in range(self.n_iot):
354 |                     if self.process_delay[time_index, iot_index] == 0 and self.bitArrive[time_index, iot_index] != 0:
355 |                         self.process_delay[time_index, iot_index] = (self.time_count - 1) - time_index + 1
356 |                         self.process_delay_unfinish_ind[time_index, iot_index] = 1
357 | 
358 |         # OBSERVATION
359 |         observation_all_ = np.zeros([self.n_iot, self.n_features])
360 |         lstm_state_all_ = np.zeros([self.n_iot, self.n_lstm_state])
361 |         if not done:
362 |             for iot_index in range(self.n_iot):
363 |                 # observation is zero if there is no task arrival
364 |                 if self.bitArrive[self.time_count, iot_index] != 0:
365 |                     # state [A, B^{comp}, B^{tran}, [B^{fog}]]
366 |                     observation_all_[iot_index, :] = np.hstack([
367 |                         self.bitArrive[self.time_count, iot_index],
368 |                         self.t_iot_comp[iot_index] - self.time_count + 1,
369 |                         self.t_iot_tran[iot_index] - self.time_count + 1,
370 |                         self.b_fog_comp[iot_index, :]])
371 | 
372 |                 lstm_state_all_[iot_index, :] = np.hstack(self.fog_iot_m_observe)
373 | 
374 |         return observation_all_, lstm_state_all_, done
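
For reference, a minimal sketch (not one of the repository files) of how the Offload environment above is driven: one action per IoT device per time slot, a reset at the start of each episode, and a step per slot until done. The parameter values below are illustrative assumptions; train.py itself uses NUM_TIME = 110, MAX_DELAY = 10 and one DeepQNetwork agent per device instead of the random policy shown here.

import numpy as np
from fog_env import Offload

env = Offload(num_iot=5, num_fog=2, num_time=110, max_delay=10, task_arrive_prob=0.3)

# Bernoulli task arrivals with uniform sizes; the last max_delay slots carry no arrivals (as in train.py)
bitarrive = np.random.uniform(env.min_bit_arrive, env.max_bit_arrive, size=[env.n_time, env.n_iot])
bitarrive = bitarrive * (np.random.uniform(0, 1, size=[env.n_time, env.n_iot]) < env.task_arrive_prob)
bitarrive[-env.max_delay:, :] = 0

observation_all, lstm_state_all = env.reset(bitarrive)
done = False
while not done:
    # action 0 = process locally, action k in {1, ..., n_fog} = offload to fog node k - 1
    action_all = np.random.randint(0, env.n_actions, size=env.n_iot)
    observation_all, lstm_state_all, done = env.step(action_all)

arrived = (bitarrive > 0).sum()
print('avg delay (slots):', env.process_delay[env.process_delay > 0].mean())
print('dropped ratio:', env.process_delay_unfinish_ind.sum() / arrived)
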
--------------------------------------------------------------------------------
/plot.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from glob import glob
3 | import argparse
4 | import json
5 | import matplotlib.pyplot as plt
6 | 
7 | 
8 | def plot_avg_cost_graph(costs, colors, labels, title='Title', show=True, save=True,
9 |                         path=None):
10 |     fig, axs = plt.subplots(1, figsize=(10, 6))
11 |     x = np.arange(len(costs[0])).tolist()
12 | 
13 |     for cost, color, label in zip(costs, colors, labels):
14 |         axs.plot(x, cost, color=color, label=label)
15 |     axs.set(title=title)
16 |     axs.set(ylabel='Avg. Cost')
17 |     axs.set(xlabel='Episode')
18 |     axs.legend(loc='upper right')
19 | 
20 |     if save:
21 |         plt.savefig(path + "avg_cost_plot.png")
22 | 
23 |     if show:
24 |         plt.show(block=False)
25 |         input()
26 | 
27 | 
28 | def plot_dropped_ratio_graph(dropped_ratios, x_label, title='Title', show=True, save=True,
29 |                              path=None):
30 |     fig, axs = plt.subplots(1, figsize=(10, 6))
31 | 
32 |     dropped_ratios = np.array(sorted(dropped_ratios))
33 | 
34 |     axs.plot(dropped_ratios[:, 0], dropped_ratios[:, 1], color='green', label='DRL')
35 |     axs.set(title=title)
36 |     axs.set(ylabel='Dropped Task Ratio')
37 |     axs.set(xlabel=x_label)
38 |     axs.legend(loc='upper right')
39 | 
40 |     if save:
41 |         plt.savefig(path + "dropped_ratio_plot.png")
42 | 
43 |     if show:
44 |         plt.show(block=False)
45 |         input()
46 | 
47 | 
48 | def plot_avg_delay_graph(avg_delay, x_label, title='Title', show=True, save=True,
49 |                          path=None):
50 |     fig, axs = plt.subplots(1, figsize=(10, 6))
51 | 
52 |     avg_delay = np.array(sorted(avg_delay))
53 | 
54 |     axs.plot(avg_delay[:, 0], avg_delay[:, 1], color='green', label='DRL')
55 |     axs.set(title=title)
56 |     axs.set(ylabel='Avg. 
Delay (Sec)')
57 |     axs.set(xlabel=x_label)
58 |     axs.legend(loc='upper right')
59 | 
60 |     if save:
61 |         plt.savefig(path + "avg_delay_plot.png")
62 | 
63 |     if show:
64 |         plt.show(block=False)
65 |         input()
66 | 
67 | 
68 | def main(args):
69 |     dirs = glob(f"{args.path}/*/")
70 | 
71 |     if args.type == 'cost':
72 |         costs = list()
73 |         colors = list()
74 |         labels = list()
75 |         for dir in dirs:
76 |             avg_costs_np = np.load(dir + "/plots/avg_cost.npy")
77 |             avg_costs_np = np.convolve(avg_costs_np, np.ones((args.window,))/args.window,
78 |                                        mode='valid')
79 |             costs.append(avg_costs_np)
80 |             with open(dir + "/plots/plot_props.dat") as fp:
81 |                 data = json.load(fp)
82 |             colors.append(data['color'])
83 |             labels.append(data['label'])
84 |         plot_avg_cost_graph(costs, colors, labels, args.title, path=args.path)
85 |     elif args.type == 'dropped':
86 |         dropped_ratios = list()
87 |         for dir in dirs:
88 |             with open(dir + "/results/results.dat") as fp:
89 |                 data = json.load(fp)
90 |             dropped_ratios.append(data['avg_dropped'])
91 |         plot_dropped_ratio_graph(dropped_ratios, args.x_label, args.title, path=args.path)
92 |     elif args.type == 'delay':
93 |         avg_delays = list()
94 |         for dir in dirs:
95 |             with open(dir + "/results/results.dat") as fp:
96 |                 data = json.load(fp)
97 |             avg_delays.append(data['avg_delay'])
98 |         plot_avg_delay_graph(avg_delays, args.x_label, args.title, path=args.path)
99 | 
100 | 
101 | if __name__ == "__main__":
102 | 
103 |     parser = argparse.ArgumentParser(description='Plot Results for Mobile Edge Computing')
104 |     parser.add_argument('--type', type=str, default='cost',
105 |                         help='plot type: {cost, dropped, delay} (default: cost)')
106 |     parser.add_argument('--path', type=str, default=None,
107 |                         help='path to results directory (default: None)')
108 |     parser.add_argument('--window', type=int, default=50,
109 |                         help='moving average window size (default: 50)')
110 |     parser.add_argument('--x_label', type=str, default=None,
111 |                         help='x_label for dropped task and avg. 
delay plots') 112 | parser.add_argument('--title', type=str, default='Title', 113 | help='plot title (default: Title)') 114 | args = parser.parse_args() 115 | 116 | main(args) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import random 4 | import time 5 | import os 6 | import argparse 7 | import json 8 | import matplotlib.pyplot as plt 9 | 10 | from datetime import datetime 11 | from shutil import rmtree 12 | 13 | from fog_env import Offload 14 | from RL_brain import DeepQNetwork 15 | from utils import plot_graphs 16 | 17 | np.set_printoptions(threshold=np.inf) 18 | 19 | 20 | def random_pick(some_list, probabilities): 21 | x = random.uniform(0, 1) 22 | cumulative_probability = 0.0 23 | for item, item_probability in zip(some_list, probabilities): 24 | cumulative_probability += item_probability 25 | if x < cumulative_probability: 26 | break 27 | return item 28 | 29 | 30 | def reward_fun(delay, max_delay, unfinish_indi): 31 | # still use reward, but use the negative value 32 | if unfinish_indi: 33 | reward = - max_delay * 2 34 | else: 35 | reward = - delay 36 | 37 | return reward 38 | 39 | 40 | def train(env, iot_RL_list, num_episodes, learning_freq=10, show=False, random=False, 41 | training_dir=None): 42 | start_time = time.time() 43 | 44 | RL_step = 0 45 | 46 | episode_rewards = list() 47 | episode_dropped = list() 48 | episode_delay = list() 49 | 50 | fig, axs = plt.subplots(3, figsize=(10, 12), sharex=True) 51 | 52 | for episode in range(num_episodes): 53 | # BITRATE ARRIVAL 54 | bitarrive = np.random.uniform(env.min_bit_arrive, env.max_bit_arrive, 55 | size=[env.n_time, env.n_iot]) 56 | task_prob = env.task_arrive_prob 57 | bitarrive = bitarrive * ( 58 | np.random.uniform(0, 1, size=[env.n_time, env.n_iot]) < task_prob) 59 | bitarrive[-env.max_delay:, :] = np.zeros([env.max_delay, env.n_iot]) 60 | 61 | # rewards_dict = {d: [] for d in range(env.n_iot)} 62 | rewards_list = list() 63 | dropped_list = list() 64 | delay_list = list() 65 | 66 | # ============================================================================= # 67 | # ========================================= DRL =============================== # 68 | # ============================================================================= # 69 | 70 | # OBSERVATION MATRIX SETTING 71 | history = list() 72 | for time_index in range(env.n_time): 73 | history.append(list()) 74 | for iot_index in range(env.n_iot): 75 | tmp_dict = {'observation': np.zeros(env.n_features), 76 | 'lstm': np.zeros(env.n_lstm_state), 77 | 'action': np.nan, 78 | 'observation_': np.zeros(env.n_features), 79 | 'lstm_': np.zeros(env.n_lstm_state)} 80 | history[time_index].append(tmp_dict) 81 | reward_indicator = np.zeros([env.n_time, env.n_iot]) 82 | 83 | # INITIALIZE OBSERVATION 84 | observation_all, lstm_state_all = env.reset(bitarrive) 85 | 86 | # TRAIN DRL 87 | while True: 88 | 89 | # PERFORM ACTION 90 | action_all = np.zeros([env.n_iot]) 91 | for iot_index in range(env.n_iot): 92 | 93 | observation = np.squeeze(observation_all[iot_index, :]) 94 | 95 | if np.sum(observation) == 0: 96 | # if there is no task, action = 0 (also need to be stored) 97 | action_all[iot_index] = 0 98 | else: 99 | if random: # Follow a random action 100 | action_all[iot_index] = np.random.randint(env.n_actions) 101 | else: # Follow RL agent action 102 | action_all[iot_index] = \ 103 | 
iot_RL_list[iot_index].choose_action(observation) 104 | 105 | if observation[0] != 0: 106 | iot_RL_list[iot_index].do_store_action(episode, env.time_count, 107 | action_all[iot_index]) 108 | 109 | # OBSERVE THE NEXT STATE AND PROCESS DELAY (REWARD) 110 | observation_all_, lstm_state_all_, done = env.step(action_all) 111 | 112 | # should store this information in EACH time slot 113 | for iot_index in range(env.n_iot): 114 | iot_RL_list[iot_index].update_lstm(lstm_state_all_[iot_index, :]) 115 | 116 | process_delay = env.process_delay 117 | unfinish_indi = env.process_delay_unfinish_ind 118 | 119 | # STORE MEMORY; STORE TRANSITION IF THE TASK PROCESS DELAY IS JUST UPDATED 120 | for iot_index in range(env.n_iot): 121 | 122 | history[env.time_count - 1][iot_index]['observation'] = \ 123 | observation_all[iot_index, :] 124 | history[env.time_count - 1][iot_index]['lstm'] = \ 125 | np.squeeze(lstm_state_all[iot_index, :]) 126 | history[env.time_count - 1][iot_index]['action'] = action_all[iot_index] 127 | history[env.time_count - 1][iot_index]['observation_'] = \ 128 | observation_all_[iot_index] 129 | history[env.time_count - 1][iot_index]['lstm_'] = \ 130 | np.squeeze(lstm_state_all_[iot_index, :]) 131 | 132 | update_index = np.where((1 - reward_indicator[:, iot_index]) * 133 | process_delay[:, iot_index] > 0)[0] 134 | 135 | if len(update_index) != 0: 136 | for update_ii in range(len(update_index)): 137 | time_index = update_index[update_ii] 138 | 139 | reward = reward_fun( 140 | process_delay[time_index, iot_index], env.max_delay, 141 | unfinish_indi[time_index, iot_index]) 142 | 143 | dropped_list.append(unfinish_indi[time_index, iot_index]) 144 | if not unfinish_indi[time_index, iot_index]: 145 | delay_list.append(process_delay[time_index, iot_index]) 146 | 147 | iot_RL_list[iot_index].store_transition( 148 | history[time_index][iot_index]['observation'], 149 | history[time_index][iot_index]['lstm'], 150 | history[time_index][iot_index]['action'], 151 | reward, 152 | history[time_index][iot_index]['observation_'], 153 | history[time_index][iot_index]['lstm_']) 154 | 155 | iot_RL_list[iot_index].do_store_reward( 156 | episode, time_index, reward) 157 | 158 | iot_RL_list[iot_index].do_store_delay( 159 | episode, time_index, process_delay[time_index, iot_index]) 160 | 161 | reward_indicator[time_index, iot_index] = 1 162 | 163 | # rewards_dict[iot_index].append(-reward) 164 | rewards_list.append(-reward) 165 | 166 | # ADD STEP (one step does not mean one store) 167 | RL_step += 1 168 | 169 | # UPDATE OBSERVATION 170 | observation_all = observation_all_ 171 | lstm_state_all = lstm_state_all_ 172 | 173 | # CONTROL LEARNING START TIME AND FREQUENCY 174 | if (RL_step > 200) and (RL_step % learning_freq == 0): 175 | for iot in range(env.n_iot): 176 | iot_RL_list[iot].learn() 177 | 178 | # GAME ENDS 179 | if done: 180 | break 181 | 182 | avg_reward = np.mean(rewards_list)/env.n_iot 183 | episode_rewards.append(avg_reward) 184 | 185 | dropped_ratio = np.mean(dropped_list)/env.n_iot 186 | episode_dropped.append(dropped_ratio) 187 | 188 | avg_delay = np.mean(delay_list)/env.n_iot 189 | episode_delay.append(avg_delay) 190 | 191 | print(f"Episode: {episode} - Reward: {avg_reward} - Dropped: {dropped_ratio} - " 192 | + f"Delay: {avg_delay}") 193 | 194 | if episode % 10 == 0: 195 | plot_graphs(axs, episode_rewards, episode_dropped, episode_delay, show=show, 196 | save=True, path=training_dir) 197 | 198 | # ============================================================================ # 199 | # 
======================================== DRL END============================ # 200 | # ============================================================================ # 201 | 202 | plot_graphs(axs, episode_rewards, episode_dropped, episode_delay, show=show, 203 | save=True, path=training_dir) 204 | 205 | end_time = time.time() 206 | print("\nTraining Time: %.2f(s)" % (end_time - start_time)) 207 | input("Completed training.\nPress Enter to Finish") 208 | 209 | 210 | def evaluate(env, iot_RL_list, num_episodes, random=False, training_dir=None, 211 | plot_x=None): 212 | episode_rewards = list() 213 | episode_dropped = list() 214 | episode_delay = list() 215 | 216 | for episode in range(num_episodes): 217 | # BITRATE ARRIVAL 218 | bitarrive = np.random.uniform(env.min_bit_arrive, env.max_bit_arrive, 219 | size=[env.n_time, env.n_iot]) 220 | task_prob = env.task_arrive_prob 221 | bitarrive = bitarrive * ( 222 | np.random.uniform(0, 1, size=[env.n_time, env.n_iot]) < task_prob) 223 | bitarrive[-env.max_delay:, :] = np.zeros([env.max_delay, env.n_iot]) 224 | 225 | # rewards_dict = {d: [] for d in range(env.n_iot)} 226 | rewards_list = list() 227 | dropped_list = list() 228 | delay_list = list() 229 | 230 | reward_indicator = np.zeros([env.n_time, env.n_iot]) 231 | 232 | # INITIALIZE OBSERVATION 233 | observation_all, lstm_state_all = env.reset(bitarrive) 234 | 235 | # Episode until done 236 | while True: 237 | 238 | # PERFORM ACTION 239 | action_all = np.zeros([env.n_iot]) 240 | for iot_index in range(env.n_iot): 241 | 242 | observation = np.squeeze(observation_all[iot_index, :]) 243 | 244 | if np.sum(observation) == 0: 245 | # if there is no task, action = 0 (also need to be stored) 246 | action_all[iot_index] = 0 247 | else: 248 | if random: # Follow a random action 249 | action_all[iot_index] = np.random.randint(env.n_actions) 250 | else: # Follow RL agent action 251 | action_all[iot_index] = \ 252 | iot_RL_list[iot_index].choose_action(observation, 253 | inference=True) 254 | 255 | if observation[0] != 0: 256 | iot_RL_list[iot_index].do_store_action(episode, env.time_count, 257 | action_all[iot_index]) 258 | 259 | # OBSERVE THE NEXT STATE AND PROCESS DELAY (REWARD) 260 | observation_all_, lstm_state_all_, done = env.step(action_all) 261 | 262 | process_delay = env.process_delay 263 | unfinish_indi = env.process_delay_unfinish_ind 264 | 265 | # STORE MEMORY; STORE TRANSITION IF THE TASK PROCESS DELAY IS JUST UPDATED 266 | for iot_index in range(env.n_iot): 267 | update_index = np.where((1 - reward_indicator[:, iot_index]) * 268 | process_delay[:, iot_index] > 0)[0] 269 | 270 | if len(update_index) != 0: 271 | for update_ii in range(len(update_index)): 272 | time_index = update_index[update_ii] 273 | 274 | reward = reward_fun( 275 | process_delay[time_index, iot_index], env.max_delay, 276 | unfinish_indi[time_index, iot_index]) 277 | 278 | dropped_list.append(unfinish_indi[time_index, iot_index]) 279 | if not unfinish_indi[time_index, iot_index]: 280 | delay_list.append(process_delay[time_index, iot_index]) 281 | 282 | reward_indicator[time_index, iot_index] = 1 283 | 284 | rewards_list.append(-reward) 285 | 286 | # UPDATE OBSERVATION 287 | observation_all = observation_all_ 288 | 289 | # GAME ENDS 290 | if done: 291 | break 292 | 293 | avg_reward = np.mean(rewards_list)/env.n_iot 294 | episode_rewards.append(avg_reward) 295 | 296 | dropped_ratio = np.mean(dropped_list)/env.n_iot 297 | episode_dropped.append(dropped_ratio) 298 | 299 | avg_delay = np.mean(delay_list)/env.n_iot 300 | 
episode_delay.append(avg_delay) 301 | 302 | avg_episode_rewards = np.mean(episode_rewards) 303 | avg_episode_dropped = np.mean(episode_dropped) 304 | avg_episode_delay = np.mean(episode_delay) 305 | 306 | print(f"\nAvg. Eval Reward: {avg_episode_rewards} - " + 307 | f"Avg. Eval Dropped: {avg_episode_dropped} - " + 308 | f"Avg. Eval Delay: {avg_episode_delay}") 309 | 310 | eval_results = dict() 311 | eval_results['avg_rewards'] = (plot_x, avg_episode_rewards) 312 | eval_results['avg_dropped'] = (plot_x, avg_episode_dropped) 313 | eval_results['avg_delay'] = (plot_x, avg_episode_delay) 314 | 315 | with open(training_dir + 'results/results.dat', 'w') as jf: 316 | json.dump(eval_results, jf, indent=4) 317 | 318 | input("Completed Evaluation") 319 | 320 | 321 | def main(args): 322 | # Set random generator seed 323 | tf.set_random_seed(args.seed) 324 | np.random.seed(args.seed) 325 | random.seed(args.seed) 326 | 327 | # Create a timestamp directory to save model, parameter and log files 328 | training_dir = \ 329 | ('training/' + ('' if args.path is None else args.path + '/') + 330 | str(datetime.now().date()) + '_' + str(datetime.now().hour).zfill(2) + '-' + 331 | str(datetime.now().minute).zfill(2) + '/') 332 | 333 | # Delete if a directory with the same name already exists 334 | if os.path.exists(training_dir): 335 | rmtree(training_dir) 336 | 337 | # Create empty directories for saving model, parameter and log files 338 | os.makedirs(training_dir) 339 | os.makedirs(training_dir + 'plots') 340 | os.makedirs(training_dir + 'results') 341 | os.makedirs(training_dir + 'params') 342 | 343 | # Dump params to file 344 | with open(training_dir + 'params/params.dat', 'w') as jf: 345 | json.dump(vars(args), jf, indent=4) 346 | 347 | plot_dict = {'color': args.plot_color, 'label': args.plot_label} 348 | with open(training_dir + 'plots/plot_props.dat', 'w') as jf: 349 | json.dump(plot_dict, jf, indent=4) 350 | 351 | # GENERATE ENVIRONMENT 352 | env = Offload(args.num_iot, args.num_fog, NUM_TIME, MAX_DELAY, args.task_arrival_prob) 353 | 354 | # GENERATE MULTIPLE CLASSES FOR RL 355 | iot_RL_list = list() 356 | for iot in range(args.num_iot): 357 | iot_RL_list.append(DeepQNetwork(env.n_actions, env.n_features, env.n_lstm_state, 358 | env.n_time, 359 | learning_rate=args.lr, 360 | reward_decay=0.9, 361 | e_greedy=0.99, 362 | replace_target_iter=200, # update target net 363 | memory_size=500, # maximum of memory 364 | batch_size=args.batch_size, 365 | optimizer=args.optimizer, 366 | seed=args.seed, 367 | )) 368 | 369 | # TRAIN THE SYSTEM 370 | train(env, iot_RL_list, args.num_episodes, args.learning_freq, args.plot, args.random, 371 | training_dir) 372 | print('Training Finished') 373 | 374 | if args.training_var is not None: 375 | if args.training_var == 'lr': 376 | plot_x = args.lr 377 | elif args.training_var == 'batch_size': 378 | plot_x = args.batch_size 379 | elif args.training_var == 'optimizer': 380 | plot_x = args.optimizer 381 | elif args.training_var == 'learning_freq': 382 | plot_x = args.learning_freq 383 | elif args.training_var == 'task_arrival_prob': 384 | plot_x = args.task_arrival_prob 385 | elif args.training_var == 'num_iot': 386 | plot_x = args.num_iot 387 | else: 388 | plot_x = None 389 | 390 | evaluate(env, iot_RL_list, 20, args.random, training_dir, plot_x) 391 | 392 | 393 | if __name__ == "__main__": 394 | 395 | NUM_TIME_BASE = 100 396 | MAX_DELAY = 10 397 | NUM_TIME = NUM_TIME_BASE + MAX_DELAY 398 | 399 | parser = argparse.ArgumentParser(description='DQL for Mobile Edge Computing') 
400 | parser.add_argument('--num_iot', type=int, default=50, 401 | help='number of IOT devices (default: 50)') 402 | parser.add_argument('--num_fog', type=int, default=5, 403 | help='number of FOG stations (default: 5)') 404 | parser.add_argument('--task_arrival_prob', type=float, default=0.3, 405 | help='Task Arrival Probability (default: 0.3)') 406 | parser.add_argument('--num_episodes', type=int, default=1000, 407 | help='number of training episodes (default: 1000)') 408 | parser.add_argument('--batch_size', type=int, default=32, 409 | help='input batch size for training (default: 32)') 410 | parser.add_argument('--lr', type=float, default=0.001, 411 | help='learning rate for optimizer (default: 0.001)') 412 | parser.add_argument('--optimizer', type=str, default='rms_prop', 413 | help='optimizer for updating the NN (default: rms_prop)') 414 | parser.add_argument('--learning_freq', type=int, default=10, 415 | help='frequency of updating main/eval network (default: 10)') 416 | parser.add_argument('--seed', type=int, default=0, help='random seed (default: 0)') 417 | parser.add_argument('--plot', default=False, action='store_true', 418 | help='plot learning curve (default: False)') 419 | parser.add_argument('--random', default=False, action='store_true', 420 | help='follow a random policy (default: False)') 421 | parser.add_argument('--path', type=str, default=None, 422 | help='path postfix for saving training results (default: None)') 423 | parser.add_argument('--training_var', type=str, default=None, 424 | help='training variant: {lr, task_prob, num_iot, ...}') 425 | parser.add_argument('--plot_color', type=str, default='red', 426 | help='plot color (default: red)') 427 | parser.add_argument('--plot_label', type=str, default='X', 428 | help='plot label (default: X)') 429 | args = parser.parse_args() 430 | 431 | main(args) -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_graphs(axs, train_cost, train_dropped, train_delay, show=False, save=False, 6 | path=None): 7 | x = np.arange(len(train_cost)).tolist() 8 | axs[0].clear() 9 | axs[0].plot(x, train_cost, color='red', label='Training') 10 | axs[0].set(title='Avg. Cost') 11 | axs[0].set(ylabel='Avg. Cost') 12 | axs[0].set(xlabel='Episode') 13 | axs[0].legend(loc='upper right') 14 | 15 | axs[1].clear() 16 | axs[1].plot(x, train_dropped, color='blue', label='Training') 17 | axs[1].set(title='Ratio of Dropped Tasks') 18 | axs[1].set(ylabel='Dropped Ratio') 19 | axs[1].set(xlabel='Episode') 20 | axs[1].legend(loc='upper right') 21 | 22 | axs[2].clear() 23 | axs[2].plot(x, train_delay, color='green', label='Training') 24 | axs[2].set(title='Avg. Task Delay') 25 | axs[2].set(ylabel='Avg. Delay (Sec)') 26 | axs[2].set(xlabel='Episode') 27 | axs[2].legend(loc='upper right') 28 | 29 | if save: 30 | plt.savefig(path + "plots/learning_curves.png") 31 | 32 | with open(path + 'plots/avg_cost.npy', 'wb') as f: 33 | np.save(f, np.array(train_cost)) 34 | 35 | with open(path + 'plots/dropped_ratio.npy', 'wb') as f: 36 | np.save(f, np.array(train_dropped)) 37 | 38 | with open(path + 'plots/avg_delay.npy', 'wb') as f: 39 | np.save(f, np.array(train_delay)) 40 | 41 | if show: 42 | plt.show(block=False) 43 | plt.pause(0.01) --------------------------------------------------------------------------------
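
Usage note (an illustration, not a repository file): with the command-line flags defined in train.py and plot.py above, a typical run would look roughly like the lines below. train.py writes its outputs under training/<path>/<timestamp>/{plots,results,params}/, and plot.py aggregates every run directory found under the given --path; the flag values shown are just examples.

python train.py --num_iot 50 --num_fog 5 --num_episodes 1000 --optimizer rms_prop --path demo --plot_label DRL --plot
python plot.py --type cost --path training/demo --window 50 --title "Average Cost per Episode"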