├── README.md
├── RL_brain.py
├── ResearchReport.pdf
├── citation.txt
├── fog_env.py
├── plot.py
├── train.py
└── utils.py

/README.md:
--------------------------------------------------------------------------------
1 | # Deep-Q-learning-for-mobile-edge-computing
2 | 
3 | More details can be found at https://ieeexplore.ieee.org/document/9253665
4 | 
5 | To run the code, please install TensorFlow 1.4.0. train.py is the main training script, fog_env.py implements the mobile edge computing environment, and RL_brain.py implements the deep reinforcement learning agent.
6 | 
7 | If you use this code for research, please cite the following paper:
8 | Ming Tang and Vincent W.S. Wong, “Deep Reinforcement Learning for Task Offloading in Mobile Edge Computing Systems,” IEEE Transactions on Mobile Computing, 2020 (Early Access).
9 | 
--------------------------------------------------------------------------------
/RL_brain.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from collections import deque
4 | 
5 | 
6 | class DeepQNetwork:
7 | 
8 |     def __init__(self,
9 |                  n_actions,  # the number of actions
10 |                  n_features,
11 |                  n_lstm_features,
12 |                  n_time,
13 |                  learning_rate=0.01,
14 |                  reward_decay=0.9,
15 |                  e_greedy=0.99,
16 |                  replace_target_iter=200,  # each 200 steps, update target net
17 |                  memory_size=500,  # maximum of memory
18 |                  batch_size=32,
19 |                  e_greedy_increment=0.00025,
20 |                  n_lstm_step=10,
21 |                  dueling=True,
22 |                  double_q=True,
23 |                  N_L1=20,
24 |                  N_lstm=20,
25 |                  optimizer='rms_prop',
26 |                  seed=0):
27 | 
28 |         self.n_actions = n_actions
29 |         self.n_features = n_features
30 |         self.n_time = n_time
31 |         self.lr = learning_rate
32 |         self.gamma = reward_decay
33 |         self.epsilon_max = e_greedy
34 |         self.replace_target_iter = replace_target_iter
35 |         self.memory_size = memory_size
36 |         self.batch_size = batch_size  # select self.batch_size time sequences for learning
37 |         self.epsilon_increment = e_greedy_increment
38 |         self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
39 |         self.dueling = dueling
40 |         self.double_q = double_q
41 |         self.learn_step_counter = 0
42 |         self.N_L1 = N_L1
43 |         self.seed = seed
44 | 
45 |         if optimizer not in ['adam', 'gd', 'rms_prop']:
46 |             raise SystemExit(
47 |                 f"Invalid optimizer: {optimizer}.\nChoose one of " +
48 |                 "['adam', 'rms_prop', 'gd'], via CLI with flag --optimizer")
49 |         else:
50 |             self.optimizer = optimizer
51 | 
52 |         # lstm
53 |         self.N_lstm = N_lstm
54 |         self.n_lstm_step = n_lstm_step  # step_size in lstm
55 |         self.n_lstm_state = n_lstm_features  # [fog1, fog2, ...., fogn, M_n(t)]
56 | 
57 |         # initialize zero memory np.hstack((s, [a, r], s_, lstm_s, lstm_s_))
58 |         self.memory = np.zeros((self.memory_size, self.n_features + 1 + 1
59 |                                 + self.n_features + self.n_lstm_state + self.n_lstm_state))
60 | 
61 |         # consists of [target_net, evaluate_net]
62 |         self._build_net(optimizer=self.optimizer, seed=self.seed)
63 | 
64 |         # replace the parameters in target net
65 |         t_params = tf.get_collection('target_net_params')  # obtain the parameters in target_net
66 |         e_params = tf.get_collection('eval_net_params')  # obtain the parameters in eval_net
67 |         self.replace_target_op = [tf.assign(t, e) for t, e in
68 |                                   zip(t_params, e_params)]  # update the parameters in target_net
69 | 
70 |         self.sess = tf.Session()
71 | 
72 |         self.sess.run(tf.global_variables_initializer())
73 |         self.reward_store = list()
74 |         self.action_store = list()
75 |         self.delay_store = list()
76 | 
77 | 
self.lstm_history = deque(maxlen=self.n_lstm_step) 78 | for ii in range(self.n_lstm_step): 79 | self.lstm_history.append(np.zeros([self.n_lstm_state])) 80 | 81 | self.store_q_value = list() 82 | 83 | def _build_net(self, optimizer='rms_prop', seed=0): 84 | 85 | tf.reset_default_graph() 86 | tf.set_random_seed(seed) 87 | 88 | def build_layers(s,lstm_s,c_names, n_l1, n_lstm, w_initializer, b_initializer): 89 | 90 | # lstm for load levels 91 | with tf.variable_scope('l0'): 92 | lstm_dnn = tf.contrib.rnn.BasicLSTMCell(n_lstm) 93 | lstm_dnn.zero_state(self.batch_size, tf.float32) 94 | lstm_output,lstm_state = tf.nn.dynamic_rnn(lstm_dnn, lstm_s, dtype=tf.float32) 95 | lstm_output_reduced = tf.reshape(lstm_output[:, -1, :], shape=[-1, n_lstm]) 96 | 97 | # first layer 98 | with tf.variable_scope('l1'): 99 | w1 = tf.get_variable('w1',[n_lstm + self.n_features, n_l1], initializer=w_initializer, 100 | collections=c_names) 101 | b1 = tf.get_variable('b1',[1,n_l1],initializer=b_initializer, collections=c_names) 102 | l1 = tf.nn.relu(tf.matmul(tf.concat([lstm_output_reduced, s],1), w1) + b1) 103 | 104 | # second layer 105 | with tf.variable_scope('l12'): 106 | w12 = tf.get_variable('w12', [n_l1, n_l1], initializer=w_initializer, 107 | collections=c_names) 108 | b12 = tf.get_variable('b12', [1, n_l1], initializer=b_initializer, collections=c_names) 109 | l12 = tf.nn.relu(tf.matmul(l1, w12) + b12) 110 | 111 | # the second layer is different 112 | if self.dueling: 113 | # Dueling DQN 114 | # a single output n_l1 -> 1 115 | with tf.variable_scope('Value'): 116 | w2 = tf.get_variable('w2',[n_l1,1],initializer=w_initializer,collections=c_names) 117 | b2 = tf.get_variable('b2',[1,1],initializer=b_initializer,collections=c_names) 118 | self.V = tf.matmul(l12,w2) + b2 119 | # n_l1 -> n_actions 120 | with tf.variable_scope('Advantage'): 121 | w2 = tf.get_variable('w2',[n_l1,self.n_actions],initializer=w_initializer,collections=c_names) 122 | b2 = tf.get_variable('b2',[1,self.n_actions],initializer=b_initializer,collections=c_names) 123 | self.A = tf.matmul(l12,w2) + b2 124 | 125 | with tf.variable_scope('Q'): 126 | out = self.V + (self.A - tf.reduce_mean(self.A,axis=1,keep_dims=True)) # Q = V(s) +A(s,a) 127 | 128 | else: 129 | with tf.variable_scope('Q'): 130 | w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names) 131 | b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names) 132 | out = tf.matmul(l1, w2) + b2 133 | 134 | return out 135 | 136 | # input for eval_net 137 | self.s = tf.placeholder(tf.float32,[None,self.n_features], name = 's') # state (observation) 138 | self.lstm_s = tf.placeholder(tf.float32,[None,self.n_lstm_step,self.n_lstm_state], name='lstm1_s') 139 | 140 | self.q_target = tf.placeholder(tf.float32,[None,self.n_actions], name = 'Q_target') # q_target 141 | 142 | # input for target_net 143 | self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_') 144 | self.lstm_s_ = tf.placeholder(tf.float32,[None,self.n_lstm_step,self.n_lstm_state], name='lstm1_s_') 145 | 146 | # generate EVAL_NET, update parameters 147 | with tf.variable_scope('eval_net'): 148 | 149 | # c_names(collections_names), will be used when update target_net 150 | # tf.random_normal_initializer(mean=0.0, stddev=1.0, seed=None, dtype=tf.float32), return a initializer 151 | c_names, n_l1, n_lstm, w_initializer, b_initializer = \ 152 | ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], self.N_L1, self.N_lstm,\ 153 | 
tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1) # config of layers 154 | 155 | # input (n_feature) -> l1 (n_l1) -> l2 (n_actions) 156 | self.q_eval = build_layers(self.s, self.lstm_s, c_names, n_l1, n_lstm, w_initializer, b_initializer) 157 | 158 | # generate TARGET_NET 159 | with tf.variable_scope('target_net'): 160 | c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES] 161 | 162 | self.q_next = build_layers(self.s_, self.lstm_s_, c_names, n_l1, n_lstm, w_initializer, b_initializer) 163 | 164 | # loss and train 165 | with tf.variable_scope('loss'): 166 | self.loss = tf.reduce_mean(tf.squared_difference(self.q_target,self.q_eval)) 167 | with tf.variable_scope('train'): 168 | if optimizer == 'rms_prop': 169 | self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss) 170 | elif optimizer == 'adam': 171 | self._train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) 172 | elif optimizer == 'gd': 173 | self._train_op = \ 174 | tf.train.GradientDescentOptimizer(self.lr).minimize(self.loss) 175 | 176 | def store_transition(self, s, lstm_s, a, r, s_, lstm_s_): 177 | # RL.store_transition(observation,action,reward,observation_) 178 | # hasattr(object, name), if object has name attribute 179 | if not hasattr(self, 'memory_counter'): 180 | self.memory_counter = 0 181 | 182 | # store np.hstack((s, [a, r], s_, lstm_s, lstm_s_)) 183 | transition = np.hstack((s, [a, r], s_, lstm_s, lstm_s_)) # stack in horizontal direction 184 | 185 | # if memory overflows, replace old memory with new one 186 | index = self.memory_counter % self.memory_size 187 | # print(transition) 188 | self.memory[index, :] = transition 189 | self.memory_counter += 1 190 | 191 | def update_lstm(self, lstm_s): 192 | 193 | self.lstm_history.append(lstm_s) 194 | 195 | def choose_action(self, observation, inference=False): 196 | # the shape of the observation (1, size_of_observation) 197 | # x1 = np.array([1, 2, 3, 4, 5]), x1_new = x1[np.newaxis, :], now, the shape of x1_new is (1, 5) 198 | observation = observation[np.newaxis, :] 199 | 200 | if inference or np.random.uniform() < self.epsilon: 201 | 202 | # lstm only contains history, there is no current observation 203 | lstm_observation = np.array(self.lstm_history) 204 | 205 | actions_value = self.sess.run(self.q_eval, 206 | feed_dict={self.s: observation, 207 | self.lstm_s: lstm_observation.reshape(1, self.n_lstm_step, 208 | self.n_lstm_state), 209 | }) 210 | 211 | self.store_q_value.append({'observation': observation, 'q_value': actions_value}) 212 | 213 | action = np.argmax(actions_value) 214 | 215 | else: 216 | 217 | action = np.random.randint(0, self.n_actions) 218 | 219 | return action 220 | 221 | def learn(self): 222 | 223 | # check if replace target_net parameters 224 | if self.learn_step_counter % self.replace_target_iter == 0: 225 | # run the self.replace_target_op in __int__ 226 | self.sess.run(self.replace_target_op) 227 | # print(f"{self.learn_step_counter}: target_params_replaced\n") 228 | 229 | # randomly pick [batch_size] memory from memory np.hstack((s, [a, r], s_, lstm_s, lstm_s_)) 230 | if self.memory_counter > self.memory_size: 231 | sample_index = np.random.choice(self.memory_size - self.n_lstm_step, size=self.batch_size) 232 | else: 233 | sample_index = np.random.choice(self.memory_counter - self.n_lstm_step, size=self.batch_size)\ 234 | 235 | # transition = np.hstack(s, [a, r], s_, lstm_s, lstm_s_) 236 | batch_memory = self.memory[sample_index, :self.n_features+1+1+self.n_features] 237 | lstm_batch_memory = 
np.zeros([self.batch_size, self.n_lstm_step, self.n_lstm_state * 2]) 238 | for ii in range(len(sample_index)): 239 | for jj in range(self.n_lstm_step): 240 | lstm_batch_memory[ii,jj,:] = self.memory[sample_index[ii]+jj, 241 | self.n_features+1+1+self.n_features:] 242 | 243 | # obtain q_next (from target_net) (to q_target) and q_eval (from eval_net) 244 | # minimize(target_q - q_eval)^2 245 | # q_target = reward + gamma * q_next 246 | # in the size of bacth_memory 247 | # q_next, given the next state from batch, what will be the q_next from q_next 248 | # q_eval4next, given the next state from batch, what will be the q_eval4next from q_eval 249 | q_next, q_eval4next = self.sess.run( 250 | [self.q_next, self.q_eval], # output 251 | feed_dict={ 252 | # [s, a, r, s_] 253 | # input for target_q (last) 254 | self.s_: batch_memory[:, -self.n_features:], self.lstm_s_: lstm_batch_memory[:,:,self.n_lstm_state:], 255 | # input for eval_q (last) 256 | self.s: batch_memory[:, -self.n_features:], self.lstm_s: lstm_batch_memory[:,:,self.n_lstm_state:], 257 | } 258 | ) 259 | # q_eval, given the current state from batch, what will be the q_eval from q_eval 260 | q_eval = self.sess.run(self.q_eval, {self.s: batch_memory[:, :self.n_features], 261 | self.lstm_s: lstm_batch_memory[:,:,:self.n_lstm_state]}) 262 | q_target = q_eval.copy() 263 | batch_index = np.arange(self.batch_size, dtype=np.int32) 264 | eval_act_index = batch_memory[:, self.n_features].astype(int) # action with a single value (int action) 265 | reward = batch_memory[:, self.n_features + 1] # reward with a single value 266 | 267 | # update the q_target at the particular batch at the correponding action 268 | if self.double_q: 269 | max_act4next = np.argmax(q_eval4next, axis=1) 270 | selected_q_next = q_next[batch_index, max_act4next] 271 | else: 272 | selected_q_next = np.max(q_next, axis=1) 273 | 274 | q_target[batch_index, eval_act_index] = reward + self.gamma * selected_q_next 275 | 276 | # both self.s and self.q_target belong to eval_q 277 | # input self.s and self.q_target, output self._train_op, self.loss (to minimize the gap) 278 | # self.sess.run: given input (feed), output the required element 279 | _, self.cost = self.sess.run([self._train_op, self.loss], 280 | feed_dict={self.s: batch_memory[:, :self.n_features], 281 | self.lstm_s: lstm_batch_memory[:, :, :self.n_lstm_state], 282 | self.q_target: q_target}) 283 | 284 | # gradually increase epsilon 285 | self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max 286 | self.learn_step_counter += 1 287 | 288 | def do_store_reward(self, episode, time, reward): 289 | while episode >= len(self.reward_store): 290 | self.reward_store.append(np.zeros([self.n_time])) 291 | self.reward_store[episode][time] = reward 292 | 293 | def do_store_action(self,episode,time, action): 294 | while episode >= len(self.action_store): 295 | self.action_store.append(- np.ones([self.n_time])) 296 | self.action_store[episode][time] = action 297 | 298 | def do_store_delay(self, episode, time, delay): 299 | while episode >= len(self.delay_store): 300 | self.delay_store.append(np.zeros([self.n_time])) 301 | self.delay_store[episode][time] = delay -------------------------------------------------------------------------------- /ResearchReport.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SZU-AdvTech-2024/059-Deep-Reinforcement-Learning-for-Task-Offloading-in-Mobile-Edge-Computing-Systems/c63a038ecd7254a0a7bb2ca7c787984e9a5ae2b3/ResearchReport.pdf -------------------------------------------------------------------------------- /citation.txt: -------------------------------------------------------------------------------- 1 | @article{REPO059, 2 | author = "Tang, Ming and Wong, Vincent WS", 3 | journal = "IEEE Transactions on Mobile Computing", 4 | number = "6", 5 | pages = "1985--1997", 6 | publisher = "IEEE", 7 | title = "{Deep reinforcement learning for task offloading in mobile edge computing systems}", 8 | volume = "21", 9 | year = "2020" 10 | } 11 | -------------------------------------------------------------------------------- /fog_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import math 4 | import queue 5 | 6 | class Offload: 7 | 8 | def __init__(self, num_iot, num_fog, num_time, max_delay, task_arrive_prob): 9 | 10 | # INPUT DATA 11 | self.n_iot = num_iot 12 | self.n_fog = num_fog 13 | self.n_time = num_time 14 | self.duration = 0.1 15 | 16 | # test 17 | self.drop_trans_count = 0 18 | self.drop_fog_count = 0 19 | self.drop_iot_count = 0 20 | 21 | # CONSIDER A SCENARIO RANDOM IS NOT GOOD 22 | # LOCAL CAP SHOULD NOT BE TOO SMALL, OTHERWISE, THE STATE MATRIX IS TOO LARGE (EXCEED THE MAXIMUM) 23 | # SHOULD NOT BE LESS THAN ONE 24 | self.comp_cap_iot = 2.5 * np.ones(self.n_iot) * self.duration # 2.5 Gigacycles per second * duration 25 | self.comp_cap_fog = 41.8 * np.ones([self.n_fog]) * self.duration # Gigacycles per second * duration 26 | self.tran_cap_iot = 14 * np.ones([self.n_iot, self.n_fog]) * self.duration # Mbps * duration 27 | self.comp_density = 0.297 * np.ones([self.n_iot]) # 0.297 Gigacycles per Mbits 28 | self.max_delay = max_delay # time slots 29 | 30 | # BITARRIVE_SET (MARKOVIAN) 31 | self.task_arrive_prob = task_arrive_prob # 0.3 32 | self.max_bit_arrive = 5 # Mbits 33 | self.min_bit_arrive = 2 # Mbits 34 | self.bitArrive_set = np.arange(self.min_bit_arrive, self.max_bit_arrive, 0.1) 35 | self.bitArrive = np.zeros([self.n_time, self.n_iot]) 36 | 37 | # ACTION: 0, local; 1, fog 0; 2, fog 1; ...; n, fog n - 1 38 | self.n_actions = 1 + num_fog 39 | # STATE: [A, t^{comp}, t^{tran}, [B^{fog}]] 40 | self.n_features = 1 + 1 + 1 + num_fog 41 | # LSTM STATE 42 | self.n_lstm_state = self.n_fog # [fog1, fog2, ...., fogn] 43 | 44 | # TIME COUNT 45 | self.time_count = int(0) 46 | 47 | # QUEUE INITIALIZATION: size -> task size; time -> arrive time 48 | self.Queue_iot_comp = list() 49 | self.Queue_iot_tran = list() 50 | self.Queue_fog_comp = list() 51 | 52 | for iot in range(self.n_iot): 53 | self.Queue_iot_comp.append(queue.Queue()) 54 | self.Queue_iot_tran.append(queue.Queue()) 55 | self.Queue_fog_comp.append(list()) 56 | for fog in range(self.n_fog): 57 | self.Queue_fog_comp[iot].append(queue.Queue()) 58 | 59 | # QUEUE INFO INITIALIZATION 60 | self.t_iot_comp = - np.ones([self.n_iot]) 61 | self.t_iot_tran = - np.ones([self.n_iot]) 62 | self.b_fog_comp = np.zeros([self.n_iot, self.n_fog]) 63 | 64 | # TASK INDICATOR 65 | self.task_on_process_local = list() 66 | self.task_on_transmit_local = list() 67 | self.task_on_process_fog = list() 68 | self.fog_iot_m = np.zeros(self.n_fog) 69 | self.fog_iot_m_observe = np.zeros(self.n_fog) 70 | 71 | for iot in range(self.n_iot): 72 | self.task_on_process_local.append({'size': np.nan, 'time': np.nan, 'remain': 
np.nan}) 73 | self.task_on_transmit_local.append({'size': np.nan, 'time': np.nan, 74 | 'fog': np.nan, 'remain': np.nan}) 75 | self.task_on_process_fog.append(list()) 76 | for fog in range(self.n_fog): 77 | self.task_on_process_fog[iot].append({'size': np.nan, 'time': np.nan, 'remain': np.nan}) 78 | 79 | # TASK DELAY 80 | self.process_delay = np.zeros([self.n_time, self.n_iot]) # total delay 81 | self.process_delay_unfinish_ind = np.zeros([self.n_time, self.n_iot]) # unfinished indicator 82 | self.process_delay_trans = np.zeros([self.n_time, self.n_iot]) # transmission delay (if applied) 83 | 84 | self.fog_drop = np.zeros([self.n_iot, self.n_fog]) 85 | 86 | # reset the network scenario 87 | def reset(self, bitArrive): 88 | 89 | # test 90 | self.drop_trans_count = 0 91 | self.drop_fog_count = 0 92 | self.drop_iot_count = 0 93 | 94 | # BITRATE 95 | self.bitArrive = bitArrive 96 | 97 | # TIME COUNT 98 | self.time_count = int(0) 99 | 100 | # QUEUE INITIALIZATION 101 | self.Queue_iot_comp = list() 102 | self.Queue_iot_tran = list() 103 | self.Queue_fog_comp = list() 104 | 105 | for iot in range(self.n_iot): 106 | self.Queue_iot_comp.append(queue.Queue()) 107 | self.Queue_iot_tran.append(queue.Queue()) 108 | self.Queue_fog_comp.append(list()) 109 | for fog in range(self.n_fog): 110 | self.Queue_fog_comp[iot].append(queue.Queue()) 111 | 112 | # QUEUE INFO INITIALIZATION 113 | self.t_iot_comp = - np.ones([self.n_iot]) 114 | self.t_iot_tran = - np.ones([self.n_iot]) 115 | self.b_fog_comp = np.zeros([self.n_iot, self.n_fog]) 116 | 117 | # TASK INDICATOR 118 | self.task_on_process_local = list() 119 | self.task_on_transmit_local = list() 120 | self.task_on_process_fog = list() 121 | 122 | for iot in range(self.n_iot): 123 | self.task_on_process_local.append({'size': np.nan, 'time': np.nan, 'remain': np.nan}) 124 | self.task_on_transmit_local.append({'size': np.nan, 'time': np.nan, 125 | 'fog': np.nan, 'remain': np.nan}) 126 | self.task_on_process_fog.append(list()) 127 | for fog in range(self.n_fog): 128 | self.task_on_process_fog[iot].append({'size': np.nan, 'time': np.nan, 'remain': np.nan}) 129 | 130 | # TASK DELAY 131 | self.process_delay = np.zeros([self.n_time, self.n_iot]) 132 | self.process_delay_unfinish_ind = np.zeros([self.n_time, self.n_iot]) # unfinished indicator 133 | self.process_delay_trans = np.zeros([self.n_time, self.n_iot]) # transmission delay (if applied) 134 | 135 | self.fog_drop = np.zeros([self.n_iot, self.n_fog]) 136 | 137 | # INITIAL 138 | observation_all = np.zeros([self.n_iot, self.n_features]) 139 | for iot_index in range(self.n_iot): 140 | # observation is zero if there is no task arrival 141 | if self.bitArrive[self.time_count, iot_index] != 0: 142 | # state [A, B^{comp}, B^{tran}, [B^{fog}]] 143 | observation_all[iot_index, :] = np.hstack([ 144 | self.bitArrive[self.time_count, iot_index], self.t_iot_comp[iot_index], 145 | self.t_iot_tran[iot_index], 146 | np.squeeze(self.b_fog_comp[iot_index, :])]) 147 | 148 | lstm_state_all = np.zeros([self.n_iot, self.n_lstm_state]) 149 | 150 | return observation_all, lstm_state_all 151 | 152 | # perform action, observe state and delay (several steps later) 153 | def step(self, action): 154 | 155 | # EXTRACT ACTION FOR EACH IOT 156 | iot_action_local = np.zeros([self.n_iot], np.int32) 157 | iot_action_fog = np.zeros([self.n_iot], np.int32) 158 | for iot_index in range(self.n_iot): 159 | iot_action = action[iot_index] 160 | iot_action_fog[iot_index] = int(iot_action - 1) 161 | if iot_action == 0: 162 | iot_action_local[iot_index] = 
1 163 | 164 | # COMPUTATION QUEUE UPDATE =================== 165 | for iot_index in range(self.n_iot): 166 | 167 | iot_bitarrive = np.squeeze(self.bitArrive[self.time_count, iot_index]) 168 | iot_comp_cap = np.squeeze(self.comp_cap_iot[iot_index]) 169 | iot_comp_density = self.comp_density[iot_index] 170 | 171 | # INPUT 172 | if iot_action_local[iot_index] == 1: 173 | tmp_dict = {'size': iot_bitarrive, 'time': self.time_count} 174 | self.Queue_iot_comp[iot_index].put(tmp_dict) 175 | 176 | # TASK ON PROCESS 177 | if math.isnan(self.task_on_process_local[iot_index]['remain']) \ 178 | and (not self.Queue_iot_comp[iot_index].empty()): 179 | while not self.Queue_iot_comp[iot_index].empty(): 180 | # only put the non-zero task to the processor 181 | get_task = self.Queue_iot_comp[iot_index].get() 182 | # since it is at the beginning of the time slot, = self.max_delay is acceptable 183 | if get_task['size'] != 0: 184 | if self.time_count - get_task['time'] + 1 <= self.max_delay: 185 | self.task_on_process_local[iot_index]['size'] = get_task['size'] 186 | self.task_on_process_local[iot_index]['time'] = get_task['time'] 187 | self.task_on_process_local[iot_index]['remain'] \ 188 | = self.task_on_process_local[iot_index]['size'] 189 | break 190 | else: 191 | self.process_delay[get_task['time'], iot_index] = self.max_delay 192 | self.process_delay_unfinish_ind[get_task['time'], iot_index] = 1 193 | 194 | # PROCESS 195 | if self.task_on_process_local[iot_index]['remain'] > 0: 196 | self.task_on_process_local[iot_index]['remain'] = \ 197 | self.task_on_process_local[iot_index]['remain'] - iot_comp_cap / iot_comp_density 198 | # if no remain, compute processing delay 199 | if self.task_on_process_local[iot_index]['remain'] <= 0: 200 | self.process_delay[self.task_on_process_local[iot_index]['time'], iot_index] \ 201 | = self.time_count - self.task_on_process_local[iot_index]['time'] + 1 202 | self.task_on_process_local[iot_index]['remain'] = np.nan 203 | elif self.time_count - self.task_on_process_local[iot_index]['time'] + 1 == self.max_delay: 204 | self.process_delay[self.task_on_process_local[iot_index]['time'], iot_index] = self.max_delay 205 | self.process_delay_unfinish_ind[self.task_on_process_local[iot_index]['time'], iot_index] = 1 206 | self.task_on_process_local[iot_index]['remain'] = np.nan 207 | 208 | self.drop_iot_count = self.drop_iot_count + 1 209 | 210 | # OTHER INFO self.t_iot_comp[iot_index] 211 | # update self.t_iot_comp[iot_index] only when iot_bitrate != 0 212 | if iot_bitarrive != 0: 213 | tmp_tilde_t_iot_comp = np.max([self.t_iot_comp[iot_index] + 1, self.time_count]) 214 | self.t_iot_comp[iot_index] = np.min([tmp_tilde_t_iot_comp 215 | + math.ceil(iot_bitarrive * iot_action_local[iot_index] 216 | / (iot_comp_cap / iot_comp_density)) - 1, 217 | self.time_count + self.max_delay - 1]) 218 | 219 | # FOG QUEUE UPDATE ========================= 220 | for iot_index in range(self.n_iot): 221 | 222 | iot_comp_density = self.comp_density[iot_index] 223 | 224 | for fog_index in range(self.n_fog): 225 | 226 | # TASK ON PROCESS 227 | if math.isnan(self.task_on_process_fog[iot_index][fog_index]['remain']) \ 228 | and (not self.Queue_fog_comp[iot_index][fog_index].empty()): 229 | while not self.Queue_fog_comp[iot_index][fog_index].empty(): 230 | get_task = self.Queue_fog_comp[iot_index][fog_index].get() 231 | if self.time_count - get_task['time'] + 1 <= self.max_delay: 232 | self.task_on_process_fog[iot_index][fog_index]['size'] \ 233 | = get_task['size'] 234 | 
self.task_on_process_fog[iot_index][fog_index]['time'] \ 235 | = get_task['time'] 236 | self.task_on_process_fog[iot_index][fog_index]['remain'] \ 237 | = self.task_on_process_fog[iot_index][fog_index]['size'] 238 | break 239 | else: 240 | self.process_delay[get_task['time'], iot_index] = self.max_delay 241 | self.process_delay_unfinish_ind[get_task['time'], iot_index] = 1 242 | 243 | # PROCESS 244 | self.fog_drop[iot_index, fog_index] = 0 245 | if self.task_on_process_fog[iot_index][fog_index]['remain'] > 0: 246 | self.task_on_process_fog[iot_index][fog_index]['remain'] = \ 247 | self.task_on_process_fog[iot_index][fog_index]['remain'] \ 248 | - self.comp_cap_fog[fog_index] / iot_comp_density / self.fog_iot_m[fog_index] 249 | # if no remain, compute processing delay 250 | if self.task_on_process_fog[iot_index][fog_index]['remain'] <= 0: 251 | self.process_delay[self.task_on_process_fog[iot_index][fog_index]['time'],iot_index] \ 252 | = self.time_count - self.task_on_process_fog[iot_index][fog_index]['time'] + 1 253 | self.task_on_process_fog[iot_index][fog_index]['remain'] = np.nan 254 | elif self.time_count - self.task_on_process_fog[iot_index][fog_index]['time'] + 1 == self.max_delay: 255 | self.process_delay[self.task_on_process_fog[iot_index][fog_index]['time'], iot_index] = \ 256 | self.max_delay 257 | self.process_delay_unfinish_ind[self.task_on_process_fog[iot_index][fog_index]['time'], 258 | iot_index] = 1 259 | self.fog_drop[iot_index, fog_index] = self.task_on_process_fog[iot_index][fog_index]['remain'] 260 | self.task_on_process_fog[iot_index][fog_index]['remain'] = np.nan 261 | 262 | self.drop_fog_count = self.drop_fog_count + 1 263 | 264 | # OTHER INFO 265 | if self.fog_iot_m[fog_index] != 0: 266 | self.b_fog_comp[iot_index, fog_index] \ 267 | = np.max([self.b_fog_comp[iot_index, fog_index] 268 | - self.comp_cap_fog[fog_index] / iot_comp_density / self.fog_iot_m[fog_index] 269 | - self.fog_drop[iot_index, fog_index], 0]) 270 | 271 | # TRANSMISSION QUEUE UPDATE =================== 272 | for iot_index in range(self.n_iot): 273 | 274 | iot_tran_cap = np.squeeze(self.tran_cap_iot[iot_index,:]) 275 | iot_bitarrive = np.squeeze(self.bitArrive[self.time_count, iot_index]) 276 | 277 | # INPUT 278 | if iot_action_local[iot_index] == 0: 279 | tmp_dict = {'size': self.bitArrive[self.time_count, iot_index], 'time': self.time_count, 280 | 'fog': iot_action_fog[iot_index]} 281 | self.Queue_iot_tran[iot_index].put(tmp_dict) 282 | 283 | # TASK ON PROCESS 284 | if math.isnan(self.task_on_transmit_local[iot_index]['remain']) \ 285 | and (not self.Queue_iot_tran[iot_index].empty()): 286 | while not self.Queue_iot_tran[iot_index].empty(): 287 | get_task = self.Queue_iot_tran[iot_index].get() 288 | if get_task['size'] != 0: 289 | if self.time_count - get_task['time'] + 1 <= self.max_delay: 290 | self.task_on_transmit_local[iot_index]['size'] = get_task['size'] 291 | self.task_on_transmit_local[iot_index]['time'] = get_task['time'] 292 | self.task_on_transmit_local[iot_index]['fog'] = int(get_task['fog']) 293 | self.task_on_transmit_local[iot_index]['remain'] = \ 294 | self.task_on_transmit_local[iot_index]['size'] 295 | break 296 | else: 297 | self.process_delay[get_task['time'], iot_index] = self.max_delay 298 | self.process_delay_unfinish_ind[get_task['time'], iot_index] = 1 299 | 300 | # PROCESS 301 | if self.task_on_transmit_local[iot_index]['remain'] > 0: 302 | self.task_on_transmit_local[iot_index]['remain'] = \ 303 | self.task_on_transmit_local[iot_index]['remain'] \ 304 | - 
iot_tran_cap[self.task_on_transmit_local[iot_index]['fog']]
305 | 
306 |                 # UPDATE FOG QUEUE
307 |                 if self.task_on_transmit_local[iot_index]['remain'] <= 0:
308 |                     tmp_dict = {'size': self.task_on_transmit_local[iot_index]['size'],
309 |                                 'time': self.task_on_transmit_local[iot_index]['time']}
310 |                     self.Queue_fog_comp[iot_index][self.task_on_transmit_local[iot_index]['fog']].put(tmp_dict)
311 | 
312 |                     # OTHER INFO
313 |                     fog_index = self.task_on_transmit_local[iot_index]['fog']
314 |                     self.b_fog_comp[iot_index, fog_index] \
315 |                         = self.b_fog_comp[iot_index, fog_index] + self.task_on_transmit_local[iot_index]['size']
316 |                     self.process_delay_trans[self.task_on_transmit_local[iot_index]['time'], iot_index] \
317 |                         = self.time_count - self.task_on_transmit_local[iot_index]['time'] + 1
318 |                     self.task_on_transmit_local[iot_index]['remain'] = np.nan
319 | 
320 |                 elif self.time_count - self.task_on_transmit_local[iot_index]['time'] + 1 == self.max_delay:
321 |                     self.process_delay[self.task_on_transmit_local[iot_index]['time'], iot_index] = self.max_delay
322 |                     self.process_delay_trans[self.task_on_transmit_local[iot_index]['time'], iot_index] \
323 |                         = self.max_delay
324 |                     self.process_delay_unfinish_ind[self.task_on_transmit_local[iot_index]['time'], iot_index] = 1
325 |                     self.task_on_transmit_local[iot_index]['remain'] = np.nan
326 | 
327 |                     self.drop_trans_count = self.drop_trans_count + 1
328 | 
329 |             # OTHER INFO: update the transmission queue estimate self.t_iot_tran[iot_index]
330 |             if iot_bitarrive != 0:
331 |                 tmp_tilde_t_iot_tran = np.max([self.t_iot_tran[iot_index] + 1, self.time_count])
332 |                 self.t_iot_tran[iot_index] = np.min([tmp_tilde_t_iot_tran
333 |                                                      + math.ceil(iot_bitarrive * (1 - iot_action_local[iot_index])
334 |                                                                  / iot_tran_cap[iot_action_fog[iot_index]]) - 1,
335 |                                                      self.time_count + self.max_delay - 1])
336 | 
337 |         # COMPUTE CONGESTION (FOR NEXT TIME SLOT)
338 |         self.fog_iot_m_observe = self.fog_iot_m
339 |         self.fog_iot_m = np.zeros(self.n_fog)
340 |         for fog_index in range(self.n_fog):
341 |             for iot_index in range(self.n_iot):
342 |                 if (not self.Queue_fog_comp[iot_index][fog_index].empty()) \
343 |                         or self.task_on_process_fog[iot_index][fog_index]['remain'] > 0:
344 |                     self.fog_iot_m[fog_index] += 1
345 | 
346 |         # TIME UPDATE
347 |         self.time_count = self.time_count + 1
348 |         done = False
349 |         if self.time_count >= self.n_time:
350 |             done = True
351 |             # set all the tasks' processing delay and unfinished indicator
352 |             for time_index in range(self.n_time):
353 |                 for iot_index in range(self.n_iot):
354 |                     if self.process_delay[time_index, iot_index] == 0 and self.bitArrive[time_index, iot_index] != 0:
355 |                         self.process_delay[time_index, iot_index] = (self.time_count - 1) - time_index + 1
356 |                         self.process_delay_unfinish_ind[time_index, iot_index] = 1
357 | 
358 |         # OBSERVATION
359 |         observation_all_ = np.zeros([self.n_iot, self.n_features])
360 |         lstm_state_all_ = np.zeros([self.n_iot, self.n_lstm_state])
361 |         if not done:
362 |             for iot_index in range(self.n_iot):
363 |                 # observation is zero if there is no task arrival
364 |                 if self.bitArrive[self.time_count, iot_index] != 0:
365 |                     # state [A, B^{comp}, B^{tran}, [B^{fog}]]
366 |                     observation_all_[iot_index, :] = np.hstack([
367 |                         self.bitArrive[self.time_count, iot_index],
368 |                         self.t_iot_comp[iot_index] - self.time_count + 1,
369 |                         self.t_iot_tran[iot_index] - self.time_count + 1,
370 |                         self.b_fog_comp[iot_index, :]])
371 | 
372 |                 lstm_state_all_[iot_index, :] = np.hstack(self.fog_iot_m_observe)
373 | 
374 |         return observation_all_, lstm_state_all_, done
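
For reference, a minimal sketch (not one of the repository files) of how the Offload environment above is driven: one action per IoT device per time slot, a reset at the start of each episode, and a step per slot until done. The parameter values below are illustrative assumptions; train.py itself uses NUM_TIME = 110, MAX_DELAY = 10 and one DeepQNetwork agent per device instead of the random policy shown here.

import numpy as np
from fog_env import Offload

env = Offload(num_iot=5, num_fog=2, num_time=110, max_delay=10, task_arrive_prob=0.3)

# Bernoulli task arrivals with uniform sizes; the last max_delay slots carry no arrivals (as in train.py)
bitarrive = np.random.uniform(env.min_bit_arrive, env.max_bit_arrive, size=[env.n_time, env.n_iot])
bitarrive = bitarrive * (np.random.uniform(0, 1, size=[env.n_time, env.n_iot]) < env.task_arrive_prob)
bitarrive[-env.max_delay:, :] = 0

observation_all, lstm_state_all = env.reset(bitarrive)
done = False
while not done:
    # action 0 = process locally, action k in {1, ..., n_fog} = offload to fog node k - 1
    action_all = np.random.randint(0, env.n_actions, size=env.n_iot)
    observation_all, lstm_state_all, done = env.step(action_all)

arrived = (bitarrive > 0).sum()
print('avg delay (slots):', env.process_delay[env.process_delay > 0].mean())
print('dropped ratio:', env.process_delay_unfinish_ind.sum() / arrived)
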
--------------------------------------------------------------------------------
/plot.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from glob import glob
3 | import argparse
4 | import json
5 | import matplotlib.pyplot as plt
6 | 
7 | 
8 | def plot_avg_cost_graph(costs, colors, labels, title='Title', show=True, save=True,
9 |                         path=None):
10 |     fig, axs = plt.subplots(1, figsize=(10, 6))
11 |     x = np.arange(len(costs[0])).tolist()
12 | 
13 |     for cost, color, label in zip(costs, colors, labels):
14 |         axs.plot(x, cost, color=color, label=label)
15 |     axs.set(title=title)
16 |     axs.set(ylabel='Avg. Cost')
17 |     axs.set(xlabel='Episode')
18 |     axs.legend(loc='upper right')
19 | 
20 |     if save:
21 |         plt.savefig(path + "avg_cost_plot.png")
22 | 
23 |     if show:
24 |         plt.show(block=False)
25 |         input()
26 | 
27 | 
28 | def plot_dropped_ratio_graph(dropped_ratios, x_label, title='Title', show=True, save=True,
29 |                              path=None):
30 |     fig, axs = plt.subplots(1, figsize=(10, 6))
31 | 
32 |     dropped_ratios = np.array(sorted(dropped_ratios))
33 | 
34 |     axs.plot(dropped_ratios[:, 0], dropped_ratios[:, 1], color='green', label='DRL')
35 |     axs.set(title=title)
36 |     axs.set(ylabel='Dropped Task Ratio')
37 |     axs.set(xlabel=x_label)
38 |     axs.legend(loc='upper right')
39 | 
40 |     if save:
41 |         plt.savefig(path + "dropped_ratio_plot.png")
42 | 
43 |     if show:
44 |         plt.show(block=False)
45 |         input()
46 | 
47 | 
48 | def plot_avg_delay_graph(avg_delay, x_label, title='Title', show=True, save=True,
49 |                          path=None):
50 |     fig, axs = plt.subplots(1, figsize=(10, 6))
51 | 
52 |     avg_delay = np.array(sorted(avg_delay))
53 | 
54 |     axs.plot(avg_delay[:, 0], avg_delay[:, 1], color='green', label='DRL')
55 |     axs.set(title=title)
56 |     axs.set(ylabel='Avg. 
Delay (Sec)')
57 |     axs.set(xlabel=x_label)
58 |     axs.legend(loc='upper right')
59 | 
60 |     if save:
61 |         plt.savefig(path + "avg_delay_plot.png")
62 | 
63 |     if show:
64 |         plt.show(block=False)
65 |         input()
66 | 
67 | 
68 | def main(args):
69 |     dirs = glob(f"{args.path}/*/")
70 | 
71 |     if args.type == 'cost':
72 |         costs = list()
73 |         colors = list()
74 |         labels = list()
75 |         for dir in dirs:
76 |             avg_costs_np = np.load(dir + "/plots/avg_cost.npy")
77 |             avg_costs_np = np.convolve(avg_costs_np, np.ones((args.window,))/args.window,
78 |                                        mode='valid')
79 |             costs.append(avg_costs_np)
80 |             with open(dir + "/plots/plot_props.dat") as fp:
81 |                 data = json.load(fp)
82 |             colors.append(data['color'])
83 |             labels.append(data['label'])
84 |         plot_avg_cost_graph(costs, colors, labels, args.title, path=args.path)
85 |     elif args.type == 'dropped':
86 |         dropped_ratios = list()
87 |         for dir in dirs:
88 |             with open(dir + "/results/results.dat") as fp:
89 |                 data = json.load(fp)
90 |             dropped_ratios.append(data['avg_dropped'])
91 |         plot_dropped_ratio_graph(dropped_ratios, args.x_label, args.title, path=args.path)
92 |     elif args.type == 'delay':
93 |         avg_delays = list()
94 |         for dir in dirs:
95 |             with open(dir + "/results/results.dat") as fp:
96 |                 data = json.load(fp)
97 |             avg_delays.append(data['avg_delay'])
98 |         plot_avg_delay_graph(avg_delays, args.x_label, args.title, path=args.path)
99 | 
100 | 
101 | if __name__ == "__main__":
102 | 
103 |     parser = argparse.ArgumentParser(description='Plot Results for Mobile Edge Computing')
104 |     parser.add_argument('--type', type=str, default='cost',
105 |                         help='plot type: {cost, dropped, delay} (default: cost)')
106 |     parser.add_argument('--path', type=str, default=None,
107 |                         help='path to results directory (default: None)')
108 |     parser.add_argument('--window', type=int, default=50,
109 |                         help='moving average window size (default: 50)')
110 |     parser.add_argument('--x_label', type=str, default=None,
111 |                         help='x_label for dropped task and avg. 
delay plots') 112 | parser.add_argument('--title', type=str, default='Title', 113 | help='plot title (default: Title)') 114 | args = parser.parse_args() 115 | 116 | main(args) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import random 4 | import time 5 | import os 6 | import argparse 7 | import json 8 | import matplotlib.pyplot as plt 9 | 10 | from datetime import datetime 11 | from shutil import rmtree 12 | 13 | from fog_env import Offload 14 | from RL_brain import DeepQNetwork 15 | from utils import plot_graphs 16 | 17 | np.set_printoptions(threshold=np.inf) 18 | 19 | 20 | def random_pick(some_list, probabilities): 21 | x = random.uniform(0, 1) 22 | cumulative_probability = 0.0 23 | for item, item_probability in zip(some_list, probabilities): 24 | cumulative_probability += item_probability 25 | if x < cumulative_probability: 26 | break 27 | return item 28 | 29 | 30 | def reward_fun(delay, max_delay, unfinish_indi): 31 | # still use reward, but use the negative value 32 | if unfinish_indi: 33 | reward = - max_delay * 2 34 | else: 35 | reward = - delay 36 | 37 | return reward 38 | 39 | 40 | def train(env, iot_RL_list, num_episodes, learning_freq=10, show=False, random=False, 41 | training_dir=None): 42 | start_time = time.time() 43 | 44 | RL_step = 0 45 | 46 | episode_rewards = list() 47 | episode_dropped = list() 48 | episode_delay = list() 49 | 50 | fig, axs = plt.subplots(3, figsize=(10, 12), sharex=True) 51 | 52 | for episode in range(num_episodes): 53 | # BITRATE ARRIVAL 54 | bitarrive = np.random.uniform(env.min_bit_arrive, env.max_bit_arrive, 55 | size=[env.n_time, env.n_iot]) 56 | task_prob = env.task_arrive_prob 57 | bitarrive = bitarrive * ( 58 | np.random.uniform(0, 1, size=[env.n_time, env.n_iot]) < task_prob) 59 | bitarrive[-env.max_delay:, :] = np.zeros([env.max_delay, env.n_iot]) 60 | 61 | # rewards_dict = {d: [] for d in range(env.n_iot)} 62 | rewards_list = list() 63 | dropped_list = list() 64 | delay_list = list() 65 | 66 | # ============================================================================= # 67 | # ========================================= DRL =============================== # 68 | # ============================================================================= # 69 | 70 | # OBSERVATION MATRIX SETTING 71 | history = list() 72 | for time_index in range(env.n_time): 73 | history.append(list()) 74 | for iot_index in range(env.n_iot): 75 | tmp_dict = {'observation': np.zeros(env.n_features), 76 | 'lstm': np.zeros(env.n_lstm_state), 77 | 'action': np.nan, 78 | 'observation_': np.zeros(env.n_features), 79 | 'lstm_': np.zeros(env.n_lstm_state)} 80 | history[time_index].append(tmp_dict) 81 | reward_indicator = np.zeros([env.n_time, env.n_iot]) 82 | 83 | # INITIALIZE OBSERVATION 84 | observation_all, lstm_state_all = env.reset(bitarrive) 85 | 86 | # TRAIN DRL 87 | while True: 88 | 89 | # PERFORM ACTION 90 | action_all = np.zeros([env.n_iot]) 91 | for iot_index in range(env.n_iot): 92 | 93 | observation = np.squeeze(observation_all[iot_index, :]) 94 | 95 | if np.sum(observation) == 0: 96 | # if there is no task, action = 0 (also need to be stored) 97 | action_all[iot_index] = 0 98 | else: 99 | if random: # Follow a random action 100 | action_all[iot_index] = np.random.randint(env.n_actions) 101 | else: # Follow RL agent action 102 | action_all[iot_index] = \ 103 | 
iot_RL_list[iot_index].choose_action(observation) 104 | 105 | if observation[0] != 0: 106 | iot_RL_list[iot_index].do_store_action(episode, env.time_count, 107 | action_all[iot_index]) 108 | 109 | # OBSERVE THE NEXT STATE AND PROCESS DELAY (REWARD) 110 | observation_all_, lstm_state_all_, done = env.step(action_all) 111 | 112 | # should store this information in EACH time slot 113 | for iot_index in range(env.n_iot): 114 | iot_RL_list[iot_index].update_lstm(lstm_state_all_[iot_index, :]) 115 | 116 | process_delay = env.process_delay 117 | unfinish_indi = env.process_delay_unfinish_ind 118 | 119 | # STORE MEMORY; STORE TRANSITION IF THE TASK PROCESS DELAY IS JUST UPDATED 120 | for iot_index in range(env.n_iot): 121 | 122 | history[env.time_count - 1][iot_index]['observation'] = \ 123 | observation_all[iot_index, :] 124 | history[env.time_count - 1][iot_index]['lstm'] = \ 125 | np.squeeze(lstm_state_all[iot_index, :]) 126 | history[env.time_count - 1][iot_index]['action'] = action_all[iot_index] 127 | history[env.time_count - 1][iot_index]['observation_'] = \ 128 | observation_all_[iot_index] 129 | history[env.time_count - 1][iot_index]['lstm_'] = \ 130 | np.squeeze(lstm_state_all_[iot_index, :]) 131 | 132 | update_index = np.where((1 - reward_indicator[:, iot_index]) * 133 | process_delay[:, iot_index] > 0)[0] 134 | 135 | if len(update_index) != 0: 136 | for update_ii in range(len(update_index)): 137 | time_index = update_index[update_ii] 138 | 139 | reward = reward_fun( 140 | process_delay[time_index, iot_index], env.max_delay, 141 | unfinish_indi[time_index, iot_index]) 142 | 143 | dropped_list.append(unfinish_indi[time_index, iot_index]) 144 | if not unfinish_indi[time_index, iot_index]: 145 | delay_list.append(process_delay[time_index, iot_index]) 146 | 147 | iot_RL_list[iot_index].store_transition( 148 | history[time_index][iot_index]['observation'], 149 | history[time_index][iot_index]['lstm'], 150 | history[time_index][iot_index]['action'], 151 | reward, 152 | history[time_index][iot_index]['observation_'], 153 | history[time_index][iot_index]['lstm_']) 154 | 155 | iot_RL_list[iot_index].do_store_reward( 156 | episode, time_index, reward) 157 | 158 | iot_RL_list[iot_index].do_store_delay( 159 | episode, time_index, process_delay[time_index, iot_index]) 160 | 161 | reward_indicator[time_index, iot_index] = 1 162 | 163 | # rewards_dict[iot_index].append(-reward) 164 | rewards_list.append(-reward) 165 | 166 | # ADD STEP (one step does not mean one store) 167 | RL_step += 1 168 | 169 | # UPDATE OBSERVATION 170 | observation_all = observation_all_ 171 | lstm_state_all = lstm_state_all_ 172 | 173 | # CONTROL LEARNING START TIME AND FREQUENCY 174 | if (RL_step > 200) and (RL_step % learning_freq == 0): 175 | for iot in range(env.n_iot): 176 | iot_RL_list[iot].learn() 177 | 178 | # GAME ENDS 179 | if done: 180 | break 181 | 182 | avg_reward = np.mean(rewards_list)/env.n_iot 183 | episode_rewards.append(avg_reward) 184 | 185 | dropped_ratio = np.mean(dropped_list)/env.n_iot 186 | episode_dropped.append(dropped_ratio) 187 | 188 | avg_delay = np.mean(delay_list)/env.n_iot 189 | episode_delay.append(avg_delay) 190 | 191 | print(f"Episode: {episode} - Reward: {avg_reward} - Dropped: {dropped_ratio} - " 192 | + f"Delay: {avg_delay}") 193 | 194 | if episode % 10 == 0: 195 | plot_graphs(axs, episode_rewards, episode_dropped, episode_delay, show=show, 196 | save=True, path=training_dir) 197 | 198 | # ============================================================================ # 199 | # 
======================================== DRL END============================ # 200 | # ============================================================================ # 201 | 202 | plot_graphs(axs, episode_rewards, episode_dropped, episode_delay, show=show, 203 | save=True, path=training_dir) 204 | 205 | end_time = time.time() 206 | print("\nTraining Time: %.2f(s)" % (end_time - start_time)) 207 | input("Completed training.\nPress Enter to Finish") 208 | 209 | 210 | def evaluate(env, iot_RL_list, num_episodes, random=False, training_dir=None, 211 | plot_x=None): 212 | episode_rewards = list() 213 | episode_dropped = list() 214 | episode_delay = list() 215 | 216 | for episode in range(num_episodes): 217 | # BITRATE ARRIVAL 218 | bitarrive = np.random.uniform(env.min_bit_arrive, env.max_bit_arrive, 219 | size=[env.n_time, env.n_iot]) 220 | task_prob = env.task_arrive_prob 221 | bitarrive = bitarrive * ( 222 | np.random.uniform(0, 1, size=[env.n_time, env.n_iot]) < task_prob) 223 | bitarrive[-env.max_delay:, :] = np.zeros([env.max_delay, env.n_iot]) 224 | 225 | # rewards_dict = {d: [] for d in range(env.n_iot)} 226 | rewards_list = list() 227 | dropped_list = list() 228 | delay_list = list() 229 | 230 | reward_indicator = np.zeros([env.n_time, env.n_iot]) 231 | 232 | # INITIALIZE OBSERVATION 233 | observation_all, lstm_state_all = env.reset(bitarrive) 234 | 235 | # Episode until done 236 | while True: 237 | 238 | # PERFORM ACTION 239 | action_all = np.zeros([env.n_iot]) 240 | for iot_index in range(env.n_iot): 241 | 242 | observation = np.squeeze(observation_all[iot_index, :]) 243 | 244 | if np.sum(observation) == 0: 245 | # if there is no task, action = 0 (also need to be stored) 246 | action_all[iot_index] = 0 247 | else: 248 | if random: # Follow a random action 249 | action_all[iot_index] = np.random.randint(env.n_actions) 250 | else: # Follow RL agent action 251 | action_all[iot_index] = \ 252 | iot_RL_list[iot_index].choose_action(observation, 253 | inference=True) 254 | 255 | if observation[0] != 0: 256 | iot_RL_list[iot_index].do_store_action(episode, env.time_count, 257 | action_all[iot_index]) 258 | 259 | # OBSERVE THE NEXT STATE AND PROCESS DELAY (REWARD) 260 | observation_all_, lstm_state_all_, done = env.step(action_all) 261 | 262 | process_delay = env.process_delay 263 | unfinish_indi = env.process_delay_unfinish_ind 264 | 265 | # STORE MEMORY; STORE TRANSITION IF THE TASK PROCESS DELAY IS JUST UPDATED 266 | for iot_index in range(env.n_iot): 267 | update_index = np.where((1 - reward_indicator[:, iot_index]) * 268 | process_delay[:, iot_index] > 0)[0] 269 | 270 | if len(update_index) != 0: 271 | for update_ii in range(len(update_index)): 272 | time_index = update_index[update_ii] 273 | 274 | reward = reward_fun( 275 | process_delay[time_index, iot_index], env.max_delay, 276 | unfinish_indi[time_index, iot_index]) 277 | 278 | dropped_list.append(unfinish_indi[time_index, iot_index]) 279 | if not unfinish_indi[time_index, iot_index]: 280 | delay_list.append(process_delay[time_index, iot_index]) 281 | 282 | reward_indicator[time_index, iot_index] = 1 283 | 284 | rewards_list.append(-reward) 285 | 286 | # UPDATE OBSERVATION 287 | observation_all = observation_all_ 288 | 289 | # GAME ENDS 290 | if done: 291 | break 292 | 293 | avg_reward = np.mean(rewards_list)/env.n_iot 294 | episode_rewards.append(avg_reward) 295 | 296 | dropped_ratio = np.mean(dropped_list)/env.n_iot 297 | episode_dropped.append(dropped_ratio) 298 | 299 | avg_delay = np.mean(delay_list)/env.n_iot 300 | 
episode_delay.append(avg_delay) 301 | 302 | avg_episode_rewards = np.mean(episode_rewards) 303 | avg_episode_dropped = np.mean(episode_dropped) 304 | avg_episode_delay = np.mean(episode_delay) 305 | 306 | print(f"\nAvg. Eval Reward: {avg_episode_rewards} - " + 307 | f"Avg. Eval Dropped: {avg_episode_dropped} - " + 308 | f"Avg. Eval Delay: {avg_episode_delay}") 309 | 310 | eval_results = dict() 311 | eval_results['avg_rewards'] = (plot_x, avg_episode_rewards) 312 | eval_results['avg_dropped'] = (plot_x, avg_episode_dropped) 313 | eval_results['avg_delay'] = (plot_x, avg_episode_delay) 314 | 315 | with open(training_dir + 'results/results.dat', 'w') as jf: 316 | json.dump(eval_results, jf, indent=4) 317 | 318 | input("Completed Evaluation") 319 | 320 | 321 | def main(args): 322 | # Set random generator seed 323 | tf.set_random_seed(args.seed) 324 | np.random.seed(args.seed) 325 | random.seed(args.seed) 326 | 327 | # Create a timestamp directory to save model, parameter and log files 328 | training_dir = \ 329 | ('training/' + ('' if args.path is None else args.path + '/') + 330 | str(datetime.now().date()) + '_' + str(datetime.now().hour).zfill(2) + '-' + 331 | str(datetime.now().minute).zfill(2) + '/') 332 | 333 | # Delete if a directory with the same name already exists 334 | if os.path.exists(training_dir): 335 | rmtree(training_dir) 336 | 337 | # Create empty directories for saving model, parameter and log files 338 | os.makedirs(training_dir) 339 | os.makedirs(training_dir + 'plots') 340 | os.makedirs(training_dir + 'results') 341 | os.makedirs(training_dir + 'params') 342 | 343 | # Dump params to file 344 | with open(training_dir + 'params/params.dat', 'w') as jf: 345 | json.dump(vars(args), jf, indent=4) 346 | 347 | plot_dict = {'color': args.plot_color, 'label': args.plot_label} 348 | with open(training_dir + 'plots/plot_props.dat', 'w') as jf: 349 | json.dump(plot_dict, jf, indent=4) 350 | 351 | # GENERATE ENVIRONMENT 352 | env = Offload(args.num_iot, args.num_fog, NUM_TIME, MAX_DELAY, args.task_arrival_prob) 353 | 354 | # GENERATE MULTIPLE CLASSES FOR RL 355 | iot_RL_list = list() 356 | for iot in range(args.num_iot): 357 | iot_RL_list.append(DeepQNetwork(env.n_actions, env.n_features, env.n_lstm_state, 358 | env.n_time, 359 | learning_rate=args.lr, 360 | reward_decay=0.9, 361 | e_greedy=0.99, 362 | replace_target_iter=200, # update target net 363 | memory_size=500, # maximum of memory 364 | batch_size=args.batch_size, 365 | optimizer=args.optimizer, 366 | seed=args.seed, 367 | )) 368 | 369 | # TRAIN THE SYSTEM 370 | train(env, iot_RL_list, args.num_episodes, args.learning_freq, args.plot, args.random, 371 | training_dir) 372 | print('Training Finished') 373 | 374 | if args.training_var is not None: 375 | if args.training_var == 'lr': 376 | plot_x = args.lr 377 | elif args.training_var == 'batch_size': 378 | plot_x = args.batch_size 379 | elif args.training_var == 'optimizer': 380 | plot_x = args.optimizer 381 | elif args.training_var == 'learning_freq': 382 | plot_x = args.learning_freq 383 | elif args.training_var == 'task_arrival_prob': 384 | plot_x = args.task_arrival_prob 385 | elif args.training_var == 'num_iot': 386 | plot_x = args.num_iot 387 | else: 388 | plot_x = None 389 | 390 | evaluate(env, iot_RL_list, 20, args.random, training_dir, plot_x) 391 | 392 | 393 | if __name__ == "__main__": 394 | 395 | NUM_TIME_BASE = 100 396 | MAX_DELAY = 10 397 | NUM_TIME = NUM_TIME_BASE + MAX_DELAY 398 | 399 | parser = argparse.ArgumentParser(description='DQL for Mobile Edge Computing') 
400 | parser.add_argument('--num_iot', type=int, default=50, 401 | help='number of IOT devices (default: 50)') 402 | parser.add_argument('--num_fog', type=int, default=5, 403 | help='number of FOG stations (default: 5)') 404 | parser.add_argument('--task_arrival_prob', type=float, default=0.3, 405 | help='Task Arrival Probability (default: 0.3)') 406 | parser.add_argument('--num_episodes', type=int, default=1000, 407 | help='number of training episodes (default: 1000)') 408 | parser.add_argument('--batch_size', type=int, default=32, 409 | help='input batch size for training (default: 32)') 410 | parser.add_argument('--lr', type=float, default=0.001, 411 | help='learning rate for optimizer (default: 0.001)') 412 | parser.add_argument('--optimizer', type=str, default='rms_prop', 413 | help='optimizer for updating the NN (default: rms_prop)') 414 | parser.add_argument('--learning_freq', type=int, default=10, 415 | help='frequency of updating main/eval network (default: 10)') 416 | parser.add_argument('--seed', type=int, default=0, help='random seed (default: 0)') 417 | parser.add_argument('--plot', default=False, action='store_true', 418 | help='plot learning curve (default: False)') 419 | parser.add_argument('--random', default=False, action='store_true', 420 | help='follow a random policy (default: False)') 421 | parser.add_argument('--path', type=str, default=None, 422 | help='path postfix for saving training results (default: None)') 423 | parser.add_argument('--training_var', type=str, default=None, 424 | help='training variant: {lr, task_prob, num_iot, ...}') 425 | parser.add_argument('--plot_color', type=str, default='red', 426 | help='plot color (default: red)') 427 | parser.add_argument('--plot_label', type=str, default='X', 428 | help='plot label (default: X)') 429 | args = parser.parse_args() 430 | 431 | main(args) -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | 5 | def plot_graphs(axs, train_cost, train_dropped, train_delay, show=False, save=False, 6 | path=None): 7 | x = np.arange(len(train_cost)).tolist() 8 | axs[0].clear() 9 | axs[0].plot(x, train_cost, color='red', label='Training') 10 | axs[0].set(title='Avg. Cost') 11 | axs[0].set(ylabel='Avg. Cost') 12 | axs[0].set(xlabel='Episode') 13 | axs[0].legend(loc='upper right') 14 | 15 | axs[1].clear() 16 | axs[1].plot(x, train_dropped, color='blue', label='Training') 17 | axs[1].set(title='Ratio of Dropped Tasks') 18 | axs[1].set(ylabel='Dropped Ratio') 19 | axs[1].set(xlabel='Episode') 20 | axs[1].legend(loc='upper right') 21 | 22 | axs[2].clear() 23 | axs[2].plot(x, train_delay, color='green', label='Training') 24 | axs[2].set(title='Avg. Task Delay') 25 | axs[2].set(ylabel='Avg. Delay (Sec)') 26 | axs[2].set(xlabel='Episode') 27 | axs[2].legend(loc='upper right') 28 | 29 | if save: 30 | plt.savefig(path + "plots/learning_curves.png") 31 | 32 | with open(path + 'plots/avg_cost.npy', 'wb') as f: 33 | np.save(f, np.array(train_cost)) 34 | 35 | with open(path + 'plots/dropped_ratio.npy', 'wb') as f: 36 | np.save(f, np.array(train_dropped)) 37 | 38 | with open(path + 'plots/avg_delay.npy', 'wb') as f: 39 | np.save(f, np.array(train_delay)) 40 | 41 | if show: 42 | plt.show(block=False) 43 | plt.pause(0.01) --------------------------------------------------------------------------------
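
Usage note (an illustration, not a repository file): with the command-line flags defined in train.py and plot.py above, a typical run would look roughly like the lines below. train.py writes its outputs under training/<path>/<timestamp>/{plots,results,params}/, and plot.py aggregates every run directory found under the given --path; the flag values shown are just examples.

python train.py --num_iot 50 --num_fog 5 --num_episodes 1000 --optimizer rms_prop --path demo --plot_label DRL --plot
python plot.py --type cost --path training/demo --window 50 --title "Average Cost per Episode"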