├── DQN.py
├── Double.py
├── Dueling.py
├── README.md
├── Simulation.py
├── Training.py
└── systemModel.py

/DQN.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow.compat.v1 as tf  # TF1-style graph API, run through the compat layer on TensorFlow 2.x
3 | tf.disable_v2_behavior()
4 | np.random.seed(1)
5 | tf.set_random_seed(1)
6 | 
7 | 
8 | # Deep Q Network off-policy
9 | class DeepQNetwork:
10 |     def __init__(
11 |             self,
12 |             n_actions,
13 |             n_features,
14 |             learning_rate=0.01,
15 |             reward_decay=0.9,
16 |             e_greedy=0.9,
17 |             replace_target_iter=100,
18 |             memory_size=500,
19 |             batch_size=32,
20 |             e_greedy_increment=None,
21 |             output_graph=False,
22 |     ):
23 |         self.n_actions = n_actions
24 |         self.n_features = n_features
25 |         self.lr = learning_rate
26 |         self.gamma = reward_decay
27 |         self.epsilon_max = e_greedy
28 |         self.replace_target_iter = replace_target_iter
29 |         self.memory_size = memory_size
30 |         self.batch_size = batch_size
31 |         self.epsilon_increment = e_greedy_increment
32 |         self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
33 | 
34 |         self.learn_step_counter = 0
35 | 
36 |         # replay memory: one row per transition [s, a, r, s_]
37 |         self.memory = np.zeros((self.memory_size, n_features * 2 + 2))
38 | 
39 |         # build eval_net and target_net, then an op that copies eval parameters into the target net
40 |         self._build_net()
41 |         t_params = tf.get_collection('target_net_params')
42 |         e_params = tf.get_collection('eval_net_params')
43 |         self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
44 | 
45 |         self.sess = tf.Session()
46 | 
47 |         if output_graph:
48 |             tf.summary.FileWriter("logs/", self.sess.graph)
49 | 
50 |         self.sess.run(tf.global_variables_initializer())
51 |         self.cost_his = []
52 | 
53 |     def _build_net(self):
54 |         # ------------------ build evaluate_net ------------------
55 |         self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')
56 |         self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')
57 |         with tf.variable_scope('eval_net'):
58 |             # c_names are the collections used later when copying eval_net into target_net
59 |             c_names, n_l1, n_l2, w_initializer, b_initializer = \
60 |                 ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 20, 20, \
61 |                 tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)
62 | 
63 |             # first hidden layer
64 |             with tf.variable_scope('l1'):
65 |                 w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
66 |                 b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
67 |                 l1 = tf.nn.relu(tf.matmul(self.s, w1) + b1)
68 | 
69 |             # second hidden layer
70 |             with tf.variable_scope('lm1'):
71 |                 wm1 = tf.get_variable('wm1', [n_l1, n_l2], initializer=w_initializer, collections=c_names)
72 |                 bm1 = tf.get_variable('bm1', [1, n_l2], initializer=b_initializer, collections=c_names)
73 |                 lm1 = tf.nn.relu(tf.matmul(l1, wm1) + bm1)
74 | 
75 |             # output layer: Q(s, a) for every action
76 |             with tf.variable_scope('l2'):
77 |                 w2 = tf.get_variable('w2', [n_l2, self.n_actions], initializer=w_initializer, collections=c_names)
78 |                 b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
79 |                 self.q_eval = tf.matmul(lm1, w2) + b2
80 | 
81 |         with tf.variable_scope('loss'):
82 |             self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))
83 |         with tf.variable_scope('train'):
84 |             self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
85 | 
86 |         # ------------------ build target_net ------------------
87 |         self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')
88 |         with tf.variable_scope('target_net'):
89 |             # target_net parameters live in their own collection
90 |             c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
91 | 
92 |             with tf.variable_scope('l1'):
93 |                 w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
94 |                 b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
95 |                 l1 = tf.nn.relu(tf.matmul(self.s_, w1) + b1)
96 | 
97 |             with tf.variable_scope('lm1'):
98 |                 wm1 = tf.get_variable('wm1', [n_l1, n_l2], initializer=w_initializer, collections=c_names)
99 |                 bm1 = tf.get_variable('bm1', [1, n_l2], initializer=b_initializer, collections=c_names)
100 |                 lm1 = tf.nn.relu(tf.matmul(l1, wm1) + bm1)
101 | 
102 |             with tf.variable_scope('l2'):
103 |                 w2 = tf.get_variable('w2', [n_l2, self.n_actions], initializer=w_initializer, collections=c_names)
104 |                 b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
105 |                 self.q_next = tf.matmul(lm1, w2) + b2  # mirror eval_net (use the second hidden layer) so the parameter copy is consistent
106 | 
107 |     def store_transition(self, s, a, r, s_):
108 |         if not hasattr(self, 'memory_counter'):
109 |             self.memory_counter = 0
110 | 
111 |         transition = np.hstack((s, [a, r], s_))
112 | 
113 |         # replace the old memory with new memory
114 |         index = self.memory_counter % self.memory_size
115 |         self.memory[index, :] = transition
116 | 
117 |         self.memory_counter += 1
118 | 
119 |     def choose_action(self, observation):
120 |         # the network accepts a batch, so add a batch dimension
121 |         observation = observation[np.newaxis, :]
122 | 
123 |         if np.random.uniform() < self.epsilon:
124 |             # greedy: pick the action with the highest estimated Q value
125 |             actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
126 |             action = np.argmax(actions_value)
127 |         else:
128 |             action = np.random.randint(0, self.n_actions)
129 |         return action
130 | 
131 |     def learn(self):
132 |         # periodically copy eval_net parameters into target_net
133 |         if self.learn_step_counter % self.replace_target_iter == 0:
134 |             self.sess.run(self.replace_target_op)
135 |             print('\ntarget_params_replaced\n')
136 | 
137 |         # sample a batch from replay memory
138 |         if self.memory_counter > self.memory_size:
139 |             sample_index = np.random.choice(self.memory_size, size=self.batch_size)
140 |         else:
141 |             sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
142 |         batch_memory = self.memory[sample_index, :]
143 | 
144 |         q_next, q_eval = self.sess.run(
145 |             [self.q_next, self.q_eval],
146 |             feed_dict={
147 |                 self.s_: batch_memory[:, -self.n_features:],  # fixed params
148 |                 self.s: batch_memory[:, :self.n_features],  # newest params
149 |             })
150 | 
151 |         # change q_target w.r.t q_eval's action
152 |         q_target = q_eval.copy()
153 | 
154 |         batch_index = np.arange(self.batch_size, dtype=np.int32)
155 |         eval_act_index = batch_memory[:, self.n_features].astype(int)
156 |         reward = batch_memory[:, self.n_features + 1]
157 | 
158 |         q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
159 | 
160 | 
161 |         # train eval network
162 |         _, self.cost = self.sess.run([self._train_op, self.loss],
163 |                                      feed_dict={self.s: batch_memory[:, :self.n_features],
164 |                                                 self.q_target: q_target})
165 |         self.cost_his.append(self.cost)
166 | 
167 |         # increasing epsilon
168 |         self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max
169 |         self.learn_step_counter += 1
170 | 
171 |     def plot_cost(self, name='RL'):
172 |         import matplotlib.pyplot as plt
173 |         plt.plot(np.arange(len(self.cost_his)), self.cost_his)
174 |         plt.ylabel('Cost')
175 |         plt.xlabel('training steps')
176 |         plt.savefig('.\\data\\'+name+'cost.svg', format='svg', dpi=400)
177 |         plt.savefig('.\\data\\'+name+'cost.png', format='png', dpi=400)
178 |         #plt.show()
179 | 
180 | 
--------------------------------------------------------------------------------
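A minimal smoke-test sketch for the DeepQNetwork class above. The two-feature, three-action "environment" below (random next states, an arbitrary reward) is invented purely for illustration and is not part of this repository; in the project itself the agent is driven by Training.py and Simulation.py.

import numpy as np
from DQN import DeepQNetwork

agent = DeepQNetwork(n_actions=3, n_features=2, e_greedy_increment=0.001)

s = np.random.rand(2)                       # current observation (2 features)
for step in range(300):
    a = agent.choose_action(s)              # epsilon-greedy over q_eval
    s_ = np.random.rand(2)                  # made-up next observation
    r = float(a == np.argmax(s_))           # made-up reward signal
    agent.store_transition(s, a, r, s_)
    if step > 50 and step % 5 == 0:         # warm up the replay memory, then learn periodically
        agent.learn()
    s = s_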
/Double.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow.compat.v1 as tf  # TF1-style graph API, run through the compat layer on TensorFlow 2.x
3 | tf.disable_v2_behavior()
4 | np.random.seed(1)
5 | tf.set_random_seed(1)
6 | 
7 | 
8 | class DoubleDQN:
9 |     def __init__(
10 |             self,
11 |             n_actions,
12 |             n_features,
13 |             learning_rate=0.005,
14 |             reward_decay=0.9,
15 |             e_greedy=0.9,
16 |             replace_target_iter=200,
17 |             memory_size=3000,
18 |             batch_size=32,
19 |             e_greedy_increment=None,
20 |             output_graph=False,
21 |             double_q=True,
22 |             sess=None,
23 |     ):
24 |         self.n_actions = n_actions
25 |         self.n_features = n_features
26 |         self.lr = learning_rate
27 |         self.gamma = reward_decay
28 |         self.epsilon_max = e_greedy
29 |         self.replace_target_iter = replace_target_iter
30 |         self.memory_size = memory_size
31 |         self.batch_size = batch_size
32 |         self.epsilon_increment = e_greedy_increment
33 |         self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
34 | 
35 |         self.double_q = double_q    # decide to use double q or not
36 | 
37 |         self.learn_step_counter = 0
38 |         self.memory = np.zeros((self.memory_size, n_features*2+2))
39 |         self._build_net()
40 |         t_params = tf.get_collection('target_net_params')
41 |         e_params = tf.get_collection('eval_net_params')
42 |         self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
43 | 
44 |         if sess is None:
45 |             self.sess = tf.Session()
46 |             self.sess.run(tf.global_variables_initializer())
47 |         else:
48 |             self.sess = sess
49 |         if output_graph:
50 |             tf.summary.FileWriter("logs/", self.sess.graph)
51 |         self.cost_his = []
52 | 
53 |     def _build_net(self):
54 |         def build_layers(s, c_names, n_l1, w_initializer, b_initializer):
55 |             with tf.variable_scope('l1'):
56 |                 w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
57 |                 b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
58 |                 l1 = tf.nn.relu(tf.matmul(s, w1) + b1)
59 |             with tf.variable_scope('h1'):
60 |                 wh1 = tf.get_variable('wh1', [n_l1, n_l1], initializer=w_initializer, collections=c_names)
61 |                 bh1 = tf.get_variable('bh1', [1, n_l1], initializer=b_initializer, collections=c_names)
62 |                 lh1 = tf.nn.relu(tf.matmul(l1, wh1) + bh1)
63 | 
64 |             with tf.variable_scope('h2'):
65 | 
66 |                 wh2 = tf.get_variable('wh2', [n_l1, n_l1], initializer=w_initializer, collections=c_names)
67 |                 bh2 = tf.get_variable('bh2', [1, n_l1], initializer=b_initializer, collections=c_names)
68 |                 lh2 = tf.nn.relu(tf.matmul(lh1, wh2) + bh2)
69 | 
70 |             with tf.variable_scope('l2'):
71 |                 w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
72 |                 b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
73 |                 out = tf.matmul(lh2, w2) + b2
74 |             return out
75 |         # ------------------ build evaluate_net ------------------
76 |         self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input
77 |         self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')  # for calculating loss
78 | 
79 |         with tf.variable_scope('eval_net'):
80 |             c_names, n_l1, w_initializer, b_initializer = \
81 |                 ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 20, \
82 |                 tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)  # config of layers
83 | 
84 |             self.q_eval = build_layers(self.s, c_names, n_l1, w_initializer, b_initializer)
85 | 
86 |         with tf.variable_scope('loss'):
87 |             self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))
88 |         with tf.variable_scope('train'):
89 |             self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
90 | 
91 |         # ------------------ build target_net ------------------
92 |         self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # input
93 |         with tf.variable_scope('target_net'):
94 |             c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
95 | 
96 |             self.q_next = build_layers(self.s_, c_names, n_l1, w_initializer, b_initializer)
97 | 
98 |     def store_transition(self, s, a, r, s_):
99 |         if not hasattr(self, 'memory_counter'):
100 |             self.memory_counter = 0
101 |         transition = np.hstack((s, [a, r], s_))
102 |         index = self.memory_counter % self.memory_size
103 |         self.memory[index, :] = transition
104 |         self.memory_counter += 1
105 | 
106 |     def choose_action(self, observation):
107 |         observation = observation[np.newaxis, :]
108 |         actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
109 |         action = np.argmax(actions_value)
110 | 
111 |         if not hasattr(self, 'q'):  # record the action values it gets
112 |             self.q = []
113 |             self.running_q = 0
114 |         self.running_q = self.running_q*0.99 + 0.01 * np.max(actions_value)
115 |         self.q.append(self.running_q)
116 | 
117 |         if np.random.uniform() > self.epsilon:  # choosing action
118 |             action = np.random.randint(0, self.n_actions)
119 |         return action
120 | 
121 |     def learn(self):
122 |         if self.learn_step_counter % self.replace_target_iter == 0:
123 |             self.sess.run(self.replace_target_op)
124 |             print('\ntarget_params_replaced\n')
125 | 
126 |         if self.memory_counter > self.memory_size:
127 |             sample_index = np.random.choice(self.memory_size, size=self.batch_size)
128 |         else:
129 |             sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
130 |         batch_memory = self.memory[sample_index, :]
131 | 
132 |         q_next, q_eval4next = self.sess.run(
133 |             [self.q_next, self.q_eval],
134 |             feed_dict={self.s_: batch_memory[:, -self.n_features:],    # next observation
135 |                        self.s: batch_memory[:, -self.n_features:]})    # next observation
136 |         q_eval = self.sess.run(self.q_eval, {self.s: batch_memory[:, :self.n_features]})
137 | 
138 |         q_target = q_eval.copy()
139 | 
140 |         batch_index = np.arange(self.batch_size, dtype=np.int32)
141 |         eval_act_index = batch_memory[:, self.n_features].astype(int)
142 |         reward = batch_memory[:, self.n_features + 1]
143 | 
144 |         if self.double_q:
145 |             max_act4next = np.argmax(q_eval4next, axis=1)        # the action with the highest value according to q_eval
146 |             selected_q_next = q_next[batch_index, max_act4next]  # Double DQN: evaluate that action with q_next
147 |         else:
148 |             selected_q_next = np.max(q_next, axis=1)    # the natural DQN
149 | 
150 |         q_target[batch_index, eval_act_index] = reward + self.gamma * selected_q_next
151 | 
152 |         _, self.cost = self.sess.run([self._train_op, self.loss],
153 |                                      feed_dict={self.s: batch_memory[:, :self.n_features],
154 |                                                 self.q_target: q_target})
155 |         self.cost_his.append(self.cost)
156 | 
157 |         self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max
158 |         self.learn_step_counter += 1
159 | 
160 |     def plot_cost(self, name='RL'):
161 |         import matplotlib.pyplot as plt
162 |         plt.plot(np.arange(len(self.cost_his)), self.cost_his)
163 |         np.save('.\\data\\NNcost'+name+'.npy', np.array(self.cost_his))
164 |         plt.ylabel('Cost')
165 |         plt.xlabel('training steps')
166 |         plt.savefig('.\\data\\'+name+'cost.svg', format='svg', dpi=400)
167 |         plt.savefig('.\\data\\'+name+'cost.png', format='png', dpi=400)
168 | 
--------------------------------------------------------------------------------
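The only place DoubleDQN differs from the natural DQN update is how selected_q_next is formed in learn() above. A standalone NumPy illustration with invented Q-values for a single transition and three actions:

import numpy as np

q_next = np.array([[1.0, 5.0, 2.0]])        # target net:  Q_target(s', a)
q_eval4next = np.array([[4.0, 3.0, 9.0]])   # eval net:    Q_eval(s', a)
batch_index = np.arange(1)

# Double DQN: the eval net chooses the action, the target net evaluates it.
max_act4next = np.argmax(q_eval4next, axis=1)         # -> action 2
selected_double = q_next[batch_index, max_act4next]   # -> [2.0]

# Natural DQN: the target net both chooses and evaluates the action.
selected_natural = np.max(q_next, axis=1)             # -> [5.0]

print(selected_double, selected_natural)

Decoupling action selection from action evaluation in this way is what reduces the Q-value over-estimation that plain DQN suffers from.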
/Dueling.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow.compat.v1 as tf  # TF1-style graph API, run through the compat layer on TensorFlow 2.x
3 | tf.disable_v2_behavior()
4 | np.random.seed(1)
5 | tf.set_random_seed(1)
6 | 
7 | 
8 | class DuelingDQN:
9 |     def __init__(
10 |             self,
11 |             n_actions,
12 |             n_features,
13 |             learning_rate=0.001,
14 |             reward_decay=0.9,
15 |             e_greedy=0.9,
16 |             replace_target_iter=200,
17 |             memory_size=500,
18 |             batch_size=32,
19 |             e_greedy_increment=None,
20 |             output_graph=False,
21 |             dueling=True,
22 |             sess=None,
23 |     ):
24 |         self.n_actions = n_actions
25 |         self.n_features = n_features
26 |         self.lr = learning_rate
27 |         self.gamma = reward_decay
28 |         self.epsilon_max = e_greedy
29 |         self.replace_target_iter = replace_target_iter
30 |         self.memory_size = memory_size
31 |         self.batch_size = batch_size
32 |         self.epsilon_increment = e_greedy_increment
33 |         self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
34 | 
35 |         self.dueling = dueling      # decide to use dueling DQN or not
36 | 
37 |         self.learn_step_counter = 0
38 |         self.memory = np.zeros((self.memory_size, n_features*2+2))
39 |         self._build_net()
40 |         t_params = tf.get_collection('target_net_params')
41 |         e_params = tf.get_collection('eval_net_params')
42 |         self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
43 | 
44 |         if sess is None:
45 |             self.sess = tf.Session()
46 |             self.sess.run(tf.global_variables_initializer())
47 |         else:
48 |             self.sess = sess
49 |         if output_graph:
50 |             tf.summary.FileWriter("logs/", self.sess.graph)
51 |         self.cost_his = []
52 | 
53 |     def _build_net(self):
54 |         def build_layers(s, c_names, n_l1, w_initializer, b_initializer):
55 |             with tf.variable_scope('l1'):
56 |                 w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
57 |                 b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
58 |                 l1 = tf.nn.relu(tf.matmul(s, w1) + b1)
59 |             with tf.variable_scope('h1'):
60 |                 wh1 = tf.get_variable('wh1', [n_l1, n_l1], initializer=w_initializer, collections=c_names)
61 |                 bh1 = tf.get_variable('bh1', [1, n_l1], initializer=b_initializer, collections=c_names)
62 |                 lh1 = tf.nn.relu(tf.matmul(l1, wh1) + bh1)
63 | 
64 |             with tf.variable_scope('h2'):
65 | 
66 |                 wh2 = tf.get_variable('wh2', [n_l1, n_l1], initializer=w_initializer, collections=c_names)
67 |                 bh2 = tf.get_variable('bh2', [1, n_l1], initializer=b_initializer, collections=c_names)
68 |                 lh2 = tf.nn.relu(tf.matmul(lh1, wh2) + bh2)
69 | 
70 |             if self.dueling:
71 |                 # Dueling DQN
72 |                 with tf.variable_scope('Value'):
73 |                     w2 = tf.get_variable('w2', [n_l1, 1], initializer=w_initializer, collections=c_names)
74 |                     b2 = tf.get_variable('b2', [1, 1], initializer=b_initializer, collections=c_names)
75 |                     self.V = tf.matmul(lh2, w2) + b2
76 | 
77 |                 with tf.variable_scope('Advantage'):
78 |                     w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
79 |                     b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
80 |                     self.A = tf.matmul(lh2, w2) + b2
81 | 
82 |                 with tf.variable_scope('Q'):
83 |                     out = self.V + (self.A - tf.reduce_mean(self.A, axis=1, keepdims=True))  # Q = V(s) + A(s,a)
84 |             else:
85 |                 with tf.variable_scope('Q'):
86 |                     w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
87 |                     b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
88 |                     out = tf.matmul(lh2, w2) + b2  # feed the last hidden layer, as in the dueling branch
89 | 
90 |             return out
91 | 
92 |         # ------------------ build evaluate_net ------------------
93 |         self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input
94 |         self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')  # for calculating loss
95 |         with tf.variable_scope('eval_net'):
96 |             c_names, n_l1, w_initializer, b_initializer = \
97 |                 ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 20, \
98 |                 tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)  # config of layers
99 | 
100 |             self.q_eval = build_layers(self.s, c_names, n_l1, w_initializer, b_initializer)
101 | 
102 |         with tf.variable_scope('loss'):
103 |             self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))
104 |         with tf.variable_scope('train'):
105 |             self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
106 | 
107 |         # ------------------ build target_net ------------------
108 |         self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # input
109 |         with tf.variable_scope('target_net'):
110 |             c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
111 | 
112 |             self.q_next = build_layers(self.s_, c_names, n_l1, w_initializer, b_initializer)
113 | 
114 |     def store_transition(self, s, a, r, s_):
115 |         if not hasattr(self, 'memory_counter'):
116 |             self.memory_counter = 0
117 |         transition = np.hstack((s, [a, r], s_))
118 |         index = self.memory_counter % self.memory_size
119 |         self.memory[index, :] = transition
120 |         self.memory_counter += 1
121 | 
122 |     def choose_action(self, observation):
123 |         observation = observation[np.newaxis, :]
124 |         if np.random.uniform() < self.epsilon:  # choosing action
125 |             actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
126 |             action = np.argmax(actions_value)
127 |         else:
128 |             action = np.random.randint(0, self.n_actions)
129 |         return action
130 | 
131 |     def learn(self):
132 |         if self.learn_step_counter % self.replace_target_iter == 0:
133 |             self.sess.run(self.replace_target_op)
134 |             print('\ntarget_params_replaced\n')
135 | 
136 |         sample_index = np.random.choice(min(self.memory_counter, self.memory_size), size=self.batch_size)  # only sample rows that have been filled
137 |         batch_memory = self.memory[sample_index, :]
138 | 
139 |         q_next = self.sess.run(self.q_next, feed_dict={self.s_: batch_memory[:, -self.n_features:]})  # next observation
140 |         q_eval = self.sess.run(self.q_eval, {self.s: batch_memory[:, :self.n_features]})
141 | 
142 |         q_target = q_eval.copy()
143 | 
144 |         batch_index = np.arange(self.batch_size, dtype=np.int32)
145 |         eval_act_index = batch_memory[:, self.n_features].astype(int)
146 |         reward = batch_memory[:, self.n_features + 1]
147 | 
148 |         q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
149 | 
150 |         _, self.cost = self.sess.run([self._train_op, self.loss],
151 |                                      feed_dict={self.s: batch_memory[:, :self.n_features],
152 |                                                 self.q_target: q_target})
153 |         self.cost_his.append(self.cost)
154 | 
155 |         self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max
156 |         self.learn_step_counter += 1
157 | 
158 |     def plot_cost(self, name='RL'):
159 |         import matplotlib.pyplot as plt
160 |         plt.plot(np.arange(len(self.cost_his)), self.cost_his)
161 |         np.save('.\\data\\NNcost'+name+'.npy', np.array(self.cost_his))
162 |         plt.ylabel('Cost')
163 |         plt.xlabel('training steps')
164 |         plt.savefig('.\\data\\'+name+'cost.svg', format='svg', dpi=400)
165 |         plt.savefig('.\\data\\'+name+'cost.png', format='png', dpi=400)
--------------------------------------------------------------------------------
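The dueling head above recombines the two streams as Q(s,a) = V(s) + (A(s,a) - mean_a A(s,a)); subtracting the mean advantage keeps the value and advantage streams identifiable. A standalone NumPy illustration with invented numbers:

import numpy as np

V = np.array([[2.0]])                # state value V(s), shape (1, 1)
A = np.array([[1.0, -1.0, 3.0]])     # advantages A(s, a), shape (1, n_actions)

Q = V + (A - A.mean(axis=1, keepdims=True))   # same combination as in the 'Q' scope above
print(Q)                             # [[2. 0. 4.]]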
/README.md:
--------------------------------------------------------------------------------
1 | Deep Reinforcement Learning for task offloading in edge computing environments.
2 | 
3 | It includes:
4 | 
5 | - systemModel.py: definitions of the tasks, the user equipment and the MEC server, ...
6 | 
7 | - Training.py: the RL training process.
8 | 
9 | - Deep Reinforcement Learning algorithms: DQN (DQN.py), Dueling DQN (Dueling.py), Double DQN (Double.py).
10 | 
11 | - Simulation.py: the main simulation file.
12 | 
13 | ## Required packages
14 | - SimPy and TensorFlow 2.x (the DQN code uses the TF1-style graph API through tf.compat.v1)
15 | 
16 | ## How the code works
17 | - Run the file Simulation.py.
18 | 
19 | - To change the number of user equipment, change the global variable 'UN' in the file systemModel.py.
20 | 
21 | - To switch between the DQN variants, change which module is imported as DQN in the file Training.py.
22 | 
--------------------------------------------------------------------------------
/Simulation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import systemModel
4 | from Training import OFFLOADQ
5 | 
6 | import random
7 | import simpy
8 | 
9 | 
10 | SIM_TIME = 150000
11 | RANDOM_SEED = 40
12 | RHO = 2
13 | BUFFER = 500
14 | 
15 | # Simulation of the comparative non-RL experiments (online / offline / semi-online)
16 | def Simulation(rho, name, function):
17 |     random.seed(RANDOM_SEED)
18 |     mec = systemModel.MEC()
19 |     mec.RHO = rho*mec.RHO
20 |     name += str(mec.USERS_NUM)
21 |     print("Creating environment!")
22 |     env = simpy.Environment()
23 |     print("Creating users!")
24 |     for i in range(mec.USERS_NUM):
25 |         user = systemModel.User(i)
26 |         user.usersetting()
27 |         user.usercreat()
28 |         mec.USER_LIST.append(user)
29 | 
30 |     WAITING_LEN = simpy.Container(env, BUFFER, init=len(mec.WAITING_LIST))
31 | 
32 |     env.process(mec.runremote(env, WAITING_LEN))
33 |     env.process(mec.refreshsys(env, WAITING_LEN, name, 'rho'+str(mec.RHO), 1))
34 |     if function == 'offline':
35 |         env.process(mec.offloadOF(env, WAITING_LEN))
36 |     elif function == 'online':
37 |         env.process(mec.offloadOL(env, WAITING_LEN))
38 |     elif function == 'semi':
39 |         env.process(mec.offloadSe(env, WAITING_LEN))
40 | 
41 |     env.process(mec.writelog(env, name, 'rho', int(mec.RHO)))
42 | 
43 |     env.run(until=SIM_TIME)
44 | 
45 |     mec.writeoffload(name, 'rho', int(mec.RHO))
46 |     for u in mec.USER_LIST:
47 |         u.userprint()
48 | 
49 | # Simulation of the RL experiment
50 | def SimulationRL(rho, rl):
51 | 
52 |     random.seed(RANDOM_SEED)
53 |     mec = systemModel.MEC()
54 |     mec.RHO = rho*mec.RHO
55 | 
56 |     print("Creating environment!")
57 |     env = simpy.Environment()
58 |     print("Creating users!")
59 |     for i in range(mec.USERS_NUM):
60 |         user = systemModel.User(i)
61 |         user.usersetting()
62 |         user.usercreat()
63 |         mec.USER_LIST.append(user)
64 | 
65 |     WAITING_LEN = simpy.Container(env, BUFFER, init=len(mec.WAITING_LIST))
66 |     env.process(mec.runremote(env, WAITING_LEN))
67 |     env.process(mec.refreshsys(env, WAITING_LEN, rl.name, 'rho'+str(mec.RHO), 1))
68 |     env.process(mec.offloadDQ(env, WAITING_LEN, rl))
69 |     env.process(mec.writelog(env, rl.name, 'rho', int(mec.RHO)))
70 |     env.run(until=SIM_TIME)
71 |     mec.writeoffload(rl.name, 'rho', int(mec.RHO))
72 |     for u in mec.USER_LIST:
73 |         u.userprint()
74 | 
75 | 
76 | online = 'online'+str(systemModel.CD)+'_'
77 | Simulation(RHO, online, 'online')
78 | 
79 | offline = 'offline'+str(systemModel.CD)+'_'
80 | Simulation(RHO, offline, 'offline')
81 | 
82 | semi = 'semi'+str(systemModel.CD)+'_'
83 | Simulation(RHO, semi, 'semi')
84 | 
85 | 
86 | ##########RL##############
87 | print("BEGIN training!")
88 | rl = OFFLOADQ()
89 | rl.mec.RHO = 4
90 | rl.update(RANDOM_SEED) 91 | rl.printCost() 92 | ##################################### 93 | SimulationRL(RHO,rl) 94 | #tf.reset_default_graph() 95 | -------------------------------------------------------------------------------- /Training.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import systemModel 4 | from DQN import DeepQNetwork as DQN 5 | from Double import DoubleDQN as DQN 6 | from Dueling import DuelingDQN as DQN 7 | import simpy 8 | import random 9 | 10 | 11 | 12 | USERS_NUM = systemModel.UN 13 | 14 | SIM_TIME = 10000 15 | BUFFER = 500 16 | LEPI = 500 17 | 18 | class OFFLOADQ(object): 19 | def __init__(self): 20 | 21 | self.name = 'DQN'+str(systemModel.CD)+'_'+str(USERS_NUM) 22 | self.mec = systemModel.MEC() 23 | self.action_space = [str(i) for i in range(USERS_NUM)] 24 | self.n_actions = 2**USERS_NUM 25 | self.n_features = 6 26 | self.RL = DQN(self.n_actions, self.n_features, 27 | learning_rate=0.01, 28 | reward_decay=0.9, 29 | e_greedy=0.9, 30 | replace_target_iter=200, 31 | memory_size=2000, 32 | ) 33 | 34 | self.done = True 35 | self.stepcount = 0 36 | 37 | def reset(self): 38 | self.mec.reset() 39 | self.done = True 40 | def printCost(self): 41 | self.RL.plot_cost(self.name) 42 | def step(self, mec_, observation,env_, WAITING_LEN_): 43 | count = 0 44 | while True: 45 | count+=1 46 | if mec_.CHANNEL - mec_.CHANNEL_USED <= 1: 47 | mec_.SCORE = -abs(mec_.SCORE) 48 | yield env_.timeout(mec_.TIMER*mec_.Delta*2) 49 | continue 50 | yield env_.timeout(mec_.TIMER*mec_.Delta) 51 | 52 | action = self.RL.choose_action(observation) 53 | userlist = mec_.randombin(action) 54 | channel = mec_.CHANNEL-mec_.CHANNEL_USED 55 | for i in range(len(userlist)): 56 | if userlist[i] == 1: 57 | userID = i 58 | mec_.offloadOne(env_,userID,sum(userlist),channel) 59 | 60 | observation_ = mec_.getstate() 61 | reward = mec_.SCORE 62 | self.RL.store_transition(observation, action, reward, observation_) 63 | if (self.stepcount > 40) and (self.stepcount % 4 == 0): 64 | self.RL.learn() 65 | observation = observation_ 66 | 67 | def update(self, RDSEED): 68 | self.reset() 69 | for episode in range(LEPI): 70 | self.reset() 71 | print ("learing episode %d" % (episode)) 72 | random.seed(RDSEED) 73 | for i in range(USERS_NUM): 74 | user = systemModel.User(i) 75 | user.usersetting() 76 | user.usercreat() 77 | self.mec.USER_LIST.append(user) 78 | env_ = simpy.Environment() 79 | WAITING_LEN_ = simpy.Container(env_, BUFFER, init=len(self.mec.WAITING_LIST)) 80 | 81 | observation = self.mec.getstate() 82 | env_.process(self.mec.runremote(env_,WAITING_LEN_)) 83 | env_.process(self.mec.refreshsys(env_,WAITING_LEN_)) 84 | env_.process(self.step(self.mec,observation,env_,WAITING_LEN_)) 85 | env_.run(until=SIM_TIME) 86 | 87 | self.stepcount += 1 88 | self.setpcount = 0 89 | self.reset() 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /systemModel.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | random.seed(40) 5 | 6 | 7 | UN = 5 #Numbers of user equipment 8 | CD = 2 #channel bandwidth allocation factor 9 | 10 | class Job(object): 11 | def __init__(self, userID, jobID): 12 | ######base info########## 13 | self.userID = userID 14 | self.jobID = jobID 15 | self.jobTran = 0.0 16 | self.jobDTran = 0.0 17 | self.jobRun = 0.0 18 | self.jobCPU = 0.0 19 | self.jobCEnergy = 0.0 20 | self.jobLEnergy = 
0.0 21 | self.jobType = 'normal' 22 | self.jobState = 'LW' #=act,inh,lw,lr,ts,rw,rr,cp,fl 23 | #############online changing######### 24 | self.jobRunLeft = 0.0 25 | self.jobTransLeft = 0.0 26 | self.jobChannel = 0.0 27 | ###########log################# 28 | self.jobBegin = 0.0 29 | self.jobFinish = 0.0 30 | self.jobOffload = 0.0 31 | self.jobRT =0.0 32 | self.jobTT = 0.0 33 | self.jobAge = 0.0 34 | 35 | 36 | class User(object): 37 | def __init__(self, userID): 38 | self.userID = userID 39 | self.JOB_LIST = [] 40 | self.jobData = 0.0 41 | self.jobTrans = [20] 42 | self.jobRuns = [20] 43 | self.jobCPU = 0.1 44 | self.jobNums = 50 45 | self.jobCEnergy = [20] 46 | self.jobLEnergy = [20] 47 | ###############log########### 48 | self.Throughout = 0.0 49 | self.CEnergy = 0.0 50 | self.LEnergy = 0.0 51 | self.commTotal = 0.0 52 | self.Age = 0.0 53 | 54 | 55 | ############################################################## 56 | 57 | def usersetting(self): 58 | self.jobNums = 10 59 | self.jobData = (UN-self.userID)*64 60 | self.jobRuns = [(self.userID+1)*25*i for i in range(1,5)] 61 | self.jobCPU = 0.1 62 | self.jobLEnergy = [(self.userID+1)*1.25*i for i in range(7,25)] 63 | 64 | 65 | def setjobenergy(self,jid,jobtran): 66 | BDu = self.JOB_LIST[jid].jobChannel 67 | BDd = BDu/2 68 | self.JOB_LIST[jid].jobTran = self.jobData/BDu 69 | self.JOB_LIST[jid].jobDTran = self.jobData/BDd 70 | LET = BDu*0.438 + 0.051*BDd + 1.288 71 | #WIFI = BDu*0.283 + 0.137*BDd + 0.132 72 | #self.JOB_LIST[jid].jobCEnergy = random.choice([LET,WIFI])*(jobtran/1000) 73 | self.JOB_LIST[jid].jobCEnergy = LET*(jobtran/1000) 74 | 75 | 76 | 77 | def jobcreat(self,jobid,jobtype='normal'): 78 | jobrun = random.choice(self.jobRuns) 79 | onejob = Job(self.userID, jobid) 80 | onejob.jobRun = jobrun 81 | onejob.jobType = jobtype 82 | onejob.jobCPU = self.jobCPU 83 | onejob.jobLEnergy = random.choice(self.jobLEnergy) 84 | 85 | return onejob 86 | 87 | def usercreat(self): 88 | 89 | onejob = self.jobcreat(0) 90 | self.JOB_LIST.append(onejob) 91 | 92 | for i in range(1,self.jobNums): 93 | onejob = self.jobcreat(i) 94 | self.JOB_LIST.append(onejob) 95 | 96 | 97 | def userprint(self): 98 | print("User %d totalfinish %.2f, energy %.2f , age %.2f." 
% (self.userID, self.Throughout, self.CEnergy, self.Age)) 99 | 100 | def usersend(self): 101 | jobid = -1 102 | for i in range(len(self.JOB_LIST)): 103 | job = self.JOB_LIST[i] 104 | if job.jobState == 'LW': 105 | jobid = i 106 | self.jobappend() 107 | return jobid 108 | if jobid == -1: 109 | self.jobappend() 110 | job = self.JOB_LIST[-1] 111 | return jobid 112 | 113 | def userrun(self): 114 | jobid = -1 115 | for i in range(len(self.JOB_LIST)): 116 | job = self.JOB_LIST[i] 117 | if job.jobState == 'LW': 118 | jobid = i 119 | return jobid 120 | return jobid 121 | 122 | def jobrefresh(self,env, fjob): 123 | jobID = fjob.jobID 124 | self.Throughout += 1 125 | self.JOB_LIST[jobID].jobFinish= env.now 126 | 127 | def jobappend(self): 128 | jid = len(self.JOB_LIST) 129 | onejob = self.jobcreat(jid) 130 | self.JOB_LIST.append(onejob) 131 | 132 | def runlocal(self,env): 133 | while True: 134 | jobID = self.userrun() 135 | if jobID == -1: 136 | self.jobappend() 137 | continue 138 | else: 139 | self.JOB_LIST[jobID].jobState = 'LR' 140 | self.JOB_LIST[jobID].jobBegin = env.now 141 | RUNNINGTIME = self.JOB_LIST[jobID].jobRun 142 | yield env.timeout(RUNNINGTIME) 143 | self.JOB_LIST[jobID].jobState = 'CP' 144 | self.LEnergy += self.JOB_LIST[jobID].jobLEnergy 145 | self.jobrefresh(env,self.JOB_LIST[jobID]) 146 | self.jobappend() 147 | 148 | 149 | class MEC(object): 150 | def __init__(self): 151 | ##########basic info########## 152 | self.USERS_NUM = UN 153 | self.USER_LIST = [] 154 | self.CHANNEL = 50.0 155 | self.RHO = 2.0 156 | self.TIMER = 10 157 | self.Delta = UN 158 | self.CD = CD 159 | ##########online changing####### 160 | self.JOB_POOL = [] 161 | self.TRANS_POOL = [] 162 | self.WAITING_LIST = [] 163 | self.CHANNEL_USED = 0.0 164 | self.SYS_TIME = 0.0 165 | self.SYS_CPU = 0.0 166 | self.ACTION = 0 167 | self.SCORE = 0.0 168 | ####################log################ 169 | self.offloadJob = [] 170 | self.Age = 0.0 171 | self.commTime = 0.0 172 | self.commEnergy = 0.0 173 | self.Run = 0.0 174 | #################RL################### 175 | self.REWARD = 0.0 176 | 177 | 178 | ###############################system log################################################### 179 | def writelog(self,env,fn, name, value, timeslot = 5000): 180 | yield env.timeout(5000) 181 | f = open('.\\data\\USER_'+str(fn)+'_'+str(name)+'_'+str(value)+'.data','w') 182 | oneline = 'TIMESLOT \t Throughout \t Age \t Run \t commTotal \t commEnergy \t reward\n' 183 | f.write(oneline) 184 | f.close() 185 | while True: 186 | yield env.timeout(timeslot) 187 | age = 0.0 188 | run = 0.0 189 | throu = 0.0 190 | comm = 0.0 191 | energy = 0.0 192 | sumreward = self.REWARD 193 | ucout = len(self.USER_LIST) 194 | for u in self.USER_LIST: 195 | throu += float(u.Throughout) 196 | age = self.Age/ucout/1000 197 | run = self.Run/ucout 198 | throu = throu/ucout 199 | comm = self.commTime/ucout/1000 200 | energy = self.commEnergy/ucout 201 | sumreward = self.REWARD 202 | f = open('.\\data\\USER_'+str(fn)+'_'+str(name)+'_'+str(value)+'.data','a') 203 | oneline = str(env.now/1000)+'\t'+str(throu)+'\t'+str(age)+'\t'+str(run)+'\t'+str(comm)+'\t'+str(energy)+'\t'+str(sumreward)+'\n' 204 | f.write(oneline) 205 | f.close() 206 | 207 | def writeoffload(self,fn, name, value): 208 | f = open('.\\data\\JOB_'+str(fn)+'_'+str(name)+'_'+str(value)+'.data','w') 209 | titleline = 'No \t Uid \t Jid \t offloadtime \t begintime \t commutime \t runtime \t energy \t AoI\n' 210 | f.write(titleline) 211 | i = 0 212 | for j in self.offloadJob: 213 | oneline = str(i) +'\t'+ 
str(j.userID) +'\t'+ str(j.jobID) +'\t'+str(j.jobOffload/1000) +'\t'+ str(j.jobBegin/1000) +'\t' 214 | oneline += str(j.jobTran/1000) +'\t'+ str(j.jobRun/1000) +'\t'+ str(j.jobCEnergy) +'\t'+ str(j.jobAge/1000) +'\n' 215 | i +=1 216 | f.write(oneline) 217 | f.close() 218 | 219 | ######RL############# 220 | def getstate(self): 221 | state = [] 222 | state.append(self.CHANNEL_USED) 223 | state.append(self.SYS_CPU) 224 | state.append(len(self.JOB_POOL)) 225 | state.append(len(self.TRANS_POOL)) 226 | 227 | uwait = 0.0 228 | utran = 0.0 229 | for i in self.JOB_POOL: 230 | uwait += self.USER_LIST[i[0]].JOB_LIST[i[1]].jobRunLeft 231 | for j in self.TRANS_POOL: 232 | utran += self.USER_LIST[j[0]].JOB_LIST[j[1]].jobTransLeft 233 | state.append(uwait) 234 | state.append(utran) 235 | state = np.array(state) 236 | return state 237 | 238 | def reset(self): 239 | self.USER_LIST = [] 240 | 241 | self.JOB_POOL = [] 242 | self.TRANS_POOL = [] 243 | self.WAITING_LIST = [] 244 | self.CHANNEL_USED = 0.0 245 | self.SYS_TIME = 0.0 246 | self.SYS_CPU = 0.0 247 | 248 | self.offloadJob = [] 249 | self.REWARD = 0.0 250 | ######RL############# 251 | 252 | ##################################### 253 | def channeldisturb(self, userID,jobID,jobnum,channel): 254 | disturb = np.log2(1+1/(self.CD+jobnum)) 255 | cl = channel*disturb 256 | 257 | if self.CHANNEL_USED+cl > self.CHANNEL: 258 | return -1 259 | self.CHANNEL_USED += cl 260 | jt = self.USER_LIST[userID].jobData/cl 261 | self.USER_LIST[userID].JOB_LIST[jobID].jobChannel = cl 262 | return jt 263 | ################################################### 264 | 265 | 266 | def offloadOne(self,env,userID,jobnum,channel): 267 | jobID = self.USER_LIST[userID].usersend() 268 | if jobID == -1: 269 | return 270 | 271 | TRANSPOTTIME = self.channeldisturb(userID,jobID,jobnum,channel) 272 | if TRANSPOTTIME == -1: 273 | self.SCORE = -abs(self.SCORE) 274 | return 275 | 276 | self.USER_LIST[userID].JOB_LIST[jobID].jobOffload = env.now 277 | self.USER_LIST[userID].JOB_LIST[jobID].jobState = 'TS' 278 | self.USER_LIST[userID].JOB_LIST[jobID].jobAge = env.now 279 | self.USER_LIST[userID].JOB_LIST[jobID].jobTT = TRANSPOTTIME 280 | self.USER_LIST[userID].JOB_LIST[jobID].jobTransLeft = TRANSPOTTIME 281 | self.USER_LIST[userID].setjobenergy(jobID,TRANSPOTTIME) 282 | self.commEnergy += self.USER_LIST[userID].JOB_LIST[jobID].jobCEnergy 283 | self.TRANS_POOL.append((userID,jobID)) 284 | 285 | def runremote(self,env, WAITING_LEN): 286 | while True: 287 | yield env.timeout(self.TIMER) 288 | 289 | if self.SYS_CPU > 0.8: 290 | yield env.timeout(self.TIMER*2) 291 | self.SCORE = -abs(self.SCORE) 292 | continue 293 | else: 294 | yield WAITING_LEN.get(1) 295 | job = self.WAITING_LIST.pop(0) 296 | userID = job.userID 297 | jobID = job.jobID 298 | self.JOB_POOL.append((userID,jobID)) 299 | self.SYS_CPU += self.USER_LIST[userID].JOB_LIST[jobID].jobCPU 300 | ####################################################################################### 301 | self.USER_LIST[userID].JOB_LIST[jobID].jobState = 'RR' 302 | self.USER_LIST[userID].JOB_LIST[jobID].jobBegin = env.now 303 | RUNNINGTIME = float(self.USER_LIST[userID].JOB_LIST[jobID].jobRun)/self.RHO 304 | self.USER_LIST[userID].JOB_LIST[jobID].jobRT = RUNNINGTIME 305 | self.USER_LIST[userID].JOB_LIST[jobID].jobRunLeft = RUNNINGTIME 306 | 307 | 308 | def refreshsys(self,env,WAITING_LEN,name='',value='',flag = 0): 309 | if flag ==1: 310 | f = open('.\\data\\ACTION_'+str(name)+'_'+str(value)+'.data','w') 311 | oneline = 'sysTime \t'+'ACTION \t'+'ChannelUsed 
\t'+'TransJob \t'+'CPU \t'+'RunningJob \t'+'ActionQos \n' 312 | f.write(oneline) 313 | f.close() 314 | while True: 315 | yield env.timeout(self.TIMER) 316 | TIMER = env.now - self.SYS_TIME 317 | self.SYS_TIME = env.now 318 | if flag ==1: 319 | f = open('.\\data\\ACTION_'+str(name)+'_'+str(value)+'.data','a') 320 | oneline = str(self.SYS_TIME)+'\t' +str(self.ACTION)+'\t' +str(self.CHANNEL_USED)+ '\t' + str(len(self.TRANS_POOL)) + '\t' +str(self.SYS_CPU)+ '\t' + str(len(self.JOB_POOL)) 321 | oneline += '\t' +str(self.SCORE) + '\n' 322 | f.write(oneline) 323 | 324 | transpool = [] 325 | for Jt in self.TRANS_POOL: 326 | userID = Jt[0] 327 | jobID = Jt[1] 328 | onejob = self.USER_LIST[userID].JOB_LIST[jobID] 329 | if onejob.jobTransLeft > TIMER: 330 | transpool.append((userID,jobID)) 331 | self.USER_LIST[userID].JOB_LIST[jobID].jobTransLeft = self.USER_LIST[userID].JOB_LIST[jobID].jobTransLeft-TIMER 332 | else: 333 | self.USER_LIST[userID].JOB_LIST[jobID].jobState = 'RW' 334 | self.CHANNEL_USED -= self.USER_LIST[userID].JOB_LIST[jobID].jobChannel 335 | self.WAITING_LIST.append(self.USER_LIST[userID].JOB_LIST[jobID]) 336 | self.USER_LIST[userID].jobappend() 337 | yield WAITING_LEN.put(1) 338 | self.TRANS_POOL = transpool 339 | 340 | 341 | jobpool = [] 342 | for Jr in self.JOB_POOL: 343 | userID = Jr[0] 344 | jobID = Jr[1] 345 | onejob = self.USER_LIST[userID].JOB_LIST[jobID] 346 | if onejob.jobRunLeft > TIMER: 347 | jobpool.append((userID,jobID)) 348 | self.USER_LIST[userID].JOB_LIST[jobID].jobRunLeft = self.USER_LIST[userID].JOB_LIST[jobID].jobRunLeft-TIMER 349 | else: 350 | self.USER_LIST[userID].JOB_LIST[jobID].jobState = 'CP' 351 | self.SYS_CPU -= self.USER_LIST[userID].JOB_LIST[jobID].jobCPU 352 | self.USER_LIST[userID].jobrefresh(env,self.USER_LIST[userID].JOB_LIST[jobID]) 353 | self.offloadJob.append(self.USER_LIST[userID].JOB_LIST[jobID]) 354 | ######################################################################## 355 | self.USER_LIST[userID].JOB_LIST[jobID].jobAge = env.now - self.USER_LIST[userID].JOB_LIST[jobID].jobAge 356 | self.Age += self.USER_LIST[userID].JOB_LIST[jobID].jobAge 357 | self.Run += self.USER_LIST[userID].JOB_LIST[jobID].jobRun 358 | self.commTime += self.USER_LIST[userID].JOB_LIST[jobID].jobTT 359 | ###################################REWARD###################################### 360 | self.SCORE = self.USER_LIST[userID].JOB_LIST[jobID].jobRun/self.USER_LIST[userID].JOB_LIST[jobID].jobCEnergy 361 | self.REWARD += self.SCORE 362 | ################################################################################# 363 | self.JOB_POOL = jobpool 364 | f.close() 365 | 366 | 367 | 368 | def offline(self): 369 | score = 0.0 370 | action = 1 371 | for i in range(2**self.USERS_NUM): 372 | userlist = self.randombin(i) 373 | score_ = 0 374 | jobnum = sum(userlist) 375 | channel = self.CHANNEL-self.CHANNEL_USED 376 | cl = 0 377 | for u in range(len(userlist)): 378 | if userlist[u] == 1: 379 | userID = u 380 | disturb = np.log2(1+1/(self.CD+jobnum)) 381 | cl = channel*disturb 382 | score_ += np.average(self.USER_LIST[userID].jobRuns)/self.USER_LIST[userID].jobData*cl 383 | if score_ > score: 384 | score = score_ 385 | action = i 386 | return action 387 | 388 | def spac(self): 389 | score = 100000.0 390 | action = 1 391 | for i in range(2**self.USERS_NUM): 392 | userlist = self.randombin(i) 393 | score_ = 100000.0 394 | jobnum = sum(userlist) 395 | channel = self.CHANNEL-self.CHANNEL_USED 396 | cl = 0 397 | for u in range(len(userlist)): 398 | if userlist[u] == 1: 399 | userID = u 
400 | disturb = np.log2(1+1/(self.CD+jobnum)) 401 | cl = channel*disturb 402 | if cl < 1: 403 | score_ = 100000.0 404 | else: 405 | score_ += self.USER_LIST[userID].jobData/cl 406 | if score_ < score: 407 | score = score_ 408 | action = i 409 | return action 410 | 411 | def randombin(self,action): 412 | userlist = list(bin(action).replace('0b','')) 413 | zeros = self.USERS_NUM - len(userlist) 414 | ll = [0 for i in range(zeros)] 415 | for i in userlist: 416 | ll.append(int(i)) 417 | return ll 418 | #################################offloading strategy######################################## 419 | #online 420 | def offloadOL(self,env, WAITING_LEN): 421 | while True: 422 | if self.CHANNEL - self.CHANNEL_USED <= 1: 423 | self.SCORE = -abs(self.SCORE) 424 | yield env.timeout(self.TIMER*self.Delta*2) 425 | continue 426 | yield env.timeout(self.TIMER*self.Delta) 427 | self.ACTION = random.randint(1,2**self.USERS_NUM-1) 428 | userlist = self.randombin(self.ACTION) 429 | jobnum = sum(userlist) 430 | channel = self.CHANNEL-self.CHANNEL_USED 431 | for i in range(len(userlist)): 432 | if userlist[i] == 1: 433 | userID = i 434 | self.offloadOne(env,userID,jobnum,channel) 435 | #offline 436 | def offloadOF(self,env, WAITING_LEN): 437 | while True: 438 | if self.CHANNEL - self.CHANNEL_USED <= 1: 439 | self.SCORE = -abs(self.SCORE) 440 | yield env.timeout(self.TIMER*self.Delta*2) 441 | continue 442 | yield env.timeout(self.TIMER*self.Delta) 443 | self.ACTION = self.offline() 444 | userlist = self.randombin(self.ACTION) 445 | jobnum = sum(userlist) 446 | channel = self.CHANNEL-self.CHANNEL_USED 447 | for i in range(len(userlist)): 448 | if userlist[i] == 1: 449 | userID = i 450 | self.offloadOne(env,userID,jobnum,channel) 451 | #semi-online 452 | def offloadSe(self,env, WAITING_LEN): 453 | while True: 454 | if self.CHANNEL - self.CHANNEL_USED <= 1: 455 | self.SCORE = -abs(self.SCORE) 456 | yield env.timeout(self.TIMER*self.Delta*2) 457 | continue 458 | yield env.timeout(self.TIMER*self.Delta) 459 | self.ACTION = 1 460 | userlist = self.randombin(self.ACTION) 461 | jobnum = sum(userlist) 462 | channel = self.CHANNEL-self.CHANNEL_USED 463 | for i in range(len(userlist)): 464 | if userlist[i] == 1: 465 | userID = i 466 | self.offloadOne(env,userID,jobnum,channel) 467 | #RL 468 | def offloadDQ(self, env,WAITING_LEN,ql): 469 | while True: 470 | observation = self.getstate() 471 | if self.CHANNEL - self.CHANNEL_USED <= 1: 472 | self.SCORE = -abs(self.SCORE) 473 | yield env.timeout(self.TIMER*self.Delta*2) 474 | continue 475 | yield env.timeout(self.TIMER*self.Delta) 476 | self.ACTION = ql.RL.choose_action(observation) 477 | userlist = self.randombin(self.ACTION) 478 | channel = self.CHANNEL-self.CHANNEL_USED 479 | for i in range(len(userlist)): 480 | if userlist[i] == 1: 481 | userID = i 482 | self.offloadOne(env,userID,sum(userlist),channel) 483 | 484 | 485 | --------------------------------------------------------------------------------
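A worked example of how the pieces of systemModel.py above fit together: MEC.randombin() decodes an action index into a per-user offloading decision, and MEC.channeldisturb() splits the free channel among the selected users. The standalone script below re-traces that arithmetic with the default UN = 5, CD = 2 and an otherwise idle channel; the chosen action index is arbitrary.

import numpy as np

UN, CD = 5, 2                       # number of users, bandwidth allocation factor
CHANNEL, CHANNEL_USED = 50.0, 0.0   # total and currently used channel, as in MEC.__init__

# --- action decoding (same steps as MEC.randombin) ---
action = 11
userlist = list(bin(action).replace('0b', ''))
zeros = UN - len(userlist)
ll = [0 for _ in range(zeros)]
for i in userlist:
    ll.append(int(i))
print(ll)                           # [0, 1, 0, 1, 1] -> users 1, 3 and 4 offload in this slot

# --- per-job channel share (same formula as MEC.channeldisturb) ---
jobnum = sum(ll)                                        # 3 simultaneous uploads
channel = CHANNEL - CHANNEL_USED
cl = channel * np.log2(1 + 1 / (CD + jobnum))           # bandwidth granted to each offloaded job
print(round(cl, 2))                                     # ~13.15

# --- transmission time for user 1 (jobData = (UN - userID) * 64 in User.usersetting) ---
jt = (UN - 1) * 64 / cl
print(round(jt, 2))                                     # ~19.47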