├── DQN.py
├── Double.py
├── Dueling.py
├── README.md
├── Simulation.py
├── Training.py
└── systemModel.py

/DQN.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow.compat.v1 as tf  # TF1-style graph API, run through the compat layer on TensorFlow 2.x
3 | tf.disable_v2_behavior()
4 | np.random.seed(1)
5 | tf.set_random_seed(1)
6 | 
7 | 
8 | # Deep Q Network off-policy
9 | class DeepQNetwork:
10 |     def __init__(
11 |             self,
12 |             n_actions,
13 |             n_features,
14 |             learning_rate=0.01,
15 |             reward_decay=0.9,
16 |             e_greedy=0.9,
17 |             replace_target_iter=100,
18 |             memory_size=500,
19 |             batch_size=32,
20 |             e_greedy_increment=None,
21 |             output_graph=False,
22 |     ):
23 |         self.n_actions = n_actions
24 |         self.n_features = n_features
25 |         self.lr = learning_rate
26 |         self.gamma = reward_decay
27 |         self.epsilon_max = e_greedy
28 |         self.replace_target_iter = replace_target_iter
29 |         self.memory_size = memory_size
30 |         self.batch_size = batch_size
31 |         self.epsilon_increment = e_greedy_increment
32 |         self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
33 | 
34 |         self.learn_step_counter = 0
35 | 
36 |         # replay memory: one row per transition [s, a, r, s_]
37 |         self.memory = np.zeros((self.memory_size, n_features * 2 + 2))
38 | 
39 |         # build eval_net and target_net, then an op that copies eval parameters into the target net
40 |         self._build_net()
41 |         t_params = tf.get_collection('target_net_params')
42 |         e_params = tf.get_collection('eval_net_params')
43 |         self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
44 | 
45 |         self.sess = tf.Session()
46 | 
47 |         if output_graph:
48 |             tf.summary.FileWriter("logs/", self.sess.graph)
49 | 
50 |         self.sess.run(tf.global_variables_initializer())
51 |         self.cost_his = []
52 | 
53 |     def _build_net(self):
54 |         # ------------------ build evaluate_net ------------------
55 |         self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')
56 |         self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')
57 |         with tf.variable_scope('eval_net'):
58 |             # c_names are the collections used later when copying eval_net into target_net
59 |             c_names, n_l1, n_l2, w_initializer, b_initializer = \
60 |                 ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 20, 20, \
61 |                 tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)
62 | 
63 |             # first hidden layer
64 |             with tf.variable_scope('l1'):
65 |                 w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
66 |                 b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
67 |                 l1 = tf.nn.relu(tf.matmul(self.s, w1) + b1)
68 | 
69 |             # second hidden layer
70 |             with tf.variable_scope('lm1'):
71 |                 wm1 = tf.get_variable('wm1', [n_l1, n_l2], initializer=w_initializer, collections=c_names)
72 |                 bm1 = tf.get_variable('bm1', [1, n_l2], initializer=b_initializer, collections=c_names)
73 |                 lm1 = tf.nn.relu(tf.matmul(l1, wm1) + bm1)
74 | 
75 |             # output layer: Q(s, a) for every action
76 |             with tf.variable_scope('l2'):
77 |                 w2 = tf.get_variable('w2', [n_l2, self.n_actions], initializer=w_initializer, collections=c_names)
78 |                 b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
79 |                 self.q_eval = tf.matmul(lm1, w2) + b2
80 | 
81 |         with tf.variable_scope('loss'):
82 |             self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))
83 |         with tf.variable_scope('train'):
84 |             self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
85 | 
86 |         # ------------------ build target_net ------------------
87 |         self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')
88 |         with tf.variable_scope('target_net'):
89 |             # target_net parameters live in their own collection
90 |             c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
91 | 
92 |             with tf.variable_scope('l1'):
93 |                 w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
94 |                 b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
95 |                 l1 = tf.nn.relu(tf.matmul(self.s_, w1) + b1)
96 | 
97 |             with tf.variable_scope('lm1'):
98 |                 wm1 = tf.get_variable('wm1', [n_l1, n_l2], initializer=w_initializer, collections=c_names)
99 |                 bm1 = tf.get_variable('bm1', [1, n_l2], initializer=b_initializer, collections=c_names)
100 |                 lm1 = tf.nn.relu(tf.matmul(l1, wm1) + bm1)
101 | 
102 |             with tf.variable_scope('l2'):
103 |                 w2 = tf.get_variable('w2', [n_l2, self.n_actions], initializer=w_initializer, collections=c_names)
104 |                 b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
105 |                 self.q_next = tf.matmul(lm1, w2) + b2  # mirror eval_net (use the second hidden layer) so the parameter copy is consistent
106 | 
107 |     def store_transition(self, s, a, r, s_):
108 |         if not hasattr(self, 'memory_counter'):
109 |             self.memory_counter = 0
110 | 
111 |         transition = np.hstack((s, [a, r], s_))
112 | 
113 |         # replace the old memory with new memory
114 |         index = self.memory_counter % self.memory_size
115 |         self.memory[index, :] = transition
116 | 
117 |         self.memory_counter += 1
118 | 
119 |     def choose_action(self, observation):
120 |         # the network accepts a batch, so add a batch dimension
121 |         observation = observation[np.newaxis, :]
122 | 
123 |         if np.random.uniform() < self.epsilon:
124 |             # greedy: pick the action with the highest estimated Q value
125 |             actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
126 |             action = np.argmax(actions_value)
127 |         else:
128 |             action = np.random.randint(0, self.n_actions)
129 |         return action
130 | 
131 |     def learn(self):
132 |         # periodically copy eval_net parameters into target_net
133 |         if self.learn_step_counter % self.replace_target_iter == 0:
134 |             self.sess.run(self.replace_target_op)
135 |             print('\ntarget_params_replaced\n')
136 | 
137 |         # sample a batch from replay memory
138 |         if self.memory_counter > self.memory_size:
139 |             sample_index = np.random.choice(self.memory_size, size=self.batch_size)
140 |         else:
141 |             sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
142 |         batch_memory = self.memory[sample_index, :]
143 | 
144 |         q_next, q_eval = self.sess.run(
145 |             [self.q_next, self.q_eval],
146 |             feed_dict={
147 |                 self.s_: batch_memory[:, -self.n_features:],  # fixed params
148 |                 self.s: batch_memory[:, :self.n_features],  # newest params
149 |             })
150 | 
151 |         # change q_target w.r.t q_eval's action
152 |         q_target = q_eval.copy()
153 | 
154 |         batch_index = np.arange(self.batch_size, dtype=np.int32)
155 |         eval_act_index = batch_memory[:, self.n_features].astype(int)
156 |         reward = batch_memory[:, self.n_features + 1]
157 | 
158 |         q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
159 | 
160 | 
161 |         # train eval network
162 |         _, self.cost = self.sess.run([self._train_op, self.loss],
163 |                                      feed_dict={self.s: batch_memory[:, :self.n_features],
164 |                                                 self.q_target: q_target})
165 |         self.cost_his.append(self.cost)
166 | 
167 |         # increasing epsilon
168 |         self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max
169 |         self.learn_step_counter += 1
170 | 
171 |     def plot_cost(self, name='RL'):
172 |         import matplotlib.pyplot as plt
173 |         plt.plot(np.arange(len(self.cost_his)), self.cost_his)
174 |         plt.ylabel('Cost')
175 |         plt.xlabel('training steps')
176 |         plt.savefig('.\\data\\'+name+'cost.svg', format='svg', dpi=400)
177 |         plt.savefig('.\\data\\'+name+'cost.png', format='png', dpi=400)
178 |         #plt.show()
179 | 
180 | 
--------------------------------------------------------------------------------
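A minimal smoke-test sketch for the DeepQNetwork class above. The two-feature, three-action "environment" below (random next states, an arbitrary reward) is invented purely for illustration and is not part of this repository; in the project itself the agent is driven by Training.py and Simulation.py.

import numpy as np
from DQN import DeepQNetwork

agent = DeepQNetwork(n_actions=3, n_features=2, e_greedy_increment=0.001)

s = np.random.rand(2)                       # current observation (2 features)
for step in range(300):
    a = agent.choose_action(s)              # epsilon-greedy over q_eval
    s_ = np.random.rand(2)                  # made-up next observation
    r = float(a == np.argmax(s_))           # made-up reward signal
    agent.store_transition(s, a, r, s_)
    if step > 50 and step % 5 == 0:         # warm up the replay memory, then learn periodically
        agent.learn()
    s = s_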
/Double.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow.compat.v1 as tf  # TF1-style graph API, run through the compat layer on TensorFlow 2.x
3 | tf.disable_v2_behavior()
4 | np.random.seed(1)
5 | tf.set_random_seed(1)
6 | 
7 | 
8 | class DoubleDQN:
9 |     def __init__(
10 |             self,
11 |             n_actions,
12 |             n_features,
13 |             learning_rate=0.005,
14 |             reward_decay=0.9,
15 |             e_greedy=0.9,
16 |             replace_target_iter=200,
17 |             memory_size=3000,
18 |             batch_size=32,
19 |             e_greedy_increment=None,
20 |             output_graph=False,
21 |             double_q=True,
22 |             sess=None,
23 |     ):
24 |         self.n_actions = n_actions
25 |         self.n_features = n_features
26 |         self.lr = learning_rate
27 |         self.gamma = reward_decay
28 |         self.epsilon_max = e_greedy
29 |         self.replace_target_iter = replace_target_iter
30 |         self.memory_size = memory_size
31 |         self.batch_size = batch_size
32 |         self.epsilon_increment = e_greedy_increment
33 |         self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
34 | 
35 |         self.double_q = double_q    # decide to use double q or not
36 | 
37 |         self.learn_step_counter = 0
38 |         self.memory = np.zeros((self.memory_size, n_features*2+2))
39 |         self._build_net()
40 |         t_params = tf.get_collection('target_net_params')
41 |         e_params = tf.get_collection('eval_net_params')
42 |         self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
43 | 
44 |         if sess is None:
45 |             self.sess = tf.Session()
46 |             self.sess.run(tf.global_variables_initializer())
47 |         else:
48 |             self.sess = sess
49 |         if output_graph:
50 |             tf.summary.FileWriter("logs/", self.sess.graph)
51 |         self.cost_his = []
52 | 
53 |     def _build_net(self):
54 |         def build_layers(s, c_names, n_l1, w_initializer, b_initializer):
55 |             with tf.variable_scope('l1'):
56 |                 w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
57 |                 b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
58 |                 l1 = tf.nn.relu(tf.matmul(s, w1) + b1)
59 |             with tf.variable_scope('h1'):
60 |                 wh1 = tf.get_variable('wh1', [n_l1, n_l1], initializer=w_initializer, collections=c_names)
61 |                 bh1 = tf.get_variable('bh1', [1, n_l1], initializer=b_initializer, collections=c_names)
62 |                 lh1 = tf.nn.relu(tf.matmul(l1, wh1) + bh1)
63 | 
64 |             with tf.variable_scope('h2'):
65 | 
66 |                 wh2 = tf.get_variable('wh2', [n_l1, n_l1], initializer=w_initializer, collections=c_names)
67 |                 bh2 = tf.get_variable('bh2', [1, n_l1], initializer=b_initializer, collections=c_names)
68 |                 lh2 = tf.nn.relu(tf.matmul(lh1, wh2) + bh2)
69 | 
70 |             with tf.variable_scope('l2'):
71 |                 w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
72 |                 b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
73 |                 out = tf.matmul(lh2, w2) + b2
74 |             return out
75 |         # ------------------ build evaluate_net ------------------
76 |         self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input
77 |         self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')  # for calculating loss
78 | 
79 |         with tf.variable_scope('eval_net'):
80 |             c_names, n_l1, w_initializer, b_initializer = \
81 |                 ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 20, \
82 |                 tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)  # config of layers
83 | 
84 |             self.q_eval = build_layers(self.s, c_names, n_l1, w_initializer, b_initializer)
85 | 
86 |         with tf.variable_scope('loss'):
87 |             self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))
88 |         with tf.variable_scope('train'):
89 |             self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
90 | 
91 |         # ------------------ build target_net ------------------
92 |         self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # input
93 |         with tf.variable_scope('target_net'):
94 |             c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
95 | 
96 |             self.q_next = build_layers(self.s_, c_names, n_l1, w_initializer, b_initializer)
97 | 
98 |     def store_transition(self, s, a, r, s_):
99 |         if not hasattr(self, 'memory_counter'):
100 |             self.memory_counter = 0
101 |         transition = np.hstack((s, [a, r], s_))
102 |         index = self.memory_counter % self.memory_size
103 |         self.memory[index, :] = transition
104 |         self.memory_counter += 1
105 | 
106 |     def choose_action(self, observation):
107 |         observation = observation[np.newaxis, :]
108 |         actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
109 |         action = np.argmax(actions_value)
110 | 
111 |         if not hasattr(self, 'q'):  # record the action values it gets
112 |             self.q = []
113 |             self.running_q = 0
114 |         self.running_q = self.running_q*0.99 + 0.01 * np.max(actions_value)
115 |         self.q.append(self.running_q)
116 | 
117 |         if np.random.uniform() > self.epsilon:  # choosing action
118 |             action = np.random.randint(0, self.n_actions)
119 |         return action
120 | 
121 |     def learn(self):
122 |         if self.learn_step_counter % self.replace_target_iter == 0:
123 |             self.sess.run(self.replace_target_op)
124 |             print('\ntarget_params_replaced\n')
125 | 
126 |         if self.memory_counter > self.memory_size:
127 |             sample_index = np.random.choice(self.memory_size, size=self.batch_size)
128 |         else:
129 |             sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
130 |         batch_memory = self.memory[sample_index, :]
131 | 
132 |         q_next, q_eval4next = self.sess.run(
133 |             [self.q_next, self.q_eval],
134 |             feed_dict={self.s_: batch_memory[:, -self.n_features:],    # next observation
135 |                        self.s: batch_memory[:, -self.n_features:]})    # next observation
136 |         q_eval = self.sess.run(self.q_eval, {self.s: batch_memory[:, :self.n_features]})
137 | 
138 |         q_target = q_eval.copy()
139 | 
140 |         batch_index = np.arange(self.batch_size, dtype=np.int32)
141 |         eval_act_index = batch_memory[:, self.n_features].astype(int)
142 |         reward = batch_memory[:, self.n_features + 1]
143 | 
144 |         if self.double_q:
145 |             max_act4next = np.argmax(q_eval4next, axis=1)        # the action with the highest value according to q_eval
146 |             selected_q_next = q_next[batch_index, max_act4next]  # Double DQN: evaluate that action with q_next
147 |         else:
148 |             selected_q_next = np.max(q_next, axis=1)    # the natural DQN
149 | 
150 |         q_target[batch_index, eval_act_index] = reward + self.gamma * selected_q_next
151 | 
152 |         _, self.cost = self.sess.run([self._train_op, self.loss],
153 |                                      feed_dict={self.s: batch_memory[:, :self.n_features],
154 |                                                 self.q_target: q_target})
155 |         self.cost_his.append(self.cost)
156 | 
157 |         self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max
158 |         self.learn_step_counter += 1
159 | 
160 |     def plot_cost(self, name='RL'):
161 |         import matplotlib.pyplot as plt
162 |         plt.plot(np.arange(len(self.cost_his)), self.cost_his)
163 |         np.save('.\\data\\NNcost'+name+'.npy', np.array(self.cost_his))
164 |         plt.ylabel('Cost')
165 |         plt.xlabel('training steps')
166 |         plt.savefig('.\\data\\'+name+'cost.svg', format='svg', dpi=400)
167 |         plt.savefig('.\\data\\'+name+'cost.png', format='png', dpi=400)
168 | 
--------------------------------------------------------------------------------
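The only place DoubleDQN differs from the natural DQN update is how selected_q_next is formed in learn() above. A standalone NumPy illustration with invented Q-values for a single transition and three actions:

import numpy as np

q_next = np.array([[1.0, 5.0, 2.0]])        # target net:  Q_target(s', a)
q_eval4next = np.array([[4.0, 3.0, 9.0]])   # eval net:    Q_eval(s', a)
batch_index = np.arange(1)

# Double DQN: the eval net chooses the action, the target net evaluates it.
max_act4next = np.argmax(q_eval4next, axis=1)         # -> action 2
selected_double = q_next[batch_index, max_act4next]   # -> [2.0]

# Natural DQN: the target net both chooses and evaluates the action.
selected_natural = np.max(q_next, axis=1)             # -> [5.0]

print(selected_double, selected_natural)

Decoupling action selection from action evaluation in this way is what reduces the Q-value over-estimation that plain DQN suffers from.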
/Dueling.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow.compat.v1 as tf  # TF1-style graph API, run through the compat layer on TensorFlow 2.x
3 | tf.disable_v2_behavior()
4 | np.random.seed(1)
5 | tf.set_random_seed(1)
6 | 
7 | 
8 | class DuelingDQN:
9 |     def __init__(
10 |             self,
11 |             n_actions,
12 |             n_features,
13 |             learning_rate=0.001,
14 |             reward_decay=0.9,
15 |             e_greedy=0.9,
16 |             replace_target_iter=200,
17 |             memory_size=500,
18 |             batch_size=32,
19 |             e_greedy_increment=None,
20 |             output_graph=False,
21 |             dueling=True,
22 |             sess=None,
23 |     ):
24 |         self.n_actions = n_actions
25 |         self.n_features = n_features
26 |         self.lr = learning_rate
27 |         self.gamma = reward_decay
28 |         self.epsilon_max = e_greedy
29 |         self.replace_target_iter = replace_target_iter
30 |         self.memory_size = memory_size
31 |         self.batch_size = batch_size
32 |         self.epsilon_increment = e_greedy_increment
33 |         self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max
34 | 
35 |         self.dueling = dueling      # decide to use dueling DQN or not
36 | 
37 |         self.learn_step_counter = 0
38 |         self.memory = np.zeros((self.memory_size, n_features*2+2))
39 |         self._build_net()
40 |         t_params = tf.get_collection('target_net_params')
41 |         e_params = tf.get_collection('eval_net_params')
42 |         self.replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
43 | 
44 |         if sess is None:
45 |             self.sess = tf.Session()
46 |             self.sess.run(tf.global_variables_initializer())
47 |         else:
48 |             self.sess = sess
49 |         if output_graph:
50 |             tf.summary.FileWriter("logs/", self.sess.graph)
51 |         self.cost_his = []
52 | 
53 |     def _build_net(self):
54 |         def build_layers(s, c_names, n_l1, w_initializer, b_initializer):
55 |             with tf.variable_scope('l1'):
56 |                 w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
57 |                 b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names)
58 |                 l1 = tf.nn.relu(tf.matmul(s, w1) + b1)
59 |             with tf.variable_scope('h1'):
60 |                 wh1 = tf.get_variable('wh1', [n_l1, n_l1], initializer=w_initializer, collections=c_names)
61 |                 bh1 = tf.get_variable('bh1', [1, n_l1], initializer=b_initializer, collections=c_names)
62 |                 lh1 = tf.nn.relu(tf.matmul(l1, wh1) + bh1)
63 | 
64 |             with tf.variable_scope('h2'):
65 | 
66 |                 wh2 = tf.get_variable('wh2', [n_l1, n_l1], initializer=w_initializer, collections=c_names)
67 |                 bh2 = tf.get_variable('bh2', [1, n_l1], initializer=b_initializer, collections=c_names)
68 |                 lh2 = tf.nn.relu(tf.matmul(lh1, wh2) + bh2)
69 | 
70 |             if self.dueling:
71 |                 # Dueling DQN
72 |                 with tf.variable_scope('Value'):
73 |                     w2 = tf.get_variable('w2', [n_l1, 1], initializer=w_initializer, collections=c_names)
74 |                     b2 = tf.get_variable('b2', [1, 1], initializer=b_initializer, collections=c_names)
75 |                     self.V = tf.matmul(lh2, w2) + b2
76 | 
77 |                 with tf.variable_scope('Advantage'):
78 |                     w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
79 |                     b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
80 |                     self.A = tf.matmul(lh2, w2) + b2
81 | 
82 |                 with tf.variable_scope('Q'):
83 |                     out = self.V + (self.A - tf.reduce_mean(self.A, axis=1, keepdims=True))  # Q = V(s) + A(s,a)
84 |             else:
85 |                 with tf.variable_scope('Q'):
86 |                     w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
87 |                     b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
88 |                     out = tf.matmul(lh2, w2) + b2  # feed the last hidden layer, as in the dueling branch
89 | 
90 |             return out
91 | 
92 |         # ------------------ build evaluate_net ------------------
93 |         self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input
94 |         self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target')  # for calculating loss
95 |         with tf.variable_scope('eval_net'):
96 |             c_names, n_l1, w_initializer, b_initializer = \
97 |                 ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 20, \
98 |                 tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)  # config of layers
99 | 
100 |             self.q_eval = build_layers(self.s, c_names, n_l1, w_initializer, b_initializer)
101 | 
102 |         with tf.variable_scope('loss'):
103 |             self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval))
104 |         with tf.variable_scope('train'):
105 |             self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
106 | 
107 |         # ------------------ build target_net ------------------
108 |         self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # input
109 |         with tf.variable_scope('target_net'):
110 |             c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
111 | 
112 |             self.q_next = build_layers(self.s_, c_names, n_l1, w_initializer, b_initializer)
113 | 
114 |     def store_transition(self, s, a, r, s_):
115 |         if not hasattr(self, 'memory_counter'):
116 |             self.memory_counter = 0
117 |         transition = np.hstack((s, [a, r], s_))
118 |         index = self.memory_counter % self.memory_size
119 |         self.memory[index, :] = transition
120 |         self.memory_counter += 1
121 | 
122 |     def choose_action(self, observation):
123 |         observation = observation[np.newaxis, :]
124 |         if np.random.uniform() < self.epsilon:  # choosing action
125 |             actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
126 |             action = np.argmax(actions_value)
127 |         else:
128 |             action = np.random.randint(0, self.n_actions)
129 |         return action
130 | 
131 |     def learn(self):
132 |         if self.learn_step_counter % self.replace_target_iter == 0:
133 |             self.sess.run(self.replace_target_op)
134 |             print('\ntarget_params_replaced\n')
135 | 
136 |         sample_index = np.random.choice(min(self.memory_counter, self.memory_size), size=self.batch_size)  # only sample rows that have been filled
137 |         batch_memory = self.memory[sample_index, :]
138 | 
139 |         q_next = self.sess.run(self.q_next, feed_dict={self.s_: batch_memory[:, -self.n_features:]})  # next observation
140 |         q_eval = self.sess.run(self.q_eval, {self.s: batch_memory[:, :self.n_features]})
141 | 
142 |         q_target = q_eval.copy()
143 | 
144 |         batch_index = np.arange(self.batch_size, dtype=np.int32)
145 |         eval_act_index = batch_memory[:, self.n_features].astype(int)
146 |         reward = batch_memory[:, self.n_features + 1]
147 | 
148 |         q_target[batch_index, eval_act_index] = reward + self.gamma * np.max(q_next, axis=1)
149 | 
150 |         _, self.cost = self.sess.run([self._train_op, self.loss],
151 |                                      feed_dict={self.s: batch_memory[:, :self.n_features],
152 |                                                 self.q_target: q_target})
153 |         self.cost_his.append(self.cost)
154 | 
155 |         self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max
156 |         self.learn_step_counter += 1
157 | 
158 |     def plot_cost(self, name='RL'):
159 |         import matplotlib.pyplot as plt
160 |         plt.plot(np.arange(len(self.cost_his)), self.cost_his)
161 |         np.save('.\\data\\NNcost'+name+'.npy', np.array(self.cost_his))
162 |         plt.ylabel('Cost')
163 |         plt.xlabel('training steps')
164 |         plt.savefig('.\\data\\'+name+'cost.svg', format='svg', dpi=400)
165 |         plt.savefig('.\\data\\'+name+'cost.png', format='png', dpi=400)
--------------------------------------------------------------------------------
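The dueling head above recombines the two streams as Q(s,a) = V(s) + (A(s,a) - mean_a A(s,a)); subtracting the mean advantage keeps the value and advantage streams identifiable. A standalone NumPy illustration with invented numbers:

import numpy as np

V = np.array([[2.0]])                # state value V(s), shape (1, 1)
A = np.array([[1.0, -1.0, 3.0]])     # advantages A(s, a), shape (1, n_actions)

Q = V + (A - A.mean(axis=1, keepdims=True))   # same combination as in the 'Q' scope above
print(Q)                             # [[2. 0. 4.]]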
/README.md:
--------------------------------------------------------------------------------
1 | Deep Reinforcement Learning for task offloading in edge computing environments.
2 | 
3 | It includes:
4 | 
5 | - systemModel.py: definitions of the tasks, the user equipment and the MEC server, ...
6 | 
7 | - Training.py: the RL training process.
8 | 
9 | - Deep Reinforcement Learning algorithms: DQN (DQN.py), Dueling DQN (Dueling.py), Double DQN (Double.py).
10 | 
11 | - Simulation.py: the main simulation file.
12 | 
13 | ## Required packages
14 | - SimPy and TensorFlow 2.x (the DQN code uses the TF1-style graph API through tf.compat.v1)
15 | 
16 | ## How the code works
17 | - Run the file Simulation.py.
18 | 
19 | - To change the number of user equipment, change the global variable 'UN' in the file systemModel.py.
20 | 
21 | - To switch between the DQN variants, change which module is imported as DQN in the file Training.py.
22 | 
--------------------------------------------------------------------------------
/Simulation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import systemModel
4 | from Training import OFFLOADQ
5 | 
6 | import random
7 | import simpy
8 | 
9 | 
10 | SIM_TIME = 150000
11 | RANDOM_SEED = 40
12 | RHO = 2
13 | BUFFER = 500
14 | 
15 | # Simulation of the comparative non-RL experiments (online / offline / semi-online)
16 | def Simulation(rho, name, function):
17 |     random.seed(RANDOM_SEED)
18 |     mec = systemModel.MEC()
19 |     mec.RHO = rho*mec.RHO
20 |     name += str(mec.USERS_NUM)
21 |     print("Creating environment!")
22 |     env = simpy.Environment()
23 |     print("Creating users!")
24 |     for i in range(mec.USERS_NUM):
25 |         user = systemModel.User(i)
26 |         user.usersetting()
27 |         user.usercreat()
28 |         mec.USER_LIST.append(user)
29 | 
30 |     WAITING_LEN = simpy.Container(env, BUFFER, init=len(mec.WAITING_LIST))
31 | 
32 |     env.process(mec.runremote(env, WAITING_LEN))
33 |     env.process(mec.refreshsys(env, WAITING_LEN, name, 'rho'+str(mec.RHO), 1))
34 |     if function == 'offline':
35 |         env.process(mec.offloadOF(env, WAITING_LEN))
36 |     elif function == 'online':
37 |         env.process(mec.offloadOL(env, WAITING_LEN))
38 |     elif function == 'semi':
39 |         env.process(mec.offloadSe(env, WAITING_LEN))
40 | 
41 |     env.process(mec.writelog(env, name, 'rho', int(mec.RHO)))
42 | 
43 |     env.run(until=SIM_TIME)
44 | 
45 |     mec.writeoffload(name, 'rho', int(mec.RHO))
46 |     for u in mec.USER_LIST:
47 |         u.userprint()
48 | 
49 | # Simulation of the RL experiment
50 | def SimulationRL(rho, rl):
51 | 
52 |     random.seed(RANDOM_SEED)
53 |     mec = systemModel.MEC()
54 |     mec.RHO = rho*mec.RHO
55 | 
56 |     print("Creating environment!")
57 |     env = simpy.Environment()
58 |     print("Creating users!")
59 |     for i in range(mec.USERS_NUM):
60 |         user = systemModel.User(i)
61 |         user.usersetting()
62 |         user.usercreat()
63 |         mec.USER_LIST.append(user)
64 | 
65 |     WAITING_LEN = simpy.Container(env, BUFFER, init=len(mec.WAITING_LIST))
66 |     env.process(mec.runremote(env, WAITING_LEN))
67 |     env.process(mec.refreshsys(env, WAITING_LEN, rl.name, 'rho'+str(mec.RHO), 1))
68 |     env.process(mec.offloadDQ(env, WAITING_LEN, rl))
69 |     env.process(mec.writelog(env, rl.name, 'rho', int(mec.RHO)))
70 |     env.run(until=SIM_TIME)
71 |     mec.writeoffload(rl.name, 'rho', int(mec.RHO))
72 |     for u in mec.USER_LIST:
73 |         u.userprint()
74 | 
75 | 
76 | online = 'online'+str(systemModel.CD)+'_'
77 | Simulation(RHO, online, 'online')
78 | 
79 | offline = 'offline'+str(systemModel.CD)+'_'
80 | Simulation(RHO, offline, 'offline')
81 | 
82 | semi = 'semi'+str(systemModel.CD)+'_'
83 | Simulation(RHO, semi, 'semi')
84 | 
85 | 
86 | ##########RL##############
87 | print("BEGIN training!")
88 | rl = OFFLOADQ()
89 | rl.mec.RHO = 4
90 | rl.update(RANDOM_SEED) 91 | rl.printCost() 92 | ##################################### 93 | SimulationRL(RHO,rl) 94 | #tf.reset_default_graph() 95 | -------------------------------------------------------------------------------- /Training.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import systemModel 4 | from DQN import DeepQNetwork as DQN 5 | from Double import DoubleDQN as DQN 6 | from Dueling import DuelingDQN as DQN 7 | import simpy 8 | import random 9 | 10 | 11 | 12 | USERS_NUM = systemModel.UN 13 | 14 | SIM_TIME = 10000 15 | BUFFER = 500 16 | LEPI = 500 17 | 18 | class OFFLOADQ(object): 19 | def __init__(self): 20 | 21 | self.name = 'DQN'+str(systemModel.CD)+'_'+str(USERS_NUM) 22 | self.mec = systemModel.MEC() 23 | self.action_space = [str(i) for i in range(USERS_NUM)] 24 | self.n_actions = 2**USERS_NUM 25 | self.n_features = 6 26 | self.RL = DQN(self.n_actions, self.n_features, 27 | learning_rate=0.01, 28 | reward_decay=0.9, 29 | e_greedy=0.9, 30 | replace_target_iter=200, 31 | memory_size=2000, 32 | ) 33 | 34 | self.done = True 35 | self.stepcount = 0 36 | 37 | def reset(self): 38 | self.mec.reset() 39 | self.done = True 40 | def printCost(self): 41 | self.RL.plot_cost(self.name) 42 | def step(self, mec_, observation,env_, WAITING_LEN_): 43 | count = 0 44 | while True: 45 | count+=1 46 | if mec_.CHANNEL - mec_.CHANNEL_USED <= 1: 47 | mec_.SCORE = -abs(mec_.SCORE) 48 | yield env_.timeout(mec_.TIMER*mec_.Delta*2) 49 | continue 50 | yield env_.timeout(mec_.TIMER*mec_.Delta) 51 | 52 | action = self.RL.choose_action(observation) 53 | userlist = mec_.randombin(action) 54 | channel = mec_.CHANNEL-mec_.CHANNEL_USED 55 | for i in range(len(userlist)): 56 | if userlist[i] == 1: 57 | userID = i 58 | mec_.offloadOne(env_,userID,sum(userlist),channel) 59 | 60 | observation_ = mec_.getstate() 61 | reward = mec_.SCORE 62 | self.RL.store_transition(observation, action, reward, observation_) 63 | if (self.stepcount > 40) and (self.stepcount % 4 == 0): 64 | self.RL.learn() 65 | observation = observation_ 66 | 67 | def update(self, RDSEED): 68 | self.reset() 69 | for episode in range(LEPI): 70 | self.reset() 71 | print ("learing episode %d" % (episode)) 72 | random.seed(RDSEED) 73 | for i in range(USERS_NUM): 74 | user = systemModel.User(i) 75 | user.usersetting() 76 | user.usercreat() 77 | self.mec.USER_LIST.append(user) 78 | env_ = simpy.Environment() 79 | WAITING_LEN_ = simpy.Container(env_, BUFFER, init=len(self.mec.WAITING_LIST)) 80 | 81 | observation = self.mec.getstate() 82 | env_.process(self.mec.runremote(env_,WAITING_LEN_)) 83 | env_.process(self.mec.refreshsys(env_,WAITING_LEN_)) 84 | env_.process(self.step(self.mec,observation,env_,WAITING_LEN_)) 85 | env_.run(until=SIM_TIME) 86 | 87 | self.stepcount += 1 88 | self.setpcount = 0 89 | self.reset() 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /systemModel.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | random.seed(40) 5 | 6 | 7 | UN = 5 #Numbers of user equipment 8 | CD = 2 #channel bandwidth allocation factor 9 | 10 | class Job(object): 11 | def __init__(self, userID, jobID): 12 | ######base info########## 13 | self.userID = userID 14 | self.jobID = jobID 15 | self.jobTran = 0.0 16 | self.jobDTran = 0.0 17 | self.jobRun = 0.0 18 | self.jobCPU = 0.0 19 | self.jobCEnergy = 0.0 20 | self.jobLEnergy = 
0.0 21 | self.jobType = 'normal' 22 | self.jobState = 'LW' #=act,inh,lw,lr,ts,rw,rr,cp,fl 23 | #############online changing######### 24 | self.jobRunLeft = 0.0 25 | self.jobTransLeft = 0.0 26 | self.jobChannel = 0.0 27 | ###########log################# 28 | self.jobBegin = 0.0 29 | self.jobFinish = 0.0 30 | self.jobOffload = 0.0 31 | self.jobRT =0.0 32 | self.jobTT = 0.0 33 | self.jobAge = 0.0 34 | 35 | 36 | class User(object): 37 | def __init__(self, userID): 38 | self.userID = userID 39 | self.JOB_LIST = [] 40 | self.jobData = 0.0 41 | self.jobTrans = [20] 42 | self.jobRuns = [20] 43 | self.jobCPU = 0.1 44 | self.jobNums = 50 45 | self.jobCEnergy = [20] 46 | self.jobLEnergy = [20] 47 | ###############log########### 48 | self.Throughout = 0.0 49 | self.CEnergy = 0.0 50 | self.LEnergy = 0.0 51 | self.commTotal = 0.0 52 | self.Age = 0.0 53 | 54 | 55 | ############################################################## 56 | 57 | def usersetting(self): 58 | self.jobNums = 10 59 | self.jobData = (UN-self.userID)*64 60 | self.jobRuns = [(self.userID+1)*25*i for i in range(1,5)] 61 | self.jobCPU = 0.1 62 | self.jobLEnergy = [(self.userID+1)*1.25*i for i in range(7,25)] 63 | 64 | 65 | def setjobenergy(self,jid,jobtran): 66 | BDu = self.JOB_LIST[jid].jobChannel 67 | BDd = BDu/2 68 | self.JOB_LIST[jid].jobTran = self.jobData/BDu 69 | self.JOB_LIST[jid].jobDTran = self.jobData/BDd 70 | LET = BDu*0.438 + 0.051*BDd + 1.288 71 | #WIFI = BDu*0.283 + 0.137*BDd + 0.132 72 | #self.JOB_LIST[jid].jobCEnergy = random.choice([LET,WIFI])*(jobtran/1000) 73 | self.JOB_LIST[jid].jobCEnergy = LET*(jobtran/1000) 74 | 75 | 76 | 77 | def jobcreat(self,jobid,jobtype='normal'): 78 | jobrun = random.choice(self.jobRuns) 79 | onejob = Job(self.userID, jobid) 80 | onejob.jobRun = jobrun 81 | onejob.jobType = jobtype 82 | onejob.jobCPU = self.jobCPU 83 | onejob.jobLEnergy = random.choice(self.jobLEnergy) 84 | 85 | return onejob 86 | 87 | def usercreat(self): 88 | 89 | onejob = self.jobcreat(0) 90 | self.JOB_LIST.append(onejob) 91 | 92 | for i in range(1,self.jobNums): 93 | onejob = self.jobcreat(i) 94 | self.JOB_LIST.append(onejob) 95 | 96 | 97 | def userprint(self): 98 | print("User %d totalfinish %.2f, energy %.2f , age %.2f." 
% (self.userID, self.Throughout, self.CEnergy, self.Age)) 99 | 100 | def usersend(self): 101 | jobid = -1 102 | for i in range(len(self.JOB_LIST)): 103 | job = self.JOB_LIST[i] 104 | if job.jobState == 'LW': 105 | jobid = i 106 | self.jobappend() 107 | return jobid 108 | if jobid == -1: 109 | self.jobappend() 110 | job = self.JOB_LIST[-1] 111 | return jobid 112 | 113 | def userrun(self): 114 | jobid = -1 115 | for i in range(len(self.JOB_LIST)): 116 | job = self.JOB_LIST[i] 117 | if job.jobState == 'LW': 118 | jobid = i 119 | return jobid 120 | return jobid 121 | 122 | def jobrefresh(self,env, fjob): 123 | jobID = fjob.jobID 124 | self.Throughout += 1 125 | self.JOB_LIST[jobID].jobFinish= env.now 126 | 127 | def jobappend(self): 128 | jid = len(self.JOB_LIST) 129 | onejob = self.jobcreat(jid) 130 | self.JOB_LIST.append(onejob) 131 | 132 | def runlocal(self,env): 133 | while True: 134 | jobID = self.userrun() 135 | if jobID == -1: 136 | self.jobappend() 137 | continue 138 | else: 139 | self.JOB_LIST[jobID].jobState = 'LR' 140 | self.JOB_LIST[jobID].jobBegin = env.now 141 | RUNNINGTIME = self.JOB_LIST[jobID].jobRun 142 | yield env.timeout(RUNNINGTIME) 143 | self.JOB_LIST[jobID].jobState = 'CP' 144 | self.LEnergy += self.JOB_LIST[jobID].jobLEnergy 145 | self.jobrefresh(env,self.JOB_LIST[jobID]) 146 | self.jobappend() 147 | 148 | 149 | class MEC(object): 150 | def __init__(self): 151 | ##########basic info########## 152 | self.USERS_NUM = UN 153 | self.USER_LIST = [] 154 | self.CHANNEL = 50.0 155 | self.RHO = 2.0 156 | self.TIMER = 10 157 | self.Delta = UN 158 | self.CD = CD 159 | ##########online changing####### 160 | self.JOB_POOL = [] 161 | self.TRANS_POOL = [] 162 | self.WAITING_LIST = [] 163 | self.CHANNEL_USED = 0.0 164 | self.SYS_TIME = 0.0 165 | self.SYS_CPU = 0.0 166 | self.ACTION = 0 167 | self.SCORE = 0.0 168 | ####################log################ 169 | self.offloadJob = [] 170 | self.Age = 0.0 171 | self.commTime = 0.0 172 | self.commEnergy = 0.0 173 | self.Run = 0.0 174 | #################RL################### 175 | self.REWARD = 0.0 176 | 177 | 178 | ###############################system log################################################### 179 | def writelog(self,env,fn, name, value, timeslot = 5000): 180 | yield env.timeout(5000) 181 | f = open('.\\data\\USER_'+str(fn)+'_'+str(name)+'_'+str(value)+'.data','w') 182 | oneline = 'TIMESLOT \t Throughout \t Age \t Run \t commTotal \t commEnergy \t reward\n' 183 | f.write(oneline) 184 | f.close() 185 | while True: 186 | yield env.timeout(timeslot) 187 | age = 0.0 188 | run = 0.0 189 | throu = 0.0 190 | comm = 0.0 191 | energy = 0.0 192 | sumreward = self.REWARD 193 | ucout = len(self.USER_LIST) 194 | for u in self.USER_LIST: 195 | throu += float(u.Throughout) 196 | age = self.Age/ucout/1000 197 | run = self.Run/ucout 198 | throu = throu/ucout 199 | comm = self.commTime/ucout/1000 200 | energy = self.commEnergy/ucout 201 | sumreward = self.REWARD 202 | f = open('.\\data\\USER_'+str(fn)+'_'+str(name)+'_'+str(value)+'.data','a') 203 | oneline = str(env.now/1000)+'\t'+str(throu)+'\t'+str(age)+'\t'+str(run)+'\t'+str(comm)+'\t'+str(energy)+'\t'+str(sumreward)+'\n' 204 | f.write(oneline) 205 | f.close() 206 | 207 | def writeoffload(self,fn, name, value): 208 | f = open('.\\data\\JOB_'+str(fn)+'_'+str(name)+'_'+str(value)+'.data','w') 209 | titleline = 'No \t Uid \t Jid \t offloadtime \t begintime \t commutime \t runtime \t energy \t AoI\n' 210 | f.write(titleline) 211 | i = 0 212 | for j in self.offloadJob: 213 | oneline = str(i) +'\t'+ 
str(j.userID) +'\t'+ str(j.jobID) +'\t'+str(j.jobOffload/1000) +'\t'+ str(j.jobBegin/1000) +'\t' 214 | oneline += str(j.jobTran/1000) +'\t'+ str(j.jobRun/1000) +'\t'+ str(j.jobCEnergy) +'\t'+ str(j.jobAge/1000) +'\n' 215 | i +=1 216 | f.write(oneline) 217 | f.close() 218 | 219 | ######RL############# 220 | def getstate(self): 221 | state = [] 222 | state.append(self.CHANNEL_USED) 223 | state.append(self.SYS_CPU) 224 | state.append(len(self.JOB_POOL)) 225 | state.append(len(self.TRANS_POOL)) 226 | 227 | uwait = 0.0 228 | utran = 0.0 229 | for i in self.JOB_POOL: 230 | uwait += self.USER_LIST[i[0]].JOB_LIST[i[1]].jobRunLeft 231 | for j in self.TRANS_POOL: 232 | utran += self.USER_LIST[j[0]].JOB_LIST[j[1]].jobTransLeft 233 | state.append(uwait) 234 | state.append(utran) 235 | state = np.array(state) 236 | return state 237 | 238 | def reset(self): 239 | self.USER_LIST = [] 240 | 241 | self.JOB_POOL = [] 242 | self.TRANS_POOL = [] 243 | self.WAITING_LIST = [] 244 | self.CHANNEL_USED = 0.0 245 | self.SYS_TIME = 0.0 246 | self.SYS_CPU = 0.0 247 | 248 | self.offloadJob = [] 249 | self.REWARD = 0.0 250 | ######RL############# 251 | 252 | ##################################### 253 | def channeldisturb(self, userID,jobID,jobnum,channel): 254 | disturb = np.log2(1+1/(self.CD+jobnum)) 255 | cl = channel*disturb 256 | 257 | if self.CHANNEL_USED+cl > self.CHANNEL: 258 | return -1 259 | self.CHANNEL_USED += cl 260 | jt = self.USER_LIST[userID].jobData/cl 261 | self.USER_LIST[userID].JOB_LIST[jobID].jobChannel = cl 262 | return jt 263 | ################################################### 264 | 265 | 266 | def offloadOne(self,env,userID,jobnum,channel): 267 | jobID = self.USER_LIST[userID].usersend() 268 | if jobID == -1: 269 | return 270 | 271 | TRANSPOTTIME = self.channeldisturb(userID,jobID,jobnum,channel) 272 | if TRANSPOTTIME == -1: 273 | self.SCORE = -abs(self.SCORE) 274 | return 275 | 276 | self.USER_LIST[userID].JOB_LIST[jobID].jobOffload = env.now 277 | self.USER_LIST[userID].JOB_LIST[jobID].jobState = 'TS' 278 | self.USER_LIST[userID].JOB_LIST[jobID].jobAge = env.now 279 | self.USER_LIST[userID].JOB_LIST[jobID].jobTT = TRANSPOTTIME 280 | self.USER_LIST[userID].JOB_LIST[jobID].jobTransLeft = TRANSPOTTIME 281 | self.USER_LIST[userID].setjobenergy(jobID,TRANSPOTTIME) 282 | self.commEnergy += self.USER_LIST[userID].JOB_LIST[jobID].jobCEnergy 283 | self.TRANS_POOL.append((userID,jobID)) 284 | 285 | def runremote(self,env, WAITING_LEN): 286 | while True: 287 | yield env.timeout(self.TIMER) 288 | 289 | if self.SYS_CPU > 0.8: 290 | yield env.timeout(self.TIMER*2) 291 | self.SCORE = -abs(self.SCORE) 292 | continue 293 | else: 294 | yield WAITING_LEN.get(1) 295 | job = self.WAITING_LIST.pop(0) 296 | userID = job.userID 297 | jobID = job.jobID 298 | self.JOB_POOL.append((userID,jobID)) 299 | self.SYS_CPU += self.USER_LIST[userID].JOB_LIST[jobID].jobCPU 300 | ####################################################################################### 301 | self.USER_LIST[userID].JOB_LIST[jobID].jobState = 'RR' 302 | self.USER_LIST[userID].JOB_LIST[jobID].jobBegin = env.now 303 | RUNNINGTIME = float(self.USER_LIST[userID].JOB_LIST[jobID].jobRun)/self.RHO 304 | self.USER_LIST[userID].JOB_LIST[jobID].jobRT = RUNNINGTIME 305 | self.USER_LIST[userID].JOB_LIST[jobID].jobRunLeft = RUNNINGTIME 306 | 307 | 308 | def refreshsys(self,env,WAITING_LEN,name='',value='',flag = 0): 309 | if flag ==1: 310 | f = open('.\\data\\ACTION_'+str(name)+'_'+str(value)+'.data','w') 311 | oneline = 'sysTime \t'+'ACTION \t'+'ChannelUsed 
\t'+'TransJob \t'+'CPU \t'+'RunningJob \t'+'ActionQos \n' 312 | f.write(oneline) 313 | f.close() 314 | while True: 315 | yield env.timeout(self.TIMER) 316 | TIMER = env.now - self.SYS_TIME 317 | self.SYS_TIME = env.now 318 | if flag ==1: 319 | f = open('.\\data\\ACTION_'+str(name)+'_'+str(value)+'.data','a') 320 | oneline = str(self.SYS_TIME)+'\t' +str(self.ACTION)+'\t' +str(self.CHANNEL_USED)+ '\t' + str(len(self.TRANS_POOL)) + '\t' +str(self.SYS_CPU)+ '\t' + str(len(self.JOB_POOL)) 321 | oneline += '\t' +str(self.SCORE) + '\n' 322 | f.write(oneline) 323 | 324 | transpool = [] 325 | for Jt in self.TRANS_POOL: 326 | userID = Jt[0] 327 | jobID = Jt[1] 328 | onejob = self.USER_LIST[userID].JOB_LIST[jobID] 329 | if onejob.jobTransLeft > TIMER: 330 | transpool.append((userID,jobID)) 331 | self.USER_LIST[userID].JOB_LIST[jobID].jobTransLeft = self.USER_LIST[userID].JOB_LIST[jobID].jobTransLeft-TIMER 332 | else: 333 | self.USER_LIST[userID].JOB_LIST[jobID].jobState = 'RW' 334 | self.CHANNEL_USED -= self.USER_LIST[userID].JOB_LIST[jobID].jobChannel 335 | self.WAITING_LIST.append(self.USER_LIST[userID].JOB_LIST[jobID]) 336 | self.USER_LIST[userID].jobappend() 337 | yield WAITING_LEN.put(1) 338 | self.TRANS_POOL = transpool 339 | 340 | 341 | jobpool = [] 342 | for Jr in self.JOB_POOL: 343 | userID = Jr[0] 344 | jobID = Jr[1] 345 | onejob = self.USER_LIST[userID].JOB_LIST[jobID] 346 | if onejob.jobRunLeft > TIMER: 347 | jobpool.append((userID,jobID)) 348 | self.USER_LIST[userID].JOB_LIST[jobID].jobRunLeft = self.USER_LIST[userID].JOB_LIST[jobID].jobRunLeft-TIMER 349 | else: 350 | self.USER_LIST[userID].JOB_LIST[jobID].jobState = 'CP' 351 | self.SYS_CPU -= self.USER_LIST[userID].JOB_LIST[jobID].jobCPU 352 | self.USER_LIST[userID].jobrefresh(env,self.USER_LIST[userID].JOB_LIST[jobID]) 353 | self.offloadJob.append(self.USER_LIST[userID].JOB_LIST[jobID]) 354 | ######################################################################## 355 | self.USER_LIST[userID].JOB_LIST[jobID].jobAge = env.now - self.USER_LIST[userID].JOB_LIST[jobID].jobAge 356 | self.Age += self.USER_LIST[userID].JOB_LIST[jobID].jobAge 357 | self.Run += self.USER_LIST[userID].JOB_LIST[jobID].jobRun 358 | self.commTime += self.USER_LIST[userID].JOB_LIST[jobID].jobTT 359 | ###################################REWARD###################################### 360 | self.SCORE = self.USER_LIST[userID].JOB_LIST[jobID].jobRun/self.USER_LIST[userID].JOB_LIST[jobID].jobCEnergy 361 | self.REWARD += self.SCORE 362 | ################################################################################# 363 | self.JOB_POOL = jobpool 364 | f.close() 365 | 366 | 367 | 368 | def offline(self): 369 | score = 0.0 370 | action = 1 371 | for i in range(2**self.USERS_NUM): 372 | userlist = self.randombin(i) 373 | score_ = 0 374 | jobnum = sum(userlist) 375 | channel = self.CHANNEL-self.CHANNEL_USED 376 | cl = 0 377 | for u in range(len(userlist)): 378 | if userlist[u] == 1: 379 | userID = u 380 | disturb = np.log2(1+1/(self.CD+jobnum)) 381 | cl = channel*disturb 382 | score_ += np.average(self.USER_LIST[userID].jobRuns)/self.USER_LIST[userID].jobData*cl 383 | if score_ > score: 384 | score = score_ 385 | action = i 386 | return action 387 | 388 | def spac(self): 389 | score = 100000.0 390 | action = 1 391 | for i in range(2**self.USERS_NUM): 392 | userlist = self.randombin(i) 393 | score_ = 100000.0 394 | jobnum = sum(userlist) 395 | channel = self.CHANNEL-self.CHANNEL_USED 396 | cl = 0 397 | for u in range(len(userlist)): 398 | if userlist[u] == 1: 399 | userID = u 
400 | disturb = np.log2(1+1/(self.CD+jobnum)) 401 | cl = channel*disturb 402 | if cl < 1: 403 | score_ = 100000.0 404 | else: 405 | score_ += self.USER_LIST[userID].jobData/cl 406 | if score_ < score: 407 | score = score_ 408 | action = i 409 | return action 410 | 411 | def randombin(self,action): 412 | userlist = list(bin(action).replace('0b','')) 413 | zeros = self.USERS_NUM - len(userlist) 414 | ll = [0 for i in range(zeros)] 415 | for i in userlist: 416 | ll.append(int(i)) 417 | return ll 418 | #################################offloading strategy######################################## 419 | #online 420 | def offloadOL(self,env, WAITING_LEN): 421 | while True: 422 | if self.CHANNEL - self.CHANNEL_USED <= 1: 423 | self.SCORE = -abs(self.SCORE) 424 | yield env.timeout(self.TIMER*self.Delta*2) 425 | continue 426 | yield env.timeout(self.TIMER*self.Delta) 427 | self.ACTION = random.randint(1,2**self.USERS_NUM-1) 428 | userlist = self.randombin(self.ACTION) 429 | jobnum = sum(userlist) 430 | channel = self.CHANNEL-self.CHANNEL_USED 431 | for i in range(len(userlist)): 432 | if userlist[i] == 1: 433 | userID = i 434 | self.offloadOne(env,userID,jobnum,channel) 435 | #offline 436 | def offloadOF(self,env, WAITING_LEN): 437 | while True: 438 | if self.CHANNEL - self.CHANNEL_USED <= 1: 439 | self.SCORE = -abs(self.SCORE) 440 | yield env.timeout(self.TIMER*self.Delta*2) 441 | continue 442 | yield env.timeout(self.TIMER*self.Delta) 443 | self.ACTION = self.offline() 444 | userlist = self.randombin(self.ACTION) 445 | jobnum = sum(userlist) 446 | channel = self.CHANNEL-self.CHANNEL_USED 447 | for i in range(len(userlist)): 448 | if userlist[i] == 1: 449 | userID = i 450 | self.offloadOne(env,userID,jobnum,channel) 451 | #semi-online 452 | def offloadSe(self,env, WAITING_LEN): 453 | while True: 454 | if self.CHANNEL - self.CHANNEL_USED <= 1: 455 | self.SCORE = -abs(self.SCORE) 456 | yield env.timeout(self.TIMER*self.Delta*2) 457 | continue 458 | yield env.timeout(self.TIMER*self.Delta) 459 | self.ACTION = 1 460 | userlist = self.randombin(self.ACTION) 461 | jobnum = sum(userlist) 462 | channel = self.CHANNEL-self.CHANNEL_USED 463 | for i in range(len(userlist)): 464 | if userlist[i] == 1: 465 | userID = i 466 | self.offloadOne(env,userID,jobnum,channel) 467 | #RL 468 | def offloadDQ(self, env,WAITING_LEN,ql): 469 | while True: 470 | observation = self.getstate() 471 | if self.CHANNEL - self.CHANNEL_USED <= 1: 472 | self.SCORE = -abs(self.SCORE) 473 | yield env.timeout(self.TIMER*self.Delta*2) 474 | continue 475 | yield env.timeout(self.TIMER*self.Delta) 476 | self.ACTION = ql.RL.choose_action(observation) 477 | userlist = self.randombin(self.ACTION) 478 | channel = self.CHANNEL-self.CHANNEL_USED 479 | for i in range(len(userlist)): 480 | if userlist[i] == 1: 481 | userID = i 482 | self.offloadOne(env,userID,sum(userlist),channel) 483 | 484 | 485 | --------------------------------------------------------------------------------
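A worked example of how the pieces of systemModel.py above fit together: MEC.randombin() decodes an action index into a per-user offloading decision, and MEC.channeldisturb() splits the free channel among the selected users. The standalone script below re-traces that arithmetic with the default UN = 5, CD = 2 and an otherwise idle channel; the chosen action index is arbitrary.

import numpy as np

UN, CD = 5, 2                       # number of users, bandwidth allocation factor
CHANNEL, CHANNEL_USED = 50.0, 0.0   # total and currently used channel, as in MEC.__init__

# --- action decoding (same steps as MEC.randombin) ---
action = 11
userlist = list(bin(action).replace('0b', ''))
zeros = UN - len(userlist)
ll = [0 for _ in range(zeros)]
for i in userlist:
    ll.append(int(i))
print(ll)                           # [0, 1, 0, 1, 1] -> users 1, 3 and 4 offload in this slot

# --- per-job channel share (same formula as MEC.channeldisturb) ---
jobnum = sum(ll)                                        # 3 simultaneous uploads
channel = CHANNEL - CHANNEL_USED
cl = channel * np.log2(1 + 1 / (CD + jobnum))           # bandwidth granted to each offloaded job
print(round(cl, 2))                                     # ~13.15

# --- transmission time for user 1 (jobData = (UN - userID) * 64 in User.usersetting) ---
jt = (UN - 1) * 64 / cl
print(round(jt, 2))                                     # ~19.47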