├── README.md ├── data └── process.py ├── Parameter.py ├── Environment.py ├── DDPG.py ├── D_DDPG.py └── DPDS.py /README.md: -------------------------------------------------------------------------------- 1 | # Deep PDS Learning for AoI Minimization 2 | Source code for paper [Age-Based Scheduling for Mobile Edge Computing: A Deep Reinforcement Learning Approach](https://ieeexplore.ieee.org/abstract/document/10449431), written in python and tensorflow. 3 | 4 | ## Usage 5 | ```bash 6 | python DPDS.py 7 | python DPDS.py --Alg='dpl' 8 | python DPDS.py --Alg='coo' 9 | python DPDS.py --Alg='lpo' 10 | ``` 11 | The running data are (i) recorded by the tf.summary module and can be viewed in real time by running tensorboard in the `logs` directory and (ii) written into matlab format files (`.mat`) in the `data` directory after the simulation is finished. 12 | 13 | ## Citation 14 | If you find our code helpful, please consider citing our paper. 15 | ``` 16 | @article{he2024age, 17 | title={Age-Based Scheduling for Mobile Edge Computing: A Deep Reinforcement Learning Approach}, 18 | author={He, Xingqiu and You, Chaoqun and Quek, Tony QS}, 19 | journal={IEEE Transactions on Mobile Computing}, 20 | year={2024}, 21 | publisher={IEEE} 22 | } 23 | ``` 24 | -------------------------------------------------------------------------------- /data/process.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from scipy.io import loadmat, savemat 4 | 5 | alglist = ['dpds', 'coo', 'dpl', 'lpo'] 6 | nlist = list(range(10, 21)) 7 | 8 | A_scale = {} 9 | E_scale = {} 10 | A_energy = {} 11 | E_energy = {} 12 | 13 | for alg in alglist: 14 | A_scale[alg] = [] 15 | E_scale[alg] = [] 16 | A_energy[alg] = [] 17 | E_energy[alg] = [] 18 | 19 | for alg in alglist: 20 | for n in nlist: 21 | filename = alg + '_scale_n' + str(n) + '_lam5000.mat' 22 | data = loadmat(filename) 23 | A_scale[alg].append(np.mean(data['A'])) 24 | E_scale[alg].append(np.mean(data['E'])) 25 | 26 | print(A_scale) 27 | print(E_scale) 28 | 29 | savedata = {'A': A_scale, 'E': E_scale} 30 | savemat('scale.mat', savedata) 31 | 32 | 33 | for alg in alglist: 34 | for n in nlist: 35 | filename = alg + '_energy' + str(n) + '_n15_lam5000.mat' 36 | data = loadmat(filename) 37 | A_energy[alg].append(np.mean(data['A'])) 38 | E_energy[alg].append(np.mean(data['E'])) 39 | 40 | print(A_energy) 41 | print(E_energy) 42 | 43 | savedata = {'A': A_energy, 'E': E_energy} 44 | savemat('energy.mat', savedata) 45 | -------------------------------------------------------------------------------- /Parameter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import math 4 | 5 | ''' Parameter List 6 | N = 10 ; number of WDs 7 | T = 100 ; number of time slots 8 | d_t = 0.01 ; duration of each slot 9 | 10 | ; d ~ [d_min, d_max], uniform distribution 11 | ; d = {10, 20, 30, 40, 50}Kb 12 | d_min = 1Mb ; lower bound of data size 13 | d_max = 5Mb ; upper bound of data size 14 | d_int = 1Mb ; interval between neighboring data size 15 | d_sub = 5Kb ; data size of each subtask 16 | 17 | ; for the task interval Delta, we consider three different distributions 18 | pattern = {"geo", "map"} 19 | ; 1. geometric distribution (i.e. g_i(t) bernoulli) 20 | p_g = 0.2 ; P(Delta = k) = (1-p_g)^{k-1} * p_g 21 | ; 2. 
MAP 22 | TODO 23 | 24 | kappa = 1000 ; one bit data require kappa CPU cycles 25 | gamma = 10^{-27} ; energy-efficiency factor 26 | eta = gamma * kappa^3 / d_t^2 27 | h[i][t] = {3,6,9}*10^{-10} ; channel gain 28 | sigma^2 = 10^{-9} ; noise power 29 | 30 | W_max = 10MHz ; each WD share 1MHz (in expectation) 31 | E_max[i] = 0.5W 32 | f_max[i] = 1GHz 33 | P_max[i] = 1W 34 | ''' 35 | 36 | ''' Variable List 37 | f[i][t] ; CPU frequency of WD i at slot t 38 | d_l[i][t] ; locally-processed data 39 | E_l[i][t] ; energy consumption due to local processing 40 | 41 | r[i][t] ; wireless transmission rate 42 | W[i][t] ; wireless bandwidth 43 | P[i][t] ; transmission power 44 | d_o[i][t] ; offloaded data 45 | E_o[i][t] ; energy consumption due to offloading 46 | ''' 47 | 48 | class Parameter(object): 49 | def __init__(self, N=3, T=100, pattern = "geo"): 50 | super().__init__() 51 | 52 | self.N = N 53 | self.T = T 54 | self.d_t = 0.01 55 | 56 | self.pattern = pattern 57 | self.d_lb = 2e4 58 | self.d_ub = 5e4 59 | self.p_g = 0.3 60 | 61 | self.kappa = 1000.0 62 | self.gamma = 1e-28 63 | self.eta = self.gamma * self.kappa**3 / self.d_t**2 64 | self.sigma2 = 1e-11 65 | 66 | # channel model 67 | self.length = 100 68 | self.epsilon = 3.8 69 | self.BS_loc = (self.length/2, self.length/2) 70 | self.WD_loc_list = [(random.randint(0,self.length), random.randint(0,self.length)) for _ in range(self.N)] 71 | self.distance = [math.sqrt((self.BS_loc[0]-WD_loc[0])**2 + (self.BS_loc[1]-WD_loc[1])**2) for WD_loc in self.WD_loc_list] 72 | print(self.distance) 73 | self.distance = np.array(self.distance) 74 | 75 | self.W_max = 20e6 76 | self.E_max = [0.1*self.d_t for _ in range(N)] 77 | self.f_max = [2e9 for _ in range(N)] 78 | self.P_max = [1.0 for _ in range(N)] 79 | self.lam_max = [100000 for _ in range(N)] 80 | 81 | self.beta = lambda t: 1 / np.sqrt(t) 82 | #self.beta = lambda t: 0.0001 83 | self.theta = lambda t: min(100, 1000/np.log(t+10)) 84 | #self.theta = lambda t: max(10, 10/np.sqrt(t)) 85 | #self.theta = lambda t: 0 86 | self.lam_init = np.ones(N) * 5000 87 | #self.lam_init = np.zeros(N) 88 | -------------------------------------------------------------------------------- /Environment.py: -------------------------------------------------------------------------------- 1 | # + 2 | import numpy as np 3 | import sys 4 | import random 5 | import Parameter as Parameter 6 | 7 | import pdb 8 | 9 | 10 | # - 11 | 12 | class Environment(object): 13 | def __init__(self, param: Parameter): 14 | super().__init__() 15 | self.param = param 16 | # self.reset() 17 | 18 | def reset(self): 19 | param = self.param 20 | 21 | self.timer = 0 22 | self.nTask = 0 #number of generated tasks 23 | self.next_gen = [0 for _ in range(param.N)] 24 | self.last_datasize = 0 25 | self.last_delta = np.zeros(param.N, dtype=int) 26 | 27 | # states 28 | self.d_r = np.zeros(param.N) 29 | self.a = np.zeros(param.N, dtype=int) 30 | self.delta = np.zeros(param.N, dtype=int) 31 | self.q = np.zeros(param.N, dtype=int) 32 | self.h = self.new_channel_gain() 33 | self.g = np.zeros(param.N, dtype=int) 34 | self.q_set = [[] for _ in range(param.N)] 35 | self.acc_E = np.zeros(param.N) # accumulated energy consumption 36 | 37 | # statistics 38 | self.E_stat = [0] 39 | self.A_stat = [0] 40 | 41 | return np.transpose(np.vstack((self.d_r, self.a, self.q, self.h))) 42 | 43 | def step(self, action): 44 | f = action[:, 0] * self.param.f_max 45 | P = action[:, 1] * self.param.P_max 46 | W = action[:, 2] * self.param.W_max 47 | #assert np.isclose(np.sum(action[:,2]), 1.) 
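        # The agent outputs normalized decisions in [0, 1]; they are scaled here to the
        # physical CPU frequency f, transmit power P and bandwidth W. The bandwidth
        # fractions action[:, 2] come from a softmax in the agent, so they should sum
        # to 1 -- the check below drops into pdb when that numerically fails.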
48 | if not np.isclose(np.sum(action[:,2]), 1.): 49 | pdb.set_trace() 50 | 51 | if np.isnan(f).any() or np.isnan(P).any() or np.isnan(W).any(): 52 | pdb.set_trace() 53 | 54 | d = f * self.param.d_t / self.param.kappa + \ 55 | self.param.d_t * W * np.log2(1+P*self.h/self.param.sigma2) 56 | E = self.param.gamma * f**3 * self.param.d_t + P * self.param.d_t 57 | #pdb.set_trace() 58 | self.update(d, E) 59 | 60 | return np.transpose(np.vstack((self.d_r, self.a, self.q, self.h))), E 61 | 62 | def update(self, d, E): 63 | self.timer += 1 64 | # at the beginning of each slot, observe current channel gain and previous task generation 65 | self.h = self.new_channel_gain() 66 | self.g = self.new_task_generation(self.delta) 67 | # update system states according to previous decisions 68 | d = np.minimum(d, self.d_r) 69 | self.d_r = self.d_r - d 70 | self.acc_E = self.acc_E + E 71 | for i in range(self.param.N): 72 | assert self.q[i] == len(self.q_set[i]) 73 | if d[i] > 0 and self.d_r[i] == 0: # HOL task is completed 74 | self.q_set[i].pop(0) # remove the HOL task 75 | self.q[i] = self.q[i] - 1 76 | if self.q[i] > 0: 77 | self.d_r[i] = self.q_set[i][0].datasize 78 | self.a[i] = self.timer - self.q_set[i][0].generationTime 79 | else: 80 | self.a[i] = 0 81 | elif self.d_r[i] > 0: # HOL task is not completed 82 | self.a[i] = self.a[i] + 1 83 | else: # d_r[i] == 0 and d[i] == 0, then the queue must be empty in last slot 84 | #assert self.q[i] - self.g[i] == 0 85 | if self.q[i] - self.g[i] != 0: 86 | print(self.q[i]) 87 | print(self.g[i]) 88 | pdb.set_trace() 89 | assert False 90 | if self.q[i] > 0: 91 | self.d_r[i] = self.q_set[i][0].datasize 92 | self.a[i] = self.timer - self.q_set[i][0].generationTime 93 | else: 94 | self.a[i] = 0 95 | #self.d_r = round_and_check(self.d_r) 96 | self.E_stat.append(np.sum(E)/self.param.N) 97 | self.A_stat.append(np.sum(self.a)/self.param.N) 98 | 99 | def new_channel_gain(self): 100 | rand_expo = [random.expovariate(1.0) for _ in range(self.param.N)] 101 | rand_expo = np.array(rand_expo) 102 | h = 1e-3 * self.param.distance**(-self.param.epsilon) * rand_expo 103 | return h 104 | 105 | def new_task_generation(self, old_delta): 106 | # remember to minus the timer by 1, because we are observing the task generation of the previous slot 107 | timer = self.timer - 1 108 | g = np.zeros(self.param.N) 109 | for i in range(self.param.N): 110 | if self.next_gen[i] == timer: 111 | g[i] = 1 112 | self.last_delta[i] = self.delta[i] 113 | self.delta[i] = 1 # reset delta 114 | # generate new task 115 | self.last_datasize = random.randint(self.param.d_lb, self.param.d_ub) 116 | new_task = Task(self.nTask, timer, self.last_datasize, i) 117 | self.nTask += 1 118 | # remember to update q[i] and q_set[i] 119 | self.q_set[i].append(new_task) 120 | self.q[i] += 1 121 | if self.param.pattern == "geo": 122 | interval = np.random.geometric(self.param.p_g) 123 | elif self.param.pattern == "map": 124 | sys.exit("MAP not implemented yet!") 125 | else: 126 | sys.exit("arrival pattern not implemented yet!") 127 | assert interval > 0 128 | self.next_gen[i] += interval 129 | else: 130 | self.delta[i] += 1 131 | return g 132 | 133 | class Task(object): 134 | def __init__(self, index, gt, size, wd): 135 | self.index = index 136 | self.generationTime = gt 137 | self.datasize = size 138 | self.generationDevice = wd 139 | -------------------------------------------------------------------------------- /DDPG.py: -------------------------------------------------------------------------------- 1 | # + 2 | from 
Environment import Environment 3 | from Parameter import Parameter 4 | from scipy.io import savemat 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow import keras 8 | import random 9 | import os 10 | import time 11 | import argparse 12 | import sys 13 | import time 14 | import pickle 15 | import pdb 16 | 17 | # + 18 | #################### params ########################### 19 | parser = argparse.ArgumentParser(description='Hyper_params') 20 | parser.add_argument('--Info', default='', type=str) # information added to log dir name 21 | 22 | parser.add_argument('--Seed', default=41, type=int) 23 | parser.add_argument('--Units', default=256, type=int) # hidden units num of NN 24 | parser.add_argument('--Lr', default=0.001, type=float) # learning rate 25 | parser.add_argument('--omega', default=0.005, type=float) # used to update target networks 26 | parser.add_argument('--Max_Epsilon', default=1.0, type=float) 27 | parser.add_argument('--Min_Epsilon', default=1.0, type=float) 28 | parser.add_argument('--Epsilon_Decay', default=1.0, type=float) 29 | parser.add_argument('--Batch_Size', default=256, type=int) 30 | parser.add_argument('--Memory_Size', default=1000000, type=int) # buffer size 31 | parser.add_argument('--Start_Size', default=0, type=int) # random action before start_size 32 | parser.add_argument('--Update_After', default=0, type=int) 33 | parser.add_argument('--Train_Interval', default=1, type=int) 34 | parser.add_argument('--load_weights', default=False, type=bool) 35 | parser.add_argument('--Alg', default='ddpg', type=str) 36 | parser.add_argument('--Gpu_Id', default="0", type=str) # -1 means CPU 37 | parser.add_argument('--N', default=15, type=int) # number of WDs 38 | parser.add_argument('--T', default=1000000, type=int) # number of simulated slots 39 | parser.add_argument('--batch_norm', default=True, type=bool) 40 | 41 | args = parser.parse_args() 42 | # - 43 | 44 | #################### seed ########################### 45 | os.environ['TF_DETERMINISTIC_OPS'] = '1' 46 | os.environ["CUDA_VISIBLE_DEVICES"] = args.Gpu_Id 47 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU') 48 | for gpu in gpus: 49 | tf.config.experimental.set_memory_growth(gpu, True) 50 | print(tf.config.list_physical_devices()) 51 | tf.random.set_seed(args.Seed) 52 | np.random.seed(args.Seed) 53 | random.seed(args.Seed) 54 | 55 | #################### log ########################### 56 | # create log file 57 | time_str = time.strftime("%m-%d_%H-%M", time.localtime()) 58 | alg = args.Alg 59 | log_dir_name = 'logs/' + time_str + '_' + alg + args.Info + '_n' + \ 60 | str(args.N) + '_seed' + str(args.Seed) 61 | data_dir_name = 'data/' + alg + args.Info + '_n' + str(args.N) 62 | fw = tf.summary.create_file_writer(log_dir_name) # log file witer 63 | 64 | # create dir to save model 65 | if not os.path.exists(log_dir_name + '/models'): 66 | os.makedirs(log_dir_name + '/models') 67 | 68 | # save params to a .txt file 69 | prams_file = open(log_dir_name + '/prams_table.txt', 'w') 70 | prams_file.writelines(f'{i:50} {v}\n' for i, v in args.__dict__.items()) 71 | prams_file.close() 72 | 73 | ###################### env ############################### 74 | param = Parameter(args.N, args.T) 75 | param.lam_init = np.ones(param.N) * 1e4 76 | param.theta = lambda t: 0 77 | param.beta = lambda t: 0.01 78 | env = Environment(param) 79 | if args.load_weights: 80 | with open('models/v.pickle', 'rb') as f: 81 | Initial_v = pickle.load(f) 82 | else: 83 | Initial_v = 0 # initial average reward 
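# Each state returned by the environment has shape (N, 4): per WD it stacks the
# remaining data of the head-of-line task d_r, the age of information a, the
# queue length q and the channel gain h (see Environment.reset/step). The energy
# constraint is handled through the Lagrange multipliers lam (updated with step
# size theta), and beta is the step size of the average-cost estimate v.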
84 | 85 | ###################### others ############################### 86 | W_Initializer = tf.initializers.he_normal(args.Seed) # NN initializer 87 | Epsilon_Decay_Rate = (args.Min_Epsilon - args.Max_Epsilon) / (args.T) * args.Epsilon_Decay # factor of decay 88 | TENSOR_FLOAT_TYPE = tf.dtypes.float32 89 | TENSOR_INT_TYPE = tf.dtypes.int32 90 | 91 | def softmax(x): 92 | return np.exp(x) / np.sum(np.exp(x)) 93 | 94 | # + 95 | class ReplayBuffer: 96 | def __init__(self, buffer_capacity = 100000): 97 | self.buffer_capacity = buffer_capacity 98 | self.buffer_counter = 0 99 | 100 | # dim(action) = N * 3 101 | # dim(state) = N * 4 102 | # dim(pds) = N * 4 103 | buffer_a_dim = (buffer_capacity, param.N, 3) 104 | buffer_s_dim = (buffer_capacity, param.N, 4) 105 | 106 | self.s_buffer = np.empty(buffer_s_dim, dtype=np.float32) 107 | self.a_buffer = np.empty(buffer_a_dim, dtype=np.float32) 108 | self.r_buffer = np.empty((buffer_capacity,), dtype=np.float32) 109 | self.next_s_buffer = np.empty(buffer_s_dim, dtype=np.float32) 110 | 111 | def store(self, exp): 112 | index = self.buffer_counter % self.buffer_capacity 113 | 114 | s, a, r, next_s = exp 115 | self.s_buffer[index] = s 116 | self.a_buffer[index] = a 117 | self.r_buffer[index] = r 118 | self.next_s_buffer[index] = next_s 119 | 120 | self.buffer_counter += 1 121 | 122 | def sample(self, batch_size): 123 | sampling_range = min(self.buffer_counter, self.buffer_capacity) 124 | idx = np.random.randint(0, sampling_range, batch_size) 125 | 126 | batch_s = tf.convert_to_tensor(self.s_buffer[idx]) 127 | batch_a = tf.convert_to_tensor(self.a_buffer[idx]) 128 | batch_r = tf.convert_to_tensor(self.r_buffer[idx]) 129 | batch_next_s = tf.convert_to_tensor(self.next_s_buffer[idx]) 130 | 131 | return batch_s, batch_a, batch_r, batch_next_s 132 | 133 | class OUActionNoise: 134 | def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None): 135 | self.theta = theta 136 | self.mean = mean 137 | self.std_dev = std_deviation 138 | self.dt = dt 139 | self.x_initial = x_initial 140 | self.reset() 141 | 142 | def __call__(self): 143 | # Formula taken from https://www.wikipedia.org/wiki/Ornstein-Uhlenbeck_process. 
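        # Euler-Maruyama discretization of the OU process dx = theta*(mu - x)*dt + sigma*dW:
        #   x_{t+1} = x_t + theta*(mean - x_t)*dt + std_dev*sqrt(dt)*N(0, 1)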
144 | x = ( 145 | self.x_prev 146 | + self.theta * (self.mean - self.x_prev) * self.dt 147 | + self.std_dev * np.sqrt(self.dt) * np.random.normal(size=self.mean.shape) 148 | ) 149 | # Store x into x_prev 150 | # Makes next noise dependent on current one 151 | self.x_prev = x 152 | return x 153 | 154 | def reset(self): 155 | if self.x_initial is not None: 156 | self.x_prev = self.x_initial 157 | else: 158 | self.x_prev = np.zeros_like(self.mean) 159 | # - 160 | 161 | class DPDS: 162 | def __init__(self, batch_size, memory_size, max_epsilon): 163 | 164 | def build_actor(): 165 | inputs = keras.Input(shape=(param.N, 4)) 166 | x = keras.layers.Flatten()(inputs) 167 | x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(x) 168 | if args.batch_norm: 169 | x = keras.layers.BatchNormalization()(x) 170 | x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(x) 171 | if args.batch_norm: 172 | x = keras.layers.BatchNormalization()(x) 173 | x = keras.layers.Dense(param.N*3, activation='sigmoid', kernel_initializer=W_Initializer)(x) 174 | outputs = keras.layers.Reshape((param.N, 3))(x) 175 | model = keras.Model(inputs=inputs, outputs=outputs) 176 | return model 177 | 178 | def build_critic(): 179 | state_input = keras.layers.Input(shape=(param.N, 4)) 180 | state_x = keras.layers.Flatten()(state_input) 181 | #state_x = keras.layers.Dense(args.Critic_Units, activation='relu', kernel_initializer=W_Initializer)(state_x) 182 | #state_x = keras.layers.Dense(2*args.Critic_Units, activation='relu', kernel_initializer=W_Initializer)(state_x) 183 | 184 | action_input = keras.layers.Input(shape=(param.N, 3)) 185 | action_x = keras.layers.Flatten()(action_input) 186 | #action_x = keras.layers.Dense(2*args.Critic_Units, activation='relu', kernel_initializer=W_Initializer)(action_x) 187 | 188 | concat = keras.layers.Concatenate()([state_x, action_x]) 189 | x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(concat) 190 | #if args.batch_norm: 191 | # x = keras.layers.BatchNormalization()(x) 192 | x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(x) 193 | #if args.batch_norm: 194 | # x = keras.layers.BatchNormalization()(x) 195 | outputs = keras.layers.Dense(1)(x) 196 | model = keras.Model([state_input, action_input], outputs) 197 | return model 198 | 199 | if 'ddpg' in alg: 200 | self.actor = build_actor() 201 | self.critic = build_critic() 202 | self.target_actor = build_actor() 203 | self.target_critic = build_critic() 204 | 205 | if args.load_weights: 206 | self.critic.load_weights("models/critic") 207 | self.actor.load_weights("models/actor") 208 | print("load weight") 209 | 210 | self.target_actor.set_weights(self.actor.get_weights()) 211 | self.target_critic.set_weights(self.critic.get_weights()) 212 | else: 213 | raise NotImplementedError("alg not implemented") 214 | 215 | self.actor_optimizer = tf.optimizers.Adam(args.Lr) 216 | self.critic_optimizer = tf.optimizers.Adam(args.Lr*2) 217 | self.epsilon = max_epsilon 218 | self.batch_size = batch_size 219 | self.buffer = ReplayBuffer(memory_size) 220 | self.alg = alg 221 | self.v = Initial_v # average reward 222 | self.target_v = Initial_v 223 | self.lam = param.lam_init 224 | 225 | # transform some parameters to tensors 226 | self.f_max = tf.convert_to_tensor(param.f_max) 227 | self.P_max = tf.convert_to_tensor(param.P_max) 228 | self.W_max = tf.convert_to_tensor(param.W_max) 229 | self.E_max = 
tf.convert_to_tensor(param.E_max) 230 | self.d_t = tf.convert_to_tensor(param.d_t) 231 | self.kappa = tf.convert_to_tensor(param.kappa) 232 | self.sigma2 = tf.convert_to_tensor(param.sigma2) 233 | self.gamma = tf.convert_to_tensor(param.gamma) 234 | 235 | def random_action(self, s): 236 | action = np.random.rand(param.N, 3) 237 | # apply softmax to W so that its sum equals 1 238 | action[:,2] = softmax(action[:,2]) 239 | return action 240 | 241 | def _choose_action(self, s): 242 | action = self.actor(s[None, :])[0].numpy() 243 | action[:,2] = softmax(action[:,2]) 244 | return action 245 | 246 | def choose_action(self, s, noise_object, epsilon): 247 | action = self.actor(s[None, :])[0].numpy() 248 | noise = noise_object() 249 | # Adding noise to action 250 | action = action + epsilon * noise 251 | 252 | # We make sure action is within bounds 253 | legal_action = np.clip(action, 0, 1) 254 | legal_action[:,2] = softmax(legal_action[:,2]) 255 | 256 | return legal_action 257 | 258 | @tf.function(jit_compile=True) 259 | def f_k(self, batch_state, batch_action): 260 | f = batch_action[:, :, 0] * self.f_max 261 | P = batch_action[:, :, 1] * self.P_max 262 | W = tf.nn.softmax(batch_action[:, :, 2]) * self.W_max 263 | d_r = batch_state[:, :, 0] 264 | a = batch_state[:, :, 1] 265 | q = batch_state[:, :, 2] 266 | h = batch_state[:, :, 3] 267 | 268 | d = f * self.d_t / self.kappa + \ 269 | self.d_t * W * (tf.math.log(1+P*h/self.sigma2)/tf.math.log(2.)) 270 | b = ((d > 0) & (d_r > 0) & (d >= d_r)) 271 | b = tf.where(b, 1.0, 0.0) 272 | pds_q = q - b 273 | pds_h = h 274 | pds_d_r = tf.maximum(tf.constant(0, dtype=tf.float32), d_r - d) 275 | pds_a = a - 5 * b 276 | 277 | s = tf.stack([pds_d_r, pds_a, pds_q, pds_h], axis=2) 278 | return s 279 | 280 | @tf.function(jit_compile=True) 281 | def cost(self, batch_state, batch_action): 282 | f = batch_action[:, :, 0] * self.f_max 283 | P = batch_action[:, :, 1] * self.P_max 284 | W = tf.nn.softmax(batch_action[:, :, 2]) * self.W_max 285 | E = self.gamma * f**3 * self.d_t + P * self.d_t 286 | h = batch_state[:, :, 3] 287 | d = f * self.d_t / self.kappa + \ 288 | self.d_t * W * (tf.math.log(1+P*h/self.sigma2)/tf.math.log(2.)) 289 | # in expectation, completing a task reduces aoi by 1/p_g 290 | aoi_per_bit = 1 / param.p_g / ((param.d_lb+param.d_ub)/2) 291 | cost = tf.reduce_sum(batch_state[:, :, 1] - d*aoi_per_bit + self.lam * tf.math.maximum(0.0, E - self.E_max), axis=1) 292 | #cost = tf.reduce_sum(self.lam * tf.math.maximum(0.0, E - self.E_max), axis=1) 293 | #cost = tf.reduce_sum(batch_state[:, :, 1] + self.lam * tf.math.maximum(0.0, E - self.E_max), axis=1) 294 | #cost = tf.reduce_sum(batch_state[:, :, 1] + self.lam * (E - self.E_max), axis=1) 295 | return cost 296 | 297 | @tf.function(jit_compile=True) 298 | def train(self, s, a, r, s_next): 299 | # update critic network 300 | with tf.GradientTape() as tape: 301 | # calculate target y 302 | target_a_next = self.target_actor(s_next, training=True) 303 | target_y = self.cost(s,a) + self.target_critic([s_next, target_a_next], training=True) - self.target_v 304 | critic_value = self.critic([s, a], training=True) 305 | td = critic_value - target_y 306 | critic_loss = tf.math.reduce_mean(tf.math.abs(td)) 307 | 308 | critic_grad = tape.gradient(critic_loss, self.critic.trainable_variables) 309 | #critic_grad = [tf.clip_by_norm(grad, 10.0) for grad in critic_grad] 310 | self.critic_optimizer.apply_gradients( zip(critic_grad, self.critic.trainable_variables) ) 311 | 312 | # update actor network 313 | with 
tf.GradientTape() as tape: 314 | actions = self.actor(s, training=True) 315 | critic_value = self.critic([s, actions], training=True) 316 | actor_loss = tf.math.reduce_mean(critic_value) 317 | 318 | actor_grad = tape.gradient(actor_loss, self.actor.trainable_variables) 319 | #actor_grad = [tf.clip_by_norm(grad, 10.0) for grad in actor_grad] 320 | self.actor_optimizer.apply_gradients( zip(actor_grad, self.actor.trainable_variables) ) 321 | 322 | # td is returned to update self.v 323 | # we do not update self.v in this function because it leaks the local tensor 'td', which is prohibited by tensorflow 324 | return (td, critic_loss, actor_loss, actor_grad, critic_grad, critic_value) 325 | 326 | def save_model(self, dir=log_dir_name + '/models'): 327 | self.actor.save_weights(dir + '/' + self.alg + '_actor') 328 | self.critic.save_weights(dir + '/' + self.alg + '_critic') 329 | self.actor.save_weights('models/actor') 330 | self.critic.save_weights('models/critic') 331 | with open(dir + '/' + self.alg + '_v.pickle', 'wb') as f: 332 | pickle.dump(self.v, f) 333 | with open('models/v.pickle', 'wb') as f: 334 | pickle.dump(self.v, f) 335 | 336 | 337 | @tf.function(jit_compile=True) 338 | def update_target(target_weights, weights, omega): 339 | for (a, b) in zip(target_weights, weights): 340 | a.assign(b * omega + a * (1 - omega)) 341 | 342 | def train(T): 343 | agent = DPDS(args.Batch_Size, args.Memory_Size, args.Max_Epsilon) 344 | print("============" + agent.alg + "============") 345 | 346 | state = env.reset() 347 | std_dev = 0.01 348 | ou_noise = OUActionNoise(mean=np.zeros(1), std_deviation=float(std_dev) * np.ones(1)) 349 | acc_E = np.zeros(param.N) 350 | acc_A = np.zeros(param.N) 351 | timer = 1 352 | 353 | acc_interaction_time = 0 354 | acc_inference_time = 0 355 | acc_training_time = 0 356 | 357 | while timer <= T: 358 | if timer % 10000 == 0: 359 | print(timer) 360 | 361 | if timer <= args.Start_Size: 362 | action = agent.random_action(state) 363 | else: 364 | inference_begin = time.time() 365 | action = agent.choose_action(state, ou_noise, agent.epsilon) 366 | inference_end = time.time() 367 | acc_inference_time += inference_end - inference_begin 368 | interaction_begin = time.time() 369 | next_state, E = env.step(action) 370 | interaction_end = time.time() 371 | acc_interaction_time += interaction_end - interaction_begin 372 | #cost = np.sum(state[:,1] + agent.lam * (E - param.E_max)) 373 | cost = np.sum(agent.lam * (E - param.E_max)) 374 | 375 | agent.buffer.store((state, action, cost, next_state)) 376 | 377 | # train 378 | if timer > args.Update_After and timer % args.Train_Interval == 0: 379 | training_begin = time.time() 380 | # sample from buffer 381 | s, a, r, s_next = agent.buffer.sample(args.Batch_Size) 382 | td, critic_loss, actor_loss, actor_grad, critic_grad, critic_value = agent.train(s, a, r, s_next) 383 | #tf.print(actor_grad) 384 | #tf.print(critic_grad) 385 | agent.v = agent.v - param.beta(timer) * tf.reduce_mean(td) 386 | training_end = time.time() 387 | acc_training_time += training_end - training_begin 388 | 389 | # update target networks 390 | update_target(agent.target_actor.variables, agent.actor.variables, args.omega) 391 | update_target(agent.target_critic.variables, agent.critic.variables, args.omega) 392 | agent.target_v = args.omega * agent.v + (1 - args.omega) * agent.target_v 393 | 394 | # update lambda 395 | agent.lam = agent.lam + param.theta(timer) * (E - param.E_max) 396 | agent.lam = np.maximum(0, agent.lam) 397 | agent.lam = np.minimum(param.lam_max, 
agent.lam) 398 | 399 | with fw.as_default(): 400 | tf.summary.scalar('critic_loss', critic_loss, step = timer) 401 | tf.summary.scalar('actor_loss', actor_loss, step = timer) 402 | 403 | # epsilon decay 404 | agent.epsilon = max(Epsilon_Decay_Rate * timer + args.Max_Epsilon, args.Min_Epsilon) 405 | 406 | timer += 1 407 | 408 | # log 409 | acc_E += E 410 | acc_A += state[:,1] 411 | with fw.as_default(): 412 | tf.summary.scalar('cost', cost, step=timer) 413 | tf.summary.scalar('aoi', np.sum(state[:,1])/args.N, step=timer) 414 | tf.summary.scalar('average aoi', np.sum(acc_A)/timer/args.N, step=timer) 415 | tf.summary.scalar('energy', np.sum(E)/args.N, step=timer) 416 | tf.summary.scalar('v', agent.v, step=timer) 417 | tf.summary.scalar('epsilon', agent.epsilon, step=timer) 418 | tf.summary.scalar('lambda', np.sum(agent.lam)/args.N, step=timer) 419 | tf.summary.scalar('average energy', np.sum(acc_E)/timer/args.N, step=timer) 420 | 421 | state = next_state 422 | 423 | agent.save_model() 424 | print("Average interaction time:", acc_interaction_time / args.T) 425 | print("Average inference time:", acc_inference_time / (args.T - args.Start_Size)) 426 | print("Average training time:", acc_training_time / (args.T - args.Update_After)) 427 | print("Average number of tasks:", env.nTask / timer /args.N) 428 | 429 | data = {'N': param.N, 'E': env.E_stat, 'A': env.A_stat} 430 | savemat(data_dir_name + '.mat', data) 431 | 432 | if __name__ == "__main__": 433 | train(args.T) 434 | 435 | print(tf.__version__) 436 | 437 | 438 | -------------------------------------------------------------------------------- /D_DDPG.py: -------------------------------------------------------------------------------- 1 | # + 2 | from Environment import Environment 3 | from Parameter import Parameter 4 | from scipy.io import savemat 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow import keras 8 | import random 9 | import os 10 | import time 11 | import argparse 12 | import sys 13 | import time 14 | import pickle 15 | import pdb 16 | 17 | # + 18 | #################### params ########################### 19 | parser = argparse.ArgumentParser(description='Hyper_params') 20 | parser.add_argument('--Info', default='', type=str) # information added to log dir name 21 | 22 | parser.add_argument('--Seed', default=41, type=int) 23 | parser.add_argument('--Units', default=256, type=int) # hidden units num of NN 24 | parser.add_argument('--Lr', default=0.001, type=float) # learning rate 25 | parser.add_argument('--omega', default=0.005, type=float) # used to update target networks 26 | parser.add_argument('--Max_Epsilon', default=1.0, type=float) 27 | parser.add_argument('--Min_Epsilon', default=1.0, type=float) 28 | parser.add_argument('--Epsilon_Decay', default=1.0, type=float) 29 | parser.add_argument('--Batch_Size', default=256, type=int) 30 | parser.add_argument('--Memory_Size', default=1000000, type=int) # buffer size 31 | parser.add_argument('--Start_Size', default=0, type=int) # random action before start_size 32 | parser.add_argument('--Update_After', default=0, type=int) 33 | parser.add_argument('--Train_Interval', default=1, type=int) 34 | parser.add_argument('--load_weights', default=False, type=bool) 35 | parser.add_argument('--Alg', default='d_ddpg', type=str) 36 | parser.add_argument('--Gpu_Id', default="0", type=str) # -1 means CPU 37 | parser.add_argument('--N', default=15, type=int) # number of WDs 38 | parser.add_argument('--T', default=1000000, type=int) # number of simulated slots 39 | 
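# D_DDPG is the discounted-cost DDPG baseline: unlike DDPG.py and DPDS.py it does
# not subtract an average-cost term in the TD target, but scales the bootstrapped
# value with the discount factor defined below.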
parser.add_argument('--batch_norm', default=True, type=bool) 40 | parser.add_argument('--discount', default=0.99, type=float) 41 | 42 | args = parser.parse_args() 43 | # - 44 | 45 | #################### seed ########################### 46 | os.environ['TF_DETERMINISTIC_OPS'] = '1' 47 | os.environ["CUDA_VISIBLE_DEVICES"] = args.Gpu_Id 48 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU') 49 | for gpu in gpus: 50 | tf.config.experimental.set_memory_growth(gpu, True) 51 | print(tf.config.list_physical_devices()) 52 | tf.random.set_seed(args.Seed) 53 | np.random.seed(args.Seed) 54 | random.seed(args.Seed) 55 | 56 | #################### log ########################### 57 | # create log file 58 | time_str = time.strftime("%m-%d_%H-%M", time.localtime()) 59 | alg = args.Alg 60 | log_dir_name = 'logs/' + time_str + '_' + alg + args.Info + '_n' + \ 61 | str(args.N) + '_seed' + str(args.Seed) 62 | data_dir_name = 'data/' + alg + args.Info + '_n' + str(args.N) 63 | fw = tf.summary.create_file_writer(log_dir_name) # log file witer 64 | 65 | # create dir to save model 66 | if not os.path.exists(log_dir_name + '/models'): 67 | os.makedirs(log_dir_name + '/models') 68 | 69 | # save params to a .txt file 70 | prams_file = open(log_dir_name + '/prams_table.txt', 'w') 71 | prams_file.writelines(f'{i:50} {v}\n' for i, v in args.__dict__.items()) 72 | prams_file.close() 73 | 74 | ###################### env ############################### 75 | param = Parameter(args.N, args.T) 76 | param.lam_init = np.ones(param.N) * 10000 77 | param.theta = lambda t: 0 78 | #param.theta = lambda t: min(10, 100/np.log(t+10)) 79 | param.beta = lambda t: 0.01 80 | env = Environment(param) 81 | if args.load_weights: 82 | with open('models/v.pickle', 'rb') as f: 83 | Initial_v = pickle.load(f) 84 | else: 85 | Initial_v = 0 # initial average reward 86 | 87 | ###################### others ############################### 88 | W_Initializer = tf.initializers.he_normal(args.Seed) # NN initializer 89 | Epsilon_Decay_Rate = (args.Min_Epsilon - args.Max_Epsilon) / (args.T) * args.Epsilon_Decay # factor of decay 90 | TENSOR_FLOAT_TYPE = tf.dtypes.float32 91 | TENSOR_INT_TYPE = tf.dtypes.int32 92 | 93 | def softmax(x): 94 | return np.exp(x) / np.sum(np.exp(x)) 95 | 96 | # + 97 | class ReplayBuffer: 98 | def __init__(self, buffer_capacity = 100000): 99 | self.buffer_capacity = buffer_capacity 100 | self.buffer_counter = 0 101 | 102 | # dim(action) = N * 3 103 | # dim(state) = N * 4 104 | # dim(pds) = N * 4 105 | buffer_a_dim = (buffer_capacity, param.N, 3) 106 | buffer_s_dim = (buffer_capacity, param.N, 4) 107 | 108 | self.s_buffer = np.empty(buffer_s_dim, dtype=np.float32) 109 | self.a_buffer = np.empty(buffer_a_dim, dtype=np.float32) 110 | self.r_buffer = np.empty((buffer_capacity,), dtype=np.float32) 111 | self.next_s_buffer = np.empty(buffer_s_dim, dtype=np.float32) 112 | 113 | def store(self, exp): 114 | index = self.buffer_counter % self.buffer_capacity 115 | 116 | s, a, r, next_s = exp 117 | self.s_buffer[index] = s 118 | self.a_buffer[index] = a 119 | self.r_buffer[index] = r 120 | self.next_s_buffer[index] = next_s 121 | 122 | self.buffer_counter += 1 123 | 124 | def sample(self, batch_size): 125 | sampling_range = min(self.buffer_counter, self.buffer_capacity) 126 | idx = np.random.randint(0, sampling_range, batch_size) 127 | 128 | batch_s = tf.convert_to_tensor(self.s_buffer[idx]) 129 | batch_a = tf.convert_to_tensor(self.a_buffer[idx]) 130 | batch_r = tf.convert_to_tensor(self.r_buffer[idx]) 131 | 
batch_next_s = tf.convert_to_tensor(self.next_s_buffer[idx]) 132 | 133 | return batch_s, batch_a, batch_r, batch_next_s 134 | 135 | class OUActionNoise: 136 | def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None): 137 | self.theta = theta 138 | self.mean = mean 139 | self.std_dev = std_deviation 140 | self.dt = dt 141 | self.x_initial = x_initial 142 | self.reset() 143 | 144 | def __call__(self): 145 | # Formula taken from https://www.wikipedia.org/wiki/Ornstein-Uhlenbeck_process. 146 | x = ( 147 | self.x_prev 148 | + self.theta * (self.mean - self.x_prev) * self.dt 149 | + self.std_dev * np.sqrt(self.dt) * np.random.normal(size=self.mean.shape) 150 | ) 151 | # Store x into x_prev 152 | # Makes next noise dependent on current one 153 | self.x_prev = x 154 | return x 155 | 156 | def reset(self): 157 | if self.x_initial is not None: 158 | self.x_prev = self.x_initial 159 | else: 160 | self.x_prev = np.zeros_like(self.mean) 161 | # - 162 | 163 | class DPDS: 164 | def __init__(self, batch_size, memory_size, max_epsilon): 165 | 166 | def build_actor(): 167 | inputs = keras.Input(shape=(param.N, 4)) 168 | x = keras.layers.Flatten()(inputs) 169 | x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(x) 170 | if args.batch_norm: 171 | x = keras.layers.BatchNormalization()(x) 172 | x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(x) 173 | if args.batch_norm: 174 | x = keras.layers.BatchNormalization()(x) 175 | x = keras.layers.Dense(param.N*3, activation='sigmoid', kernel_initializer=W_Initializer)(x) 176 | outputs = keras.layers.Reshape((param.N, 3))(x) 177 | model = keras.Model(inputs=inputs, outputs=outputs) 178 | return model 179 | 180 | def build_critic(): 181 | state_input = keras.layers.Input(shape=(param.N, 4)) 182 | state_x = keras.layers.Flatten()(state_input) 183 | #state_x = keras.layers.Dense(args.Critic_Units, activation='relu', kernel_initializer=W_Initializer)(state_x) 184 | #state_x = keras.layers.Dense(2*args.Critic_Units, activation='relu', kernel_initializer=W_Initializer)(state_x) 185 | 186 | action_input = keras.layers.Input(shape=(param.N, 3)) 187 | action_x = keras.layers.Flatten()(action_input) 188 | #action_x = keras.layers.Dense(2*args.Critic_Units, activation='relu', kernel_initializer=W_Initializer)(action_x) 189 | 190 | concat = keras.layers.Concatenate()([state_x, action_x]) 191 | x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(concat) 192 | if args.batch_norm: 193 | x = keras.layers.BatchNormalization()(x) 194 | x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(x) 195 | if args.batch_norm: 196 | x = keras.layers.BatchNormalization()(x) 197 | outputs = keras.layers.Dense(1)(x) 198 | model = keras.Model([state_input, action_input], outputs) 199 | return model 200 | 201 | if 'd_ddpg' in alg: 202 | self.actor = build_actor() 203 | self.critic = build_critic() 204 | self.target_actor = build_actor() 205 | self.target_critic = build_critic() 206 | 207 | if args.load_weights: 208 | self.critic.load_weights("models/critic") 209 | self.actor.load_weights("models/actor") 210 | print("load weight") 211 | 212 | self.target_actor.set_weights(self.actor.get_weights()) 213 | self.target_critic.set_weights(self.critic.get_weights()) 214 | else: 215 | raise NotImplementedError("alg not implemented") 216 | 217 | self.actor_optimizer = tf.optimizers.Adam(args.Lr) 218 | self.critic_optimizer = 
tf.optimizers.Adam(args.Lr*2) 219 | self.epsilon = max_epsilon 220 | self.batch_size = batch_size 221 | self.buffer = ReplayBuffer(memory_size) 222 | self.alg = alg 223 | self.v = Initial_v # average reward 224 | self.target_v = Initial_v 225 | self.lam = param.lam_init 226 | 227 | # transform some parameters to tensors 228 | self.f_max = tf.convert_to_tensor(param.f_max) 229 | self.P_max = tf.convert_to_tensor(param.P_max) 230 | self.W_max = tf.convert_to_tensor(param.W_max) 231 | self.E_max = tf.convert_to_tensor(param.E_max) 232 | self.d_t = tf.convert_to_tensor(param.d_t) 233 | self.kappa = tf.convert_to_tensor(param.kappa) 234 | self.sigma2 = tf.convert_to_tensor(param.sigma2) 235 | self.gamma = tf.convert_to_tensor(param.gamma) 236 | 237 | def random_action(self, s): 238 | action = np.random.rand(param.N, 3) 239 | # apply softmax to W so that its sum equals 1 240 | action[:,2] = softmax(action[:,2]) 241 | return action 242 | 243 | def _choose_action(self, s): 244 | action = self.actor(s[None, :])[0].numpy() 245 | action[:,2] = softmax(action[:,2]) 246 | return action 247 | 248 | def choose_action(self, s, noise_object, epsilon): 249 | action = self.actor(s[None, :])[0].numpy() 250 | noise = noise_object() 251 | # Adding noise to action 252 | action = action + epsilon * noise 253 | 254 | # We make sure action is within bounds 255 | legal_action = np.clip(action, 0, 1) 256 | legal_action[:,2] = softmax(legal_action[:,2]) 257 | 258 | return legal_action 259 | 260 | @tf.function(jit_compile=True) 261 | def f_k(self, batch_state, batch_action): 262 | f = batch_action[:, :, 0] * self.f_max 263 | P = batch_action[:, :, 1] * self.P_max 264 | W = tf.nn.softmax(batch_action[:, :, 2]) * self.W_max 265 | d_r = batch_state[:, :, 0] 266 | a = batch_state[:, :, 1] 267 | q = batch_state[:, :, 2] 268 | h = batch_state[:, :, 3] 269 | 270 | d = f * self.d_t / self.kappa + \ 271 | self.d_t * W * (tf.math.log(1+P*h/self.sigma2)/tf.math.log(2.)) 272 | b = ((d > 0) & (d_r > 0) & (d >= d_r)) 273 | b = tf.where(b, 1.0, 0.0) 274 | pds_q = q - b 275 | pds_h = h 276 | pds_d_r = tf.maximum(tf.constant(0, dtype=tf.float32), d_r - d) 277 | pds_a = a - 5 * b 278 | 279 | s = tf.stack([pds_d_r, pds_a, pds_q, pds_h], axis=2) 280 | return s 281 | 282 | @tf.function(jit_compile=True) 283 | def cost(self, batch_state, batch_action): 284 | f = batch_action[:, :, 0] * self.f_max 285 | P = batch_action[:, :, 1] * self.P_max 286 | W = tf.nn.softmax(batch_action[:, :, 2]) * self.W_max 287 | E = self.gamma * f**3 * self.d_t + P * self.d_t 288 | h = batch_state[:, :, 3] 289 | d = f * self.d_t / self.kappa + \ 290 | self.d_t * W * (tf.math.log(1+P*h/self.sigma2)/tf.math.log(2.)) 291 | # in expectation, completing a task reduces aoi by 1/p_g 292 | aoi_per_bit = 1 / param.p_g / ((param.d_lb+param.d_ub)/2) 293 | cost = tf.reduce_sum(batch_state[:, :, 1] - d*aoi_per_bit + self.lam * tf.math.maximum(0.0, E - self.E_max), axis=1) 294 | #cost = tf.reduce_sum(-d*aoi_per_bit + self.lam * tf.math.maximum(0.0, E - self.E_max), axis=1) 295 | #cost = tf.reduce_sum(self.lam * tf.math.maximum(0.0, E - self.E_max), axis=1) 296 | #cost = tf.reduce_sum(batch_state[:, :, 1] + self.lam * tf.math.maximum(0.0, E - self.E_max), axis=1) 297 | #cost = tf.reduce_sum(batch_state[:, :, 1] + self.lam * (E - self.E_max), axis=1) 298 | return cost 299 | 300 | def norm(self, s): 301 | s[:, :, 0] = s[:, :, 0] / 2000 302 | s[:, :, 3] = -tf.math.log(s[:, :, 3] * 1000) 303 | return s 304 | 305 | @tf.function(jit_compile=True) 306 | def train(self, s, a, r, 
s_next): 307 | # update critic network 308 | with tf.GradientTape() as tape: 309 | # calculate target y 310 | target_a_next = self.target_actor(s_next, training=True) 311 | target_a_next[:,:,2] = tf.nn.softmax(target_a_next[:,:,2]) 312 | target_y = self.cost(s,a) + args.discount * self.target_critic([s_next, target_a_next], training=True) 313 | critic_value = self.critic([s, a], training=True) 314 | td = critic_value - target_y 315 | critic_loss = tf.math.reduce_mean(tf.math.abs(td)) 316 | 317 | critic_grad = tape.gradient(critic_loss, self.critic.trainable_variables) 318 | #critic_grad = [tf.clip_by_norm(grad, 10.0) for grad in critic_grad] 319 | self.critic_optimizer.apply_gradients( zip(critic_grad, self.critic.trainable_variables) ) 320 | 321 | # update actor network 322 | with tf.GradientTape() as tape: 323 | actions = self.actor(s, training=True) 324 | actions[:,:,2] = tf.nn.softmax(actions[:,:,2]) 325 | critic_value = self.critic([s, actions], training=True) 326 | actor_loss = tf.math.reduce_mean(critic_value) 327 | 328 | actor_grad = tape.gradient(actor_loss, self.actor.trainable_variables) 329 | actor_grad = [tf.clip_by_norm(grad, 1.0) for grad in actor_grad] 330 | self.actor_optimizer.apply_gradients( zip(actor_grad, self.actor.trainable_variables) ) 331 | 332 | # td is returned to update self.v 333 | # we do not update self.v in this function because it leaks the local tensor 'td', which is prohibited by tensorflow 334 | return (td, critic_loss, actor_loss, actor_grad, critic_grad, critic_value) 335 | 336 | def save_model(self, dir=log_dir_name + '/models'): 337 | self.actor.save_weights(dir + '/' + self.alg + '_actor') 338 | self.critic.save_weights(dir + '/' + self.alg + '_critic') 339 | self.actor.save_weights('models/actor') 340 | self.critic.save_weights('models/critic') 341 | with open(dir + '/' + self.alg + '_v.pickle', 'wb') as f: 342 | pickle.dump(self.v, f) 343 | with open('models/v.pickle', 'wb') as f: 344 | pickle.dump(self.v, f) 345 | 346 | 347 | @tf.function(jit_compile=True) 348 | def update_target(target_weights, weights, omega): 349 | for (a, b) in zip(target_weights, weights): 350 | a.assign(b * omega + a * (1 - omega)) 351 | 352 | def train(T): 353 | agent = DPDS(args.Batch_Size, args.Memory_Size, args.Max_Epsilon) 354 | print("============" + agent.alg + "============") 355 | 356 | state = env.reset() 357 | std_dev = 0.01 358 | ou_noise = OUActionNoise(mean=np.zeros(1), std_deviation=float(std_dev) * np.ones(1)) 359 | acc_E = np.zeros(param.N) 360 | acc_A = np.zeros(param.N) 361 | timer = 1 362 | 363 | acc_interaction_time = 0 364 | acc_inference_time = 0 365 | acc_training_time = 0 366 | 367 | while timer <= T: 368 | if timer % 10000 == 0: 369 | print(timer) 370 | 371 | if timer <= args.Start_Size: 372 | action = agent.random_action(state) 373 | else: 374 | inference_begin = time.time() 375 | action = agent.choose_action(state, ou_noise, agent.epsilon) 376 | inference_end = time.time() 377 | acc_inference_time += inference_end - inference_begin 378 | interaction_begin = time.time() 379 | next_state, E = env.step(action) 380 | interaction_end = time.time() 381 | acc_interaction_time += interaction_end - interaction_begin 382 | #cost = np.sum(state[:,1] + agent.lam * (E - param.E_max)) 383 | cost = np.sum(agent.lam * (E - param.E_max)) 384 | 385 | agent.buffer.store((state, action, cost, next_state)) 386 | 387 | # train 388 | if timer > args.Update_After and timer % args.Train_Interval == 0: 389 | training_begin = time.time() 390 | # sample from buffer 391 | 
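            # note: the stored cost r is sampled together with (s, a, s_next), but
            # agent.train recomputes the cost from (s, a) when forming the TD target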
s, a, r, s_next = agent.buffer.sample(args.Batch_Size) 392 | td, critic_loss, actor_loss, actor_grad, critic_grad, critic_value = agent.train(s, a, r, s_next) 393 | #tf.print(actor_grad) 394 | #tf.print(critic_grad) 395 | agent.v = agent.v - param.beta(timer) * tf.reduce_mean(td) 396 | training_end = time.time() 397 | acc_training_time += training_end - training_begin 398 | 399 | # update target networks 400 | update_target(agent.target_actor.variables, agent.actor.variables, args.omega) 401 | update_target(agent.target_critic.variables, agent.critic.variables, args.omega) 402 | agent.target_v = args.omega * agent.v + (1 - args.omega) * agent.target_v 403 | 404 | # update lambda 405 | agent.lam = agent.lam + param.theta(timer) * (E - param.E_max) 406 | agent.lam = np.maximum(0, agent.lam) 407 | agent.lam = np.minimum(param.lam_max, agent.lam) 408 | 409 | with fw.as_default(): 410 | tf.summary.scalar('critic_loss', critic_loss, step = timer) 411 | tf.summary.scalar('actor_loss', actor_loss, step = timer) 412 | 413 | # epsilon decay 414 | agent.epsilon = max(Epsilon_Decay_Rate * timer + args.Max_Epsilon, args.Min_Epsilon) 415 | 416 | timer += 1 417 | 418 | # log 419 | acc_E += E 420 | acc_A += state[:,1] 421 | with fw.as_default(): 422 | tf.summary.scalar('cost', cost, step=timer) 423 | tf.summary.scalar('aoi', np.sum(state[:,1])/args.N, step=timer) 424 | tf.summary.scalar('average aoi', np.sum(acc_A)/timer/args.N, step=timer) 425 | tf.summary.scalar('energy', np.sum(E)/args.N, step=timer) 426 | tf.summary.scalar('v', agent.v, step=timer) 427 | tf.summary.scalar('epsilon', agent.epsilon, step=timer) 428 | tf.summary.scalar('lambda', np.sum(agent.lam)/args.N, step=timer) 429 | tf.summary.scalar('average energy', np.sum(acc_E)/timer/args.N, step=timer) 430 | 431 | state = next_state 432 | 433 | agent.save_model() 434 | print("Average interaction time:", acc_interaction_time / args.T) 435 | print("Average inference time:", acc_inference_time / (args.T - args.Start_Size)) 436 | print("Average training time:", acc_training_time / (args.T - args.Update_After)) 437 | print("Average number of tasks:", env.nTask / timer /args.N) 438 | 439 | data = {'N': param.N, 'E': env.E_stat, 'A': env.A_stat} 440 | savemat(data_dir_name + '.mat', data) 441 | 442 | if __name__ == "__main__": 443 | train(args.T) 444 | 445 | print(tf.__version__) 446 | 447 | 448 | -------------------------------------------------------------------------------- /DPDS.py: -------------------------------------------------------------------------------- 1 | # + 2 | from Environment import Environment 3 | from Parameter import Parameter 4 | from scipy.io import savemat 5 | import numpy as np 6 | import tensorflow as tf 7 | from tensorflow import keras 8 | import random 9 | import os 10 | import time 11 | import argparse 12 | import sys 13 | import time 14 | import pickle 15 | import pdb 16 | 17 | # + 18 | #################### params ########################### 19 | parser = argparse.ArgumentParser(description='Hyper_params') 20 | parser.add_argument('--Info', default='', type=str) # information added to log dir name 21 | 22 | parser.add_argument('--Seed', default=41, type=int) 23 | parser.add_argument('--Units', default=128, type=int) # hidden units num of critic NN 24 | parser.add_argument('--Lr', default=0.001, type=float) # learning rate 25 | parser.add_argument('--omega', default=0.005, type=float) # used to update target networks 26 | parser.add_argument('--Max_Epsilon', default=0.0, type=float) 27 | 
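# with Max_Epsilon = Min_Epsilon = 0.0, no OU exploration noise is added to the
# actions chosen by DPDS (see choose_action below)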
parser.add_argument('--Min_Epsilon', default=0.0, type=float) 28 | parser.add_argument('--Epsilon_Decay', default=1.0, type=float) 29 | parser.add_argument('--Batch_Size', default=256, type=int) 30 | parser.add_argument('--Memory_Size', default=200000, type=int) # buffer size 31 | parser.add_argument('--Start_Size', default=0, type=int) # random action before start_size 32 | parser.add_argument('--Update_After', default=0, type=int) 33 | parser.add_argument('--Train_Interval', default=1, type=int) 34 | parser.add_argument('--load_weights', default=False, type=bool) 35 | parser.add_argument('--Alg', default='dpds', type=str) 36 | parser.add_argument('--Gpu_Id', default="0", type=str) # -1 means CPU 37 | parser.add_argument('--E_max', default=0.1, type=float) 38 | parser.add_argument('--N', default=15, type=int) # number of WDs 39 | parser.add_argument('--T', default=100000, type=int) # number of simulated slots 40 | parser.add_argument('--batch_norm', default=True, type=bool) 41 | 42 | args = parser.parse_args() 43 | # - 44 | 45 | #################### seed ########################### 46 | os.environ['TF_DETERMINISTIC_OPS'] = '1' 47 | os.environ["CUDA_VISIBLE_DEVICES"] = args.Gpu_Id 48 | gpus = tf.config.experimental.list_physical_devices(device_type='GPU') 49 | for gpu in gpus: 50 | tf.config.experimental.set_memory_growth(gpu, True) 51 | print(tf.config.list_physical_devices()) 52 | tf.random.set_seed(args.Seed) 53 | np.random.seed(args.Seed) 54 | random.seed(args.Seed) 55 | 56 | param = Parameter(args.N, args.T) 57 | param.E_max = [args.E_max*param.d_t for _ in range(param.N)] 58 | if args.Alg == 'lpo': 59 | # for local processing only, we set the wireless bandwidth to zero 60 | param.W_max = 0.0 61 | param.E_max = [3*args.E_max*param.d_t for _ in range(param.N)] 62 | #param.lam_init = np.ones(param.N) * 1000 63 | #param.theta = lambda t: 0 64 | if args.Alg == 'coo': 65 | # for computation offloading only, we set the local CPU frequency to zero 66 | param.f_max = [0.0 for _ in range(param.N)] 67 | param.E_max = [2*args.E_max*param.d_t for _ in range(param.N)] 68 | env = Environment(param) 69 | if args.load_weights: 70 | with open('models/v.pickle', 'rb') as f: 71 | Initial_v = pickle.load(f) 72 | else: 73 | Initial_v = 0 # initial average reward 74 | 75 | #################### log ########################### 76 | # create log file 77 | time_str = time.strftime("%m-%d_%H-%M", time.localtime()) 78 | alg = args.Alg 79 | log_dir_name = 'logs/' + time_str + '_' + alg + args.Info + '_n' + \ 80 | str(args.N) + '_seed' + str(args.Seed) + '_lam' + str(int(param.lam_init[0])) 81 | data_dir_name = 'data/' + alg + '_' + args.Info + '_n' + \ 82 | str(args.N) + '_lam' + str(int(param.lam_init[0])) 83 | fw = tf.summary.create_file_writer(log_dir_name) # log file witer 84 | 85 | # create dir to save model 86 | if not os.path.exists(log_dir_name + '/models'): 87 | os.makedirs(log_dir_name + '/models') 88 | 89 | # save params to a .txt file 90 | prams_file = open(log_dir_name + '/prams_table.txt', 'w') 91 | prams_file.writelines(f'{i:50} {v}\n' for i, v in args.__dict__.items()) 92 | prams_file.close() 93 | 94 | ###################### others ############################### 95 | W_Initializer = tf.initializers.he_normal(args.Seed) # NN initializer 96 | Epsilon_Decay_Rate = (args.Min_Epsilon - args.Max_Epsilon) / (args.T) * args.Epsilon_Decay # factor of decay 97 | TENSOR_FLOAT_TYPE = tf.dtypes.float32 98 | TENSOR_INT_TYPE = tf.dtypes.int32 99 | 100 | def softmax(x): 101 | return np.exp(x) / np.sum(np.exp(x)) 
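# The softmax above can overflow for large inputs. A numerically stable variant
# (a sketch only; stable_softmax is a hypothetical helper not used by the rest of
# this file) subtracts the maximum before exponentiation:
def stable_softmax(x):
    z = np.exp(x - np.max(x))
    return z / np.sum(z)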
102 | 103 | # + 104 | class ReplayBuffer: 105 | def __init__(self, buffer_capacity = 100000): 106 | self.buffer_capacity = buffer_capacity 107 | self.buffer_counter = 0 108 | 109 | # dim(action) = N * 3 110 | # dim(state) = N * 4 111 | # dim(pds) = N * 4 112 | buffer_a_dim = (buffer_capacity, param.N, 3) 113 | buffer_s_dim = (buffer_capacity, param.N, 4) 114 | 115 | self.s_buffer = np.empty(buffer_s_dim, dtype=np.float32) 116 | self.a_buffer = np.empty(buffer_a_dim, dtype=np.float32) 117 | self.r_buffer = np.empty((buffer_capacity,), dtype=np.float32) 118 | self.next_s_buffer = np.empty(buffer_s_dim, dtype=np.float32) 119 | 120 | def store(self, exp): 121 | index = self.buffer_counter % self.buffer_capacity 122 | 123 | s, a, r, next_s = exp 124 | self.s_buffer[index] = s 125 | self.a_buffer[index] = a 126 | self.r_buffer[index] = r 127 | self.next_s_buffer[index] = next_s 128 | 129 | self.buffer_counter += 1 130 | 131 | def sample(self, batch_size): 132 | sampling_range = min(self.buffer_counter, self.buffer_capacity) 133 | idx = np.random.randint(0, sampling_range, batch_size) 134 | 135 | batch_s = tf.convert_to_tensor(self.s_buffer[idx]) 136 | batch_a = tf.convert_to_tensor(self.a_buffer[idx]) 137 | batch_r = tf.convert_to_tensor(self.r_buffer[idx]) 138 | batch_next_s = tf.convert_to_tensor(self.next_s_buffer[idx]) 139 | 140 | return batch_s, batch_a, batch_r, batch_next_s 141 | 142 | class OUActionNoise: 143 | def __init__(self, mean, std_deviation, theta=0.15, dt=1e-2, x_initial=None): 144 | self.theta = theta 145 | self.mean = mean 146 | self.std_dev = std_deviation 147 | self.dt = dt 148 | self.x_initial = x_initial 149 | self.reset() 150 | 151 | def __call__(self): 152 | # Formula taken from https://www.wikipedia.org/wiki/Ornstein-Uhlenbeck_process. 
153 |         x = (
154 |             self.x_prev
155 |             + self.theta * (self.mean - self.x_prev) * self.dt
156 |             + self.std_dev * np.sqrt(self.dt) * np.random.normal(size=self.mean.shape)
157 |         )
158 |         # Store x into x_prev
159 |         # Makes next noise dependent on current one
160 |         self.x_prev = x
161 |         return x
162 | 
163 |     def reset(self):
164 |         if self.x_initial is not None:
165 |             self.x_prev = self.x_initial
166 |         else:
167 |             self.x_prev = np.zeros_like(self.mean)
168 | # -
169 | 
170 | class DPDS:
171 |     def __init__(self, batch_size, memory_size, max_epsilon):
172 | 
173 |         def build_actor():
174 |             inputs = keras.Input(shape=(param.N, 4))
175 |             x = keras.layers.Flatten()(inputs)
176 |             x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(x)
177 |             if args.batch_norm:
178 |                 x = keras.layers.BatchNormalization()(x)
179 |             x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(x)
180 |             if args.batch_norm:
181 |                 x = keras.layers.BatchNormalization()(x)
182 |             x = keras.layers.Dense(param.N*3, activation='sigmoid', kernel_initializer=W_Initializer)(x)
183 |             outputs = keras.layers.Reshape((param.N, 3))(x)
184 |             model = keras.Model(inputs=inputs, outputs=outputs)
185 |             return model
186 | 
187 |         def build_value():
188 |             inputs = keras.Input(shape=(param.N, 4))
189 |             x = keras.layers.Flatten()(inputs)
190 |             x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(x)
191 |             if args.batch_norm:
192 |                 x = keras.layers.BatchNormalization()(x)
193 |             x = keras.layers.Dense(args.Units, activation='relu', kernel_initializer=W_Initializer)(x)
194 |             if args.batch_norm:
195 |                 x = keras.layers.BatchNormalization()(x)
196 |             outputs = keras.layers.Dense(1)(x)
197 |             model = keras.Model(inputs=inputs, outputs=outputs)
198 |             return model
199 | 
200 |         self.actor = build_actor()
201 |         self.value = build_value()
202 |         self.target_actor = build_actor()
203 |         self.target_value = build_value()
204 |         if args.load_weights:
205 |             self.value.load_weights("models/value")
206 |             self.actor.load_weights("models/actor")
207 |             print("load weight")
208 |         self.target_actor.set_weights(self.actor.get_weights())
209 |         self.target_value.set_weights(self.value.get_weights())
210 | 
211 |         self.actor_optimizer = tf.optimizers.Adam(args.Lr)
212 |         self.value_optimizer = tf.optimizers.Adam(args.Lr*2)
213 |         self.epsilon = max_epsilon
214 |         self.batch_size = batch_size
215 |         self.buffer = ReplayBuffer(memory_size)
216 |         self.alg = alg
217 |         self.v = Initial_v # average reward
218 |         self.target_v = Initial_v
219 |         self.lam = param.lam_init
220 | 
221 |         # transform some parameters to tensors
222 |         self.f_max = tf.convert_to_tensor(param.f_max)
223 |         self.P_max = tf.convert_to_tensor(param.P_max)
224 |         self.W_max = tf.convert_to_tensor(param.W_max)
225 |         self.E_max = tf.convert_to_tensor(param.E_max)
226 |         self.d_t = tf.convert_to_tensor(param.d_t)
227 |         self.kappa = tf.convert_to_tensor(param.kappa)
228 |         self.sigma2 = tf.convert_to_tensor(param.sigma2)
229 |         self.gamma = tf.convert_to_tensor(param.gamma)
230 | 
231 |     def random_action(self, s):
232 |         action = np.random.rand(param.N, 3)
233 |         # apply softmax to W so that its sum equals 1
234 |         action[:,2] = softmax(action[:,2])
235 |         return action
236 | 
237 |     def _choose_action(self, s):
238 |         action = self.actor(s[None, :])[0].numpy()
239 |         action[:,2] = softmax(action[:,2])
240 |         return action
241 | 
242 |     def choose_action(self, s, noise_object, epsilon):
243 |         action = self.actor(s[None, :])[0].numpy()
244 |         noise = noise_object()
245 |         # Adding noise to action
246 |         action = action + epsilon * noise
247 | 
248 |         # We make sure action is within bounds
249 |         legal_action = np.clip(action, 0, 1)
250 |         legal_action[:,2] = softmax(legal_action[:,2])
251 | 
252 |         return legal_action
253 | 
254 |     @tf.function(jit_compile=True)
255 |     def f_k(self, batch_state, batch_action):
256 |         f = batch_action[:, :, 0] * self.f_max
257 |         P = batch_action[:, :, 1] * self.P_max
258 |         W = tf.nn.softmax(batch_action[:, :, 2]) * self.W_max
259 |         d_r = batch_state[:, :, 0]
260 |         a = batch_state[:, :, 1]
261 |         q = batch_state[:, :, 2]
262 |         h = batch_state[:, :, 3]
263 | 
264 |         d = f * self.d_t / self.kappa + \
265 |             self.d_t * W * (tf.math.log(1+P*h/self.sigma2)/tf.math.log(2.))
266 |         b = ((d > 0) & (d_r > 0) & (d >= d_r))
267 |         b = tf.where(b, 1.0, 0.0)
268 |         pds_q = q - b
269 |         pds_h = h
270 |         pds_d_r = tf.maximum(tf.constant(0, dtype=tf.float32), d_r - d)
271 |         # explanation of the following update formula can be found here: https://github.com/XingqiuHe/DPDS/issues/2
272 |         pds_a = a - 3.3 * b
273 | 
274 |         s = tf.stack([pds_d_r, pds_a, pds_q, pds_h], axis=2)
275 |         return s
276 | 
277 |     @tf.function(jit_compile=True)
278 |     def cost(self, batch_state, batch_action):
279 |         f = batch_action[:, :, 0] * self.f_max
280 |         P = batch_action[:, :, 1] * self.P_max
281 |         W = tf.nn.softmax(batch_action[:, :, 2]) * self.W_max
282 |         E = self.gamma * f**3 * self.d_t + P * self.d_t
283 |         h = batch_state[:, :, 3]
284 |         d = f * self.d_t / self.kappa + \
285 |             self.d_t * W * (tf.math.log(1+P*h/self.sigma2)/tf.math.log(2.))
286 |         # in expectation, completing a task reduces aoi by 1/p_g
287 |         aoi_per_bit = 1 / param.p_g / ((param.d_lb+param.d_ub)/2)
288 |         if args.Alg == 'dpl':
289 |             # for the delay-based algorithm, the total delay equals the sum of the queue lengths
290 |             cost = tf.reduce_sum(batch_state[:, :, 2] - d/((param.d_lb+param.d_ub)/2) + self.lam * tf.math.maximum(0.0, E - self.E_max), axis=1)
291 |         else:
292 |             cost = tf.reduce_sum(batch_state[:, :, 1] - d*aoi_per_bit + self.lam * tf.math.maximum(0.0, E - self.E_max), axis=1)
293 |             #cost = tf.reduce_sum(batch_state[:, :, 1] + self.lam * tf.math.maximum(0.0, E - self.E_max), axis=1)
294 |             #cost = tf.reduce_sum(batch_state[:, :, 1] + self.lam * (E - self.E_max), axis=1)
295 |         return cost
296 | 
297 |     @tf.function(jit_compile=True)
298 |     def train(self, s, a, r, s_next):
299 |         # update value network
300 |         with tf.GradientTape() as tape:
301 |             # calculate target y
302 |             target_a_next = self.target_actor(s_next, training=True)
303 |             target_pds_next = self.f_k(s_next, target_a_next)
304 |             target_y = self.cost(s_next, target_a_next) + \
305 |                 self.target_value(target_pds_next, training=True) - self.target_v
306 |             pds = self.f_k(s,a)
307 |             pds_value = self.value(pds, training=True)
308 |             td = pds_value - target_y
309 |             value_loss = tf.math.reduce_mean(tf.math.abs(td))
310 | 
311 |         value_grad = tape.gradient(value_loss, self.value.trainable_variables)
312 |         #value_grad = [tf.clip_by_norm(grad, 10.0) for grad in value_grad]
313 |         self.value_optimizer.apply_gradients( zip(value_grad, self.value.trainable_variables) )
314 | 
315 |         # update actor network
316 |         with tf.GradientTape() as tape:
317 |             actions = self.actor(s, training=True)
318 |             pds = self.f_k(s, actions)
319 |             #critic_value = cost(s, actions) + self.value(pds, training=True) - self.v
320 |             c = self.cost(s, actions)
321 |             value = self.value(pds, training=True)
322 |             critic_value = c + value
323 |             actor_loss = tf.math.reduce_mean(critic_value)
324 | 
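        # Deterministic policy-gradient step: actor_loss is the sampled estimate of the
        # one-step cost plus the value of the resulting post-decision state, both of which
        # depend on the actor's actions; descending its gradient below pushes the policy
        # toward actions with lower estimated long-term cost.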
325 |         actor_grad = tape.gradient(actor_loss, self.actor.trainable_variables)
326 |         #actor_grad = [tf.clip_by_norm(grad, 10.0) for grad in actor_grad]
327 |         self.actor_optimizer.apply_gradients( zip(actor_grad, self.actor.trainable_variables) )
328 | 
329 |         # td is returned so that self.v can be updated outside this function
330 |         # updating self.v here would make the tf.function leak the local tensor 'td', which TensorFlow prohibits
331 |         return (td, value_loss, actor_loss, c, value)
332 | 
333 |     def save_model(self, dir=log_dir_name + '/models'):
334 |         self.actor.save_weights(dir + '/' + self.alg + '_actor')
335 |         self.value.save_weights(dir + '/' + self.alg + '_value')
336 |         self.actor.save_weights('models/actor')
337 |         self.value.save_weights('models/value')
338 |         with open(dir + '/' + self.alg + '_v.pickle', 'wb') as f:
339 |             pickle.dump(self.v, f)
340 |         with open('models/v.pickle', 'wb') as f:
341 |             pickle.dump(self.v, f)
342 | 
343 | 
344 | @tf.function(jit_compile=True)
345 | def update_target(target_weights, weights, omega):
346 |     for (a, b) in zip(target_weights, weights):
347 |         a.assign(b * omega + a * (1 - omega))
348 | 
349 | def train(T):
350 |     agent = DPDS(args.Batch_Size, args.Memory_Size, args.Max_Epsilon)
351 |     print("============" + agent.alg + "============")
352 | 
353 |     state = env.reset()
354 |     std_dev = 0.01
355 |     ou_noise = OUActionNoise(mean=np.zeros(1), std_deviation=float(std_dev) * np.ones(1))
356 |     acc_E = np.zeros(param.N)
357 |     acc_A = np.zeros(param.N)
358 |     stat_W = []
359 |     timer = 1
360 | 
361 |     acc_interaction_time = 0
362 |     acc_inference_time = 0
363 |     acc_training_time = 0
364 | 
365 |     while timer <= T:
366 |         if timer % 10000 == 0:
367 |             print(timer)
368 | 
369 |         if timer <= args.Start_Size:
370 |             action = agent.random_action(state)
371 |         else:
372 |             inference_begin = time.time()
373 |             action = agent.choose_action(state, ou_noise, agent.epsilon)
374 |             inference_end = time.time()
375 |             acc_inference_time += inference_end - inference_begin
376 |         interaction_begin = time.time()
377 |         next_state, E = env.step(action)
378 |         interaction_end = time.time()
379 |         acc_interaction_time += interaction_end - interaction_begin
380 |         cost = np.sum(state[:,1] + agent.lam * (E - param.E_max))
381 | 
382 |         agent.buffer.store((state, action, cost, next_state))
383 | 
384 |         # train
385 |         if timer > args.Update_After and timer % args.Train_Interval == 0:
386 |             training_begin = time.time()
387 |             # sample from buffer
388 |             s, a, r, s_next = agent.buffer.sample(args.Batch_Size)
389 |             td, value_loss, actor_loss, c, v = agent.train(s, a, r, s_next)
390 |             agent.v = agent.v - param.beta(timer) * tf.reduce_mean(td)
391 |             training_end = time.time()
392 |             acc_training_time += training_end - training_begin
393 | 
394 |             # update target networks
395 |             update_target(agent.target_actor.variables, agent.actor.variables, args.omega)
396 |             update_target(agent.target_value.variables, agent.value.variables, args.omega)
397 |             agent.target_v = args.omega * agent.v + (1 - args.omega) * agent.target_v
398 | 
399 |             # update lambda
400 |             agent.lam = agent.lam + param.theta(timer) * (E - param.E_max)
401 |             agent.lam = np.maximum(0, agent.lam)
402 |             agent.lam = np.minimum(param.lam_max, agent.lam)
403 | 
404 |             with fw.as_default():
405 |                 tf.summary.scalar('value_loss', value_loss, step = timer)
406 |                 tf.summary.scalar('actor_loss', actor_loss, step = timer)
407 |                 tf.summary.scalar('cost1', tf.math.reduce_mean(c), step = timer)
408 |                 tf.summary.scalar('value', tf.math.reduce_mean(v), step = timer)
409 | 
410 |         # epsilon decay
411 |         agent.epsilon = max(Epsilon_Decay_Rate * timer + args.Max_Epsilon, args.Min_Epsilon)
412 | 
413 |         timer += 1
414 | 
415 |         """
416 |         TODO
417 |         we need to explain in the paper that constraint (10b) is guaranteed by
418 |         applying softmax to W
419 |         also need to specify that we use values in (0,1) to represent the control variables
420 |         also need to discuss why we do not need to enforce constraint (7)
421 |         """
422 | 
423 |         # log
424 |         acc_E += E
425 |         acc_A += state[:,1]
426 |         stat_W.append(action[:,2])
427 |         with fw.as_default():
428 |             tf.summary.scalar('cost', cost, step=timer)
429 |             tf.summary.scalar('aoi', np.sum(state[:,1])/args.N, step=timer)
430 |             tf.summary.scalar('average aoi', np.sum(acc_A)/timer/args.N, step=timer)
431 |             tf.summary.scalar('energy', np.sum(E)/args.N, step=timer)
432 |             tf.summary.scalar('v', agent.v, step=timer)
433 |             tf.summary.scalar('epsilon', agent.epsilon, step=timer)
434 |             tf.summary.scalar('lambda', np.sum(agent.lam)/args.N, step=timer)
435 |             tf.summary.scalar('average energy', np.sum(acc_E)/timer/args.N, step=timer)
436 | 
437 |         state = next_state
438 | 
439 |     agent.save_model()
440 |     print("Average interaction time:", acc_interaction_time / args.T)
441 |     print("Average inference time:", acc_inference_time / (args.T - args.Start_Size))
442 |     print("Average training time:", acc_training_time / (args.T - args.Update_After))
443 |     print("Average number of tasks:", env.nTask / timer / args.N)
444 | 
445 |     data = {'N': param.N, 'locations': param.WD_loc_list, 'distances': param.distance, 'E': env.E_stat, 'A': env.A_stat}
446 |     savemat(data_dir_name + '.mat', data)
447 | 
448 | if __name__ == "__main__":
449 |     train(args.T)
450 | 
451 | print(tf.__version__)
452 | 
453 | 
454 | 
--------------------------------------------------------------------------------
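As a usage note: the checkpoints written by `save_model()` (e.g. `models/actor`) can be reloaded for greedy evaluation. Below is a minimal sketch of such a script; it is not part of the repository, and the values of `N` and `UNITS` (as well as the batch-norm setting) are placeholders that must match the `Parameter`/`args` configuration used during training.

```python
# Hypothetical evaluation sketch -- not part of the repository.
# Rebuilds the actor with the same layer structure as DPDS.build_actor
# (assuming batch_norm was disabled during training) and loads the
# checkpoint that save_model() writes to 'models/actor'.
import numpy as np
from tensorflow import keras
from scipy.special import softmax

N = 10        # number of WDs; must match the training run
UNITS = 512   # hidden width; must match args.Units used during training

def build_actor(n, units):
    inputs = keras.Input(shape=(n, 4))
    x = keras.layers.Flatten()(inputs)
    x = keras.layers.Dense(units, activation='relu')(x)
    x = keras.layers.Dense(units, activation='relu')(x)
    x = keras.layers.Dense(n * 3, activation='sigmoid')(x)
    outputs = keras.layers.Reshape((n, 3))(x)
    return keras.Model(inputs=inputs, outputs=outputs)

actor = build_actor(N, UNITS)
actor.load_weights("models/actor")

def greedy_action(state):
    """state: (N, 4) array of (d_r, a, q, h); returns an (N, 3) action in [0, 1]."""
    action = actor(state[None, :])[0].numpy()
    action[:, 2] = softmax(action[:, 2])   # bandwidth shares sum to one, as in choose_action
    return action

# Example: act greedily on an all-zero state (replace with states from Environment).
print(greedy_action(np.zeros((N, 4), dtype=np.float32)))
```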