#!python3
"""DQN baseline for joint cluster selection and discrete power allocation.

Defines ``DQNActor`` (a fully connected Q-network) and ``DQNAgent`` (an
epsilon-greedy DQN agent with a replay memory and a soft-updated target
network).  Running the file as a script sets up the evaluation configuration
and the power-allocation environment.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import random
from collections import Counter
from torch.autograd import Variable
import time
import scipy.stats as st
import copy
import matplotlib.pyplot as plt
import os
# NOTE(review): hard-coded working directory and GPU index; both are kept
# because the relative data/result paths used below depend on them.
os.chdir('/home/chan/PDQN/')
os.environ['CUDA_VISIBLE_DEVICES']='1'
from agent import Agent
from memory.memory import Memory
from utils import soft_update_target_network, hard_update_target_network
from utils.noise import OrnsteinUhlenbeckActionNoise
from env import env_PowerAllocation
import tool as t


class DQNActor(nn.Module):
    """Fully connected Q-network: state -> one value per discrete action.

    Architecture: ``state_input_layer`` (state_size -> hidden_layers[0]),
    then one linear layer per consecutive pair of hidden sizes, then
    ``value_layer`` (last hidden size -> action_size).  Every layer except
    the output head is followed by the configured activation.

    ``self.layers[-1]`` is a second output-sized linear layer that
    ``forward`` never uses.  It is kept (and initialised exactly as before)
    so that parameter names, parameter order and previously saved
    ``state_dict`` checkpoints stay compatible.
    """

    def __init__(self, state_size, action_size, power_level, hidden_layers=(100,),
                 output_layer_init_std=None, activation="relu", **kwargs):
        """Build and initialise the network.

        :param state_size: dimension of the state vector
        :param action_size: number of discrete actions (network outputs)
        :param power_level: number of power levels; stored only
        :param hidden_layers: non-empty sequence of hidden layer widths
        :param output_layer_init_std: std of the zero-mean normal init of the
            output layers; ``None`` keeps PyTorch's default weight init
        :param activation: ``"relu"`` or ``"leaky_relu"``
        :param kwargs: ignored; lets callers share one kwargs dict between
            different network classes
        :raises ValueError: if ``hidden_layers`` is empty or ``None``
        """
        super(DQNActor, self).__init__()
        if hidden_layers is None or len(hidden_layers) == 0:
            raise ValueError("hidden_layers must contain at least one layer size")
        self.state_size = state_size
        self.action_size = action_size
        self.activation = activation
        self.power_level = power_level

        # create layers ---------------------------------------------------
        # Creation order is unchanged so seeded initialisation draws the
        # same random numbers as before.
        self.layers = nn.ModuleList()
        self.state_input_layer = nn.Linear(self.state_size, hidden_layers[0])
        nh = len(hidden_layers)
        for i in range(1, nh):
            self.layers.append(nn.Linear(hidden_layers[i - 1], hidden_layers[i]))
        # unused legacy head, see class docstring
        self.layers.append(nn.Linear(hidden_layers[nh - 1], self.action_size))
        # output head actually used by forward()
        self.value_layer = nn.Linear(hidden_layers[nh - 1], self.action_size)

        # initialise layer weights ---------------------------------------
        # hidden layers: He initialisation, zero bias
        nn.init.kaiming_normal_(self.state_input_layer.weight, nonlinearity=activation)
        nn.init.zeros_(self.state_input_layer.bias)
        for layer in list(self.layers)[:-1]:
            nn.init.kaiming_normal_(layer.weight, nonlinearity=activation)
            nn.init.zeros_(layer.bias)
        # output layers: small normal initialisation (when a std is given),
        # zero bias
        for head in (self.layers[-1], self.value_layer):
            if output_layer_init_std is not None:
                nn.init.normal_(head.weight, mean=0., std=output_layer_init_std)
            nn.init.zeros_(head.bias)

    def _activate(self, x):
        """Apply the configured hidden-layer activation to ``x``."""
        if self.activation == "relu":
            return F.relu(x)
        if self.activation == "leaky_relu":
            return F.leaky_relu(x, 0.01)  # 0.01 = negative slope
        raise ValueError("Unknown activation function "+str(self.activation))

    def forward(self, state):
        """Return the action values for ``state``.

        :param state: tensor whose last dimension is ``state_size``
        :return: tensor whose last dimension is ``action_size``; the output
            head is linear (no activation)
        :raises ValueError: if the configured activation is unknown
        """
        x = self._activate(self.state_input_layer(state))
        # All hidden-to-hidden layers.  layers[-1] is the unused legacy head,
        # so it is skipped; this also makes a single hidden layer work.
        for layer in list(self.layers)[:-1]:
            x = self._activate(layer(x))
        return self.value_layer(x)

#%%
class DQNAgent(Agent):
    """Epsilon-greedy DQN agent over a flattened (cluster, power) action set.

    One discrete action index encodes a cluster choice together with one
    power level per UE; ``action_decoder`` turns an index back into that
    pair.  Learning uses a replay memory, a target network that is
    soft-updated after every optimisation step, and a single Adam optimiser.
    """

    NAME = "DQN Agent"

    def __init__(self,
                 s_dim,
                 action_space,
                 nUE, power_level,
                 actor_class=DQNActor,
                 actor_kwargs=None,
                 epsilon_initial=1.0,
                 epsilon_final=0.05,
                 epsilon_steps=10000,
                 batch_size=64,
                 gamma=0.99,
                 tau_actor=0.01,  # Polyak averaging factor for copying target weights
                 replay_memory_size=1000000,
                 learning_rate_actor=0.0001,
                 initial_memory_threshold=0,
                 use_ornstein_noise=False,
                 loss_func=F.mse_loss,
                 clip_grad=10,
                 inverting_gradients=False,
                 zero_index_gradients=False,
                 indexed=False,
                 weighted=False,
                 average=False,
                 random_weighted=False,
                 device="cuda" if torch.cuda.is_available() else "cpu",
                 seed=None):
        """Create the agent, its replay memory, networks and optimiser.

        :param s_dim: dimension of the state vector
        :param action_space: indexable; element 0 is the number of cluster
            choices
        :param nUE: number of UEs whose power is chosen
        :param power_level: number of discrete power levels per UE
        :param actor_class: network class, called as
            ``actor_class(s_dim, num_actions, power_level, **actor_kwargs)``
        :param actor_kwargs: extra keyword arguments for ``actor_class``;
            ``None`` means no extra arguments
        :param epsilon_initial: exploration rate at episode 0
        :param epsilon_final: exploration rate after ``epsilon_steps`` episodes
        :param epsilon_steps: number of episodes of linear epsilon annealing
        :param batch_size: replay mini-batch size
        :param gamma: discount factor
        :param tau_actor: factor passed to ``soft_update_target_network``
        :param replay_memory_size: replay memory capacity
        :param learning_rate_actor: Adam learning rate
        :param initial_memory_threshold: number of ``step`` calls required
            before learning starts
        :param loss_func: loss between predicted and target values
        :param clip_grad: gradient-norm clipping limit; ``<= 0`` disables it
        :param device: torch device string
        :param seed: random seed; ``None`` leaves torch unseeded

        ``use_ornstein_noise``, ``inverting_gradients``,
        ``zero_index_gradients``, ``indexed``, ``weighted``, ``average`` and
        ``random_weighted`` are stored (and partly reported by ``__str__``)
        but not otherwise read by the methods of this class.
        """
        super(DQNAgent, self).__init__(s_dim, action_space)
        # A None default avoids sharing one dict between all instances.
        actor_kwargs = {} if actor_kwargs is None else actor_kwargs
        self.device = torch.device(device)
        self.nUE = nUE
        self.power_level = power_level
        # every cluster choice combined with every per-UE power assignment
        self.num_actions = self.action_space[0]*(self.power_level**self.nUE)
        self.action_max = torch.from_numpy(np.ones((self.num_actions,))).float().to(device)
        self.action_min = -self.action_max.detach()
        self.action_range = (self.action_max-self.action_min).detach()
        self.epsilon = epsilon_initial
        self.epsilon_initial = epsilon_initial
        self.epsilon_final = epsilon_final
        self.epsilon_steps = epsilon_steps
        self.indexed = indexed
        self.weighted = weighted
        self.average = average
        self.random_weighted = random_weighted
        # NOTE(review): a three-way XOR is also true when all three flags are
        # set, so this does not fully enforce mutual exclusion.
        assert (weighted ^ average ^ random_weighted) or not (weighted or average or random_weighted)
        self.batch_size = batch_size
        self.gamma = gamma
        self.replay_memory_size = replay_memory_size
        self.initial_memory_threshold = initial_memory_threshold
        self.learning_rate_actor = learning_rate_actor
        self.inverting_gradients = inverting_gradients
        self.tau_actor = tau_actor
        self._step = 0
        self._episode = 0
        self.updates = 0
        self.clip_grad = clip_grad
        self.zero_index_gradients = zero_index_gradients

        self.np_random = None
        self.seed = seed
        # Seed before the networks are built so their initial weights are
        # reproducible.
        self._seed(seed)
        self.use_ornstein_noise = use_ornstein_noise

        # 0) Replay memory: states of size s_dim, one scalar action index
        self.replay_memory = Memory(replay_memory_size, (s_dim,), (1,), next_actions=False)
        # 1) Online network
        self.actor = actor_class(s_dim, self.num_actions, power_level, **actor_kwargs).to(device)
        # 2) Target network, started as an exact copy of the online network
        self.actor_target = actor_class(s_dim, self.num_actions, power_level, **actor_kwargs).to(device)
        hard_update_target_network(self.actor, self.actor_target)
        self.actor_target.eval()
        # 3) Loss and optimiser
        self.loss_func = loss_func
        self.actor_optimiser = optim.Adam(self.actor.parameters(), lr=self.learning_rate_actor)

    def __str__(self):
        """Return the base description followed by this agent's settings."""
        settings = (
            ("Actor Alpha", self.learning_rate_actor),
            ("Gamma", self.gamma),
            ("Tau (actor)", self.tau_actor),
            ("Inverting Gradients", self.inverting_gradients),
            ("Replay Memory", self.replay_memory_size),
            ("Batch Size", self.batch_size),
            ("Initial memory", self.initial_memory_threshold),
            ("epsilon_initial", self.epsilon_initial),
            ("epsilon_final", self.epsilon_final),
            ("epsilon_steps", self.epsilon_steps),
            ("Clip Grad", self.clip_grad),
            ("Ornstein Noise?", self.use_ornstein_noise),
            ("Zero Index Grads?", self.zero_index_gradients),
            ("Seed", self.seed),
        )
        desc = super().__str__() + "\n"
        desc += "Actor Network {}\n".format(self.actor)
        desc += "".join("{}: {}\n".format(label, value) for label, value in settings)
        return desc

    def _seed(self, seed=None):
        """Seed ``random``, NumPy, the agent's own RandomState and torch.

        NOTE: this will not reset already initialised network weights; use
        the ``seed`` parameter of the constructor for that.

        :param seed: integer seed, or ``None`` to seed from OS entropy
            (torch is left untouched in that case)
        """
        self.seed = seed
        random.seed(seed)
        np.random.seed(seed)
        self.np_random = np.random.RandomState(seed=seed)
        if seed is not None:
            torch.manual_seed(seed)
            # Compare the device *type*: torch.device("cuda:1") is not equal
            # to torch.device("cuda"), so an equality test would skip CUDA
            # seeding for indexed devices.
            if self.device.type == "cuda":
                torch.cuda.manual_seed(seed)

    def start_episode(self):
        """Hook called at the start of an episode; nothing to do."""
        pass

    def end_episode(self):
        """Count the finished episode and linearly anneal epsilon."""
        self._episode += 1
        ep = self._episode
        if ep < self.epsilon_steps:
            self.epsilon = self.epsilon_initial - (self.epsilon_initial - self.epsilon_final) * (
                    ep / self.epsilon_steps)
        else:
            self.epsilon = self.epsilon_final

    def _greedy_action(self, state):
        """Return the index of the highest-valued action for ``state``.

        :param state: 1-D numpy array of length ``s_dim``
        """
        with torch.no_grad():
            # Cast to float32 so float64 observations do not clash with the
            # float32 network weights.
            state = torch.from_numpy(state).float().to(self.device)
            q_values = self.actor(state.unsqueeze(0))
            return np.argmax(q_values.cpu().numpy())

    # take an action for train ============================================
    def act(self, state):
        """Pick an action epsilon-greedily (used during training).

        :param state: 1-D numpy array of length ``s_dim``
        :return: discrete action index in ``[0, num_actions)``
        """
        if self.np_random.uniform() < self.epsilon:
            return self.np_random.choice(self.num_actions)
        return self._greedy_action(state)

    # take the deterministic action for test ==============================
    def _act(self, state):
        """Pick the greedy action (used for evaluation).

        :param state: 1-D numpy array of length ``s_dim``
        :return: discrete action index in ``[0, num_actions)``
        """
        return self._greedy_action(state)

    def action_decoder(self, action, max_power):
        """Decode a flat action index into a cluster index and UE powers.

        The index is ``cluster * power_level**nUE + p`` where ``p`` is a
        base-``power_level`` number with one digit per UE (UE 0 is the most
        significant digit).  A digit ``d`` maps to ``max_power / 10**d``.

        :param action: discrete action index
        :param max_power: power assigned to digit 0
        :return: ``(cluster, power)`` where ``power`` is an array of length
            ``nUE``
        """
        cluster, remainder = divmod(int(action), self.power_level ** self.nUE)
        power = [0] * self.nUE
        # Peel off digits from least significant (last UE) to most
        # significant (UE 0).  Every UE uses its integer digit; dividing the
        # leading digit by power_level again would yield fractional
        # exponents for UE 0 only.
        for idx in range(self.nUE - 1, -1, -1):
            remainder, level = divmod(remainder, self.power_level)
            power[idx] = 1 / 10 ** level * max_power
        return cluster, np.array(power)

    def step(self, state, action, reward, next_state, next_action, terminal):
        """Store one transition and, once enough steps were seen, learn.

        :param state: state before the action
        :param action: discrete action index that was taken
        :param reward: reward received
        :param next_state: state after the action
        :param next_action: next action index (accepted but not stored)
        :param terminal: whether ``next_state`` ends the episode
        """
        self._step += 1
        # 1) Memory
        self._add_sample(state, np.array([action]), reward, next_state,
                         np.array([next_action]), terminal=terminal)
        # 2) Update
        if self._step >= self.batch_size and self._step >= self.initial_memory_threshold:
            self._optimize_td_loss()
            self.updates += 1

    def _add_sample(self, state, action, reward, next_state, next_action, terminal):
        """Append one transition to the replay memory.

        ``next_action`` is accepted for interface compatibility only; the
        memory was created with ``next_actions=False``.
        """
        assert len(action) == 1
        self.replay_memory.append(state, action, reward, next_state, terminal=terminal)

    def _optimize_td_loss(self):
        """Run one gradient step on a sampled batch and soft-update the target."""
        if self._step < self.batch_size or self._step < self.initial_memory_threshold:
            return
        # 1) Sample a batch from replay memory
        states, actions, rewards, next_states, terminals = self.replay_memory.sample(
            self.batch_size, random_machine=self.np_random)
        # 2) Convert to tensors.  reshape(-1) keeps a 1-D batch axis even for
        #    a batch of one, which squeeze() would collapse to 0-dim.
        states = torch.from_numpy(states).float().to(self.device)
        actions = torch.from_numpy(actions).to(self.device).long()
        rewards = torch.from_numpy(rewards).float().to(self.device).reshape(-1)
        next_states = torch.from_numpy(next_states).float().to(self.device)
        # float so that (1 - terminals) is valid whatever dtype the memory uses
        terminals = torch.from_numpy(terminals).float().to(self.device).reshape(-1)

        # 3) TD target from the target network (no gradient)
        with torch.no_grad():
            next_q = self.actor_target(next_states)
            target = rewards + (1 - terminals) * self.gamma * next_q.max(dim=1)[0]

        # 4) Value predicted by the online network for the taken actions
        q_values = self.actor(states)
        predicted = q_values.gather(1, actions.view(-1, 1)).reshape(-1)
        loss_Q = self.loss_func(predicted, target)

        self.actor_optimiser.zero_grad()
        loss_Q.backward()
        if self.clip_grad > 0:
            # in-place variant; clip_grad_norm (no underscore) is deprecated
            torch.nn.utils.clip_grad_norm_(self.actor.parameters(), self.clip_grad)
        self.actor_optimiser.step()

        # 5) Move the target network towards the online network
        soft_update_target_network(self.actor, self.actor_target, self.tau_actor)

    def save_models(self, prefix):
        """Save the online network's weights to ``prefix + '_actor.pt'``.

        :param prefix: path prefix of the checkpoint file
        """
        torch.save(self.actor.state_dict(), prefix + '_actor.pt')
        print('Models saved successfully')

    def load_models(self, prefix):
        """Load the online network from ``prefix + '_actor.pt'``.

        The checkpoint is mapped to CPU first, so it also loads on machines
        without a GPU.  The target network is then synchronised with the
        loaded weights so that resumed training does not bootstrap from an
        unrelated target.

        :param prefix: path prefix of the checkpoint file
        """
        self.actor.load_state_dict(torch.load(prefix + '_actor.pt', map_location='cpu'))
        hard_update_target_network(self.actor, self.actor_target)
        print('Models loaded successfully')

if __name__ == '__main__':
    batch_size=128#32
    initial_memory_threshold=128#1000 # Number of transitions required to start learning.
    replay_memory_size=20000 # Replay memory transition capacity
    epsilon_initial=1
    epsilon_steps=1000 # Number of episodes over which to linearly anneal epsilon
    epsilon_final=0.01 # Final epsilon value
    gamma=0.95
    clip_grad=1 # Parameter gradient clipping limit
    use_ornstein_noise= False # False: Uniformly sample parameters & add noise to taken parameters / True: greedy parameters
    inverting_gradients= True # Use inverting gradients scheme instead of squashing function
    seed=0 # Random seed
    save_freq = 100#0 # How often to save models (0 = never)
    # 1) ParamActor---------------------------------------------------------
    learning_rate_actor_param=0.00001
    tau_actor_param=0.001
    # loss function options for the actor-parameter network
    average=False # Average weighted loss function
    weighted=False # Naive weighted loss function
    random_weighted=False # Randomly weighted loss function
    indexed=False # Indexed loss function
    zero_index_gradients=False # Whether to zero all gradients for action-parameters not corresponding to the chosen action
    # 2) Actor--------------------------------------------------------------
    tau_actor=0.1
    learning_rate_actor=0.00001#0.0001#0.001 # reducing lr can avoid nan output
    action_input_layer=0 # Which layer to input action parameters (swallowed by DQNActor's **kwargs)
    #-----------------------------------------------------------------------
    # Performance
    baseline_keys = ['1','2','3','4','5']
    dic_info_key = ['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference','Backhaul Difference','SINRdb','QoS Difference','Throughput']
    dic_info={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in baseline_keys}
    dic_info_no_back={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in baseline_keys}
    dic_info_ori_key = ['Energy Efficiency','Backhaul Cost','QoS Good', 'QoS Gurantee', 'QoS Bad','System Throughput','QoS Squared Difference']
    dic_info_ori={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in baseline_keys}
    dic_info_ori_no_back={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in baseline_keys}
    a_info={'c':[],'P':[]}
    dic_store={'a':[],'ddpg_s':[],'r':[],'dqn_s':[],'dqn_Q':[]}
    dic_NN_output={'actor':[],'critic':[],'dqn_q_eval':[],'dqn_q_target':[]}
    num_back=0
    debug_QoSr={i:[] for i in baseline_keys}
    #-----------------------------------------------------------------------
    # debug
    debug_PNN=[]
    debug_backhaul=[]
    debug_BSbackhaul=[]
    debug_channel_episode=[]
    debug_episode_back=[]
    debug_s=[]

    #%% Need to modify
    ########################################################################
    scale_actions = True
    initialise_params = False # True: add a pass-through layer to ActorParam and initialise it / False: no pass-through layer
    MAXepisode = 100#1000
    MAXepisode_train = 1000
    MAXstep = 100#10#150
    realization=100#20
    title="PDQN1" # Prefix of output files
    n_baseline=5
    load_dir ="results_53/PDQN_cc_s11_r11_0dB_N3_10" # Directory the results are written under
    load_num="_done"
    layers_actor=[512,128,16] # Hidden layers
    actor_kwargs={'hidden_layers': layers_actor, 'output_layer_init_std': 1e-5,'action_input_layer': action_input_layer,'activation': "relu"}
    layers_actor_param =[256]
    actor_param_kwargs={'hidden_layers': layers_actor_param, 'output_layer_init_std': 1e-5,'squashing_function': False,'activation': "relu"}
    name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE8.csv'
    scenario_name='EnvInfo_11'
    lambda1=0.2
    lambda2=0.8
    lambda3=0
    result_save=load_dir+'/test_testChannel_block_fading'
    ########################################################################
    #%% ENV
    env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3,MAXepisode=MAXepisode,n_baseline=n_baseline)
    # Choose network geometry: load the stored scenario (env.reset() would
    # create a new one instead)
    env.load(name=scenario_name)
    # mean/std statistics: second argument True = calculate, False = load
    env.mean_std(10**5,False,name)
    num_actions = env.action_space[0]
    s_dim = env.nUE
    # use the same channel gains for every test run
    read_train_channel_episode = t.readCSI('Rayleigh_CSIforTest_100episode_100timestep_s11',env.nSBS,env.nUE,MAXepisode)

    #%% DQN
power_level=2 519 | agent_classDQN = DQNAgent 520 | agentDQN = agent_classDQN(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space, 521 | power_level=power_level,batch_size=batch_size,learning_rate_actor=learning_rate_actor, # 0.001 522 | epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma, 523 | clip_grad=clip_grad,indexed=indexed,average=average, 524 | random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted, 525 | initial_memory_threshold=initial_memory_threshold, 526 | use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients, 527 | actor_kwargs=actor_kwargs, 528 | zero_index_gradients=zero_index_gradients,seed=seed) 529 | agentDQN.action_decoder(5, env.P_Max_SBS) 530 | 531 | #%% 532 | 533 | 534 | -------------------------------------------------------------------------------- /EnvInfo_3.csv: -------------------------------------------------------------------------------- 1 | 130.66766715061817,6.103612044283106,149.13216990439463 2 | 112.77970089450794,-222.92821834528178,327.19287066463886 3 | -224.61027764370075,346.2031408013222,-406.9085457985689,291.44245949621796,301.39543513106366 4 | -407.87410382359803,13.87531070132368,-4.2654563033904305,-113.8322325759938,324.45315913531704 5 | 0.004815172076881084,0.174060647665381,0.007931849219840477,0.09732104116184383,0.10530752181166962,0.020956068593393933 6 | 0.07744932935443383,0.02243851774429035,0.01443773216963905,0.06872183236607807,0.005057240946593546,0.01895849440144912 7 | 0.0017961313331011153,0.03396952573879356,0.00436666596957212,0.014889589929801985,0.884337151418975,0.0157284306455081 8 | 0,1,2,3,4 9 | 10 | 11 | 0,1,2,3 12 | 4 13 | 14 | 0,1,2,3 15 | 16 | 4 17 | 0,1,2,4 18 | 3 19 | 20 | 0,1,2 21 | 3,4 22 | 23 | 0,1,2 24 | 3 25 | 4 26 | 0,1,2,4 27 | 28 | 3 29 | 0,1,2 30 | 4 31 | 3 32 | 0,1,2 33 | 34 | 3,4 35 | 0,1,3,4 36 | 2 
37 | 38 | 0,1,3 39 | 2,4 40 | 41 | 0,1,3 42 | 2 43 | 4 44 | 0,1,4 45 | 2,3 46 | 47 | 0,1 48 | 2,3,4 49 | 50 | 0,1 51 | 2,3 52 | 4 53 | 0,1,4 54 | 2 55 | 3 56 | 0,1 57 | 2,4 58 | 3 59 | 0,1 60 | 2 61 | 3,4 62 | 0,1,3,4 63 | 64 | 2 65 | 0,1,3 66 | 4 67 | 2 68 | 0,1,3 69 | 70 | 2,4 71 | 0,1,4 72 | 3 73 | 2 74 | 0,1 75 | 3,4 76 | 2 77 | 0,1 78 | 3 79 | 2,4 80 | 0,1,4 81 | 82 | 2,3 83 | 0,1 84 | 4 85 | 2,3 86 | 0,1 87 | 88 | 2,3,4 89 | 0,2,3,4 90 | 1 91 | 92 | 0,2,3 93 | 1,4 94 | 95 | 0,2,3 96 | 1 97 | 4 98 | 0,2,4 99 | 1,3 100 | 101 | 0,2 102 | 1,3,4 103 | 104 | 0,2 105 | 1,3 106 | 4 107 | 0,2,4 108 | 1 109 | 3 110 | 0,2 111 | 1,4 112 | 3 113 | 0,2 114 | 1 115 | 3,4 116 | 0,3,4 117 | 1,2 118 | 119 | 0,3 120 | 1,2,4 121 | 122 | 0,3 123 | 1,2 124 | 4 125 | 0,4 126 | 1,2,3 127 | 128 | 0 129 | 1,2,3,4 130 | 131 | 0 132 | 1,2,3 133 | 4 134 | 0,4 135 | 1,2 136 | 3 137 | 0 138 | 1,2,4 139 | 3 140 | 0 141 | 1,2 142 | 3,4 143 | 0,3,4 144 | 1 145 | 2 146 | 0,3 147 | 1,4 148 | 2 149 | 0,3 150 | 1 151 | 2,4 152 | 0,4 153 | 1,3 154 | 2 155 | 0 156 | 1,3,4 157 | 2 158 | 0 159 | 1,3 160 | 2,4 161 | 0,4 162 | 1 163 | 2,3 164 | 0 165 | 1,4 166 | 2,3 167 | 0 168 | 1 169 | 2,3,4 170 | 0,2,3,4 171 | 172 | 1 173 | 0,2,3 174 | 4 175 | 1 176 | 0,2,3 177 | 178 | 1,4 179 | 0,2,4 180 | 3 181 | 1 182 | 0,2 183 | 3,4 184 | 1 185 | 0,2 186 | 3 187 | 1,4 188 | 0,2,4 189 | 190 | 1,3 191 | 0,2 192 | 4 193 | 1,3 194 | 0,2 195 | 196 | 1,3,4 197 | 0,3,4 198 | 2 199 | 1 200 | 0,3 201 | 2,4 202 | 1 203 | 0,3 204 | 2 205 | 1,4 206 | 0,4 207 | 2,3 208 | 1 209 | 0 210 | 2,3,4 211 | 1 212 | 0 213 | 2,3 214 | 1,4 215 | 0,4 216 | 2 217 | 1,3 218 | 0 219 | 2,4 220 | 1,3 221 | 0 222 | 2 223 | 1,3,4 224 | 0,3,4 225 | 226 | 1,2 227 | 0,3 228 | 4 229 | 1,2 230 | 0,3 231 | 232 | 1,2,4 233 | 0,4 234 | 3 235 | 1,2 236 | 0 237 | 3,4 238 | 1,2 239 | 0 240 | 3 241 | 1,2,4 242 | 0,4 243 | 244 | 1,2,3 245 | 0 246 | 4 247 | 1,2,3 248 | 0 249 | 250 | 1,2,3,4 251 | 1,2,3,4 252 | 0 253 | 254 | 1,2,3 255 | 0,4 256 | 257 | 1,2,3 
258 | 0 259 | 4 260 | 1,2,4 261 | 0,3 262 | 263 | 1,2 264 | 0,3,4 265 | 266 | 1,2 267 | 0,3 268 | 4 269 | 1,2,4 270 | 0 271 | 3 272 | 1,2 273 | 0,4 274 | 3 275 | 1,2 276 | 0 277 | 3,4 278 | 1,3,4 279 | 0,2 280 | 281 | 1,3 282 | 0,2,4 283 | 284 | 1,3 285 | 0,2 286 | 4 287 | 1,4 288 | 0,2,3 289 | 290 | 1 291 | 0,2,3,4 292 | 293 | 1 294 | 0,2,3 295 | 4 296 | 1,4 297 | 0,2 298 | 3 299 | 1 300 | 0,2,4 301 | 3 302 | 1 303 | 0,2 304 | 3,4 305 | 1,3,4 306 | 0 307 | 2 308 | 1,3 309 | 0,4 310 | 2 311 | 1,3 312 | 0 313 | 2,4 314 | 1,4 315 | 0,3 316 | 2 317 | 1 318 | 0,3,4 319 | 2 320 | 1 321 | 0,3 322 | 2,4 323 | 1,4 324 | 0 325 | 2,3 326 | 1 327 | 0,4 328 | 2,3 329 | 1 330 | 0 331 | 2,3,4 332 | 2,3,4 333 | 0,1 334 | 335 | 2,3 336 | 0,1,4 337 | 338 | 2,3 339 | 0,1 340 | 4 341 | 2,4 342 | 0,1,3 343 | 344 | 2 345 | 0,1,3,4 346 | 347 | 2 348 | 0,1,3 349 | 4 350 | 2,4 351 | 0,1 352 | 3 353 | 2 354 | 0,1,4 355 | 3 356 | 2 357 | 0,1 358 | 3,4 359 | 3,4 360 | 0,1,2 361 | 362 | 3 363 | 0,1,2,4 364 | 365 | 3 366 | 0,1,2 367 | 4 368 | 4 369 | 0,1,2,3 370 | 371 | 372 | 0,1,2,3,4 373 | 374 | 375 | 0,1,2,3 376 | 4 377 | 4 378 | 0,1,2 379 | 3 380 | 381 | 0,1,2,4 382 | 3 383 | 384 | 0,1,2 385 | 3,4 386 | 3,4 387 | 0,1 388 | 2 389 | 3 390 | 0,1,4 391 | 2 392 | 3 393 | 0,1 394 | 2,4 395 | 4 396 | 0,1,3 397 | 2 398 | 399 | 0,1,3,4 400 | 2 401 | 402 | 0,1,3 403 | 2,4 404 | 4 405 | 0,1 406 | 2,3 407 | 408 | 0,1,4 409 | 2,3 410 | 411 | 0,1 412 | 2,3,4 413 | 2,3,4 414 | 0 415 | 1 416 | 2,3 417 | 0,4 418 | 1 419 | 2,3 420 | 0 421 | 1,4 422 | 2,4 423 | 0,3 424 | 1 425 | 2 426 | 0,3,4 427 | 1 428 | 2 429 | 0,3 430 | 1,4 431 | 2,4 432 | 0 433 | 1,3 434 | 2 435 | 0,4 436 | 1,3 437 | 2 438 | 0 439 | 1,3,4 440 | 3,4 441 | 0,2 442 | 1 443 | 3 444 | 0,2,4 445 | 1 446 | 3 447 | 0,2 448 | 1,4 449 | 4 450 | 0,2,3 451 | 1 452 | 453 | 0,2,3,4 454 | 1 455 | 456 | 0,2,3 457 | 1,4 458 | 4 459 | 0,2 460 | 1,3 461 | 462 | 0,2,4 463 | 1,3 464 | 465 | 0,2 466 | 1,3,4 467 | 3,4 468 | 0 469 | 1,2 470 | 3 471 | 0,4 472 | 
1,2 473 | 3 474 | 0 475 | 1,2,4 476 | 4 477 | 0,3 478 | 1,2 479 | 480 | 0,3,4 481 | 1,2 482 | 483 | 0,3 484 | 1,2,4 485 | 4 486 | 0 487 | 1,2,3 488 | 489 | 0,4 490 | 1,2,3 491 | 492 | 0 493 | 1,2,3,4 494 | 1,2,3,4 495 | 496 | 0 497 | 1,2,3 498 | 4 499 | 0 500 | 1,2,3 501 | 502 | 0,4 503 | 1,2,4 504 | 3 505 | 0 506 | 1,2 507 | 3,4 508 | 0 509 | 1,2 510 | 3 511 | 0,4 512 | 1,2,4 513 | 514 | 0,3 515 | 1,2 516 | 4 517 | 0,3 518 | 1,2 519 | 520 | 0,3,4 521 | 1,3,4 522 | 2 523 | 0 524 | 1,3 525 | 2,4 526 | 0 527 | 1,3 528 | 2 529 | 0,4 530 | 1,4 531 | 2,3 532 | 0 533 | 1 534 | 2,3,4 535 | 0 536 | 1 537 | 2,3 538 | 0,4 539 | 1,4 540 | 2 541 | 0,3 542 | 1 543 | 2,4 544 | 0,3 545 | 1 546 | 2 547 | 0,3,4 548 | 1,3,4 549 | 550 | 0,2 551 | 1,3 552 | 4 553 | 0,2 554 | 1,3 555 | 556 | 0,2,4 557 | 1,4 558 | 3 559 | 0,2 560 | 1 561 | 3,4 562 | 0,2 563 | 1 564 | 3 565 | 0,2,4 566 | 1,4 567 | 568 | 0,2,3 569 | 1 570 | 4 571 | 0,2,3 572 | 1 573 | 574 | 0,2,3,4 575 | 2,3,4 576 | 1 577 | 0 578 | 2,3 579 | 1,4 580 | 0 581 | 2,3 582 | 1 583 | 0,4 584 | 2,4 585 | 1,3 586 | 0 587 | 2 588 | 1,3,4 589 | 0 590 | 2 591 | 1,3 592 | 0,4 593 | 2,4 594 | 1 595 | 0,3 596 | 2 597 | 1,4 598 | 0,3 599 | 2 600 | 1 601 | 0,3,4 602 | 3,4 603 | 1,2 604 | 0 605 | 3 606 | 1,2,4 607 | 0 608 | 3 609 | 1,2 610 | 0,4 611 | 4 612 | 1,2,3 613 | 0 614 | 615 | 1,2,3,4 616 | 0 617 | 618 | 1,2,3 619 | 0,4 620 | 4 621 | 1,2 622 | 0,3 623 | 624 | 1,2,4 625 | 0,3 626 | 627 | 1,2 628 | 0,3,4 629 | 3,4 630 | 1 631 | 0,2 632 | 3 633 | 1,4 634 | 0,2 635 | 3 636 | 1 637 | 0,2,4 638 | 4 639 | 1,3 640 | 0,2 641 | 642 | 1,3,4 643 | 0,2 644 | 645 | 1,3 646 | 0,2,4 647 | 4 648 | 1 649 | 0,2,3 650 | 651 | 1,4 652 | 0,2,3 653 | 654 | 1 655 | 0,2,3,4 656 | 2,3,4 657 | 658 | 0,1 659 | 2,3 660 | 4 661 | 0,1 662 | 2,3 663 | 664 | 0,1,4 665 | 2,4 666 | 3 667 | 0,1 668 | 2 669 | 3,4 670 | 0,1 671 | 2 672 | 3 673 | 0,1,4 674 | 2,4 675 | 676 | 0,1,3 677 | 2 678 | 4 679 | 0,1,3 680 | 2 681 | 682 | 0,1,3,4 683 | 3,4 684 | 2 685 | 0,1 686 | 3 
687 | 2,4 688 | 0,1 689 | 3 690 | 2 691 | 0,1,4 692 | 4 693 | 2,3 694 | 0,1 695 | 696 | 2,3,4 697 | 0,1 698 | 699 | 2,3 700 | 0,1,4 701 | 4 702 | 2 703 | 0,1,3 704 | 705 | 2,4 706 | 0,1,3 707 | 708 | 2 709 | 0,1,3,4 710 | 3,4 711 | 712 | 0,1,2 713 | 3 714 | 4 715 | 0,1,2 716 | 3 717 | 718 | 0,1,2,4 719 | 4 720 | 3 721 | 0,1,2 722 | 723 | 3,4 724 | 0,1,2 725 | 726 | 3 727 | 0,1,2,4 728 | 4 729 | 730 | 0,1,2,3 731 | 732 | 4 733 | 0,1,2,3 734 | 735 | 736 | 0,1,2,3,4 737 | 0,0,0,0,0 738 | 0,0,0,0,1 739 | 0,0,0,0,2 740 | 0,0,0,1,0 741 | 0,0,0,1,1 742 | 0,0,0,1,2 743 | 0,0,0,2,0 744 | 0,0,0,2,1 745 | 0,0,0,2,2 746 | 0,0,1,0,0 747 | 0,0,1,0,1 748 | 0,0,1,0,2 749 | 0,0,1,1,0 750 | 0,0,1,1,1 751 | 0,0,1,1,2 752 | 0,0,1,2,0 753 | 0,0,1,2,1 754 | 0,0,1,2,2 755 | 0,0,2,0,0 756 | 0,0,2,0,1 757 | 0,0,2,0,2 758 | 0,0,2,1,0 759 | 0,0,2,1,1 760 | 0,0,2,1,2 761 | 0,0,2,2,0 762 | 0,0,2,2,1 763 | 0,0,2,2,2 764 | 0,1,0,0,0 765 | 0,1,0,0,1 766 | 0,1,0,0,2 767 | 0,1,0,1,0 768 | 0,1,0,1,1 769 | 0,1,0,1,2 770 | 0,1,0,2,0 771 | 0,1,0,2,1 772 | 0,1,0,2,2 773 | 0,1,1,0,0 774 | 0,1,1,0,1 775 | 0,1,1,0,2 776 | 0,1,1,1,0 777 | 0,1,1,1,1 778 | 0,1,1,1,2 779 | 0,1,1,2,0 780 | 0,1,1,2,1 781 | 0,1,1,2,2 782 | 0,1,2,0,0 783 | 0,1,2,0,1 784 | 0,1,2,0,2 785 | 0,1,2,1,0 786 | 0,1,2,1,1 787 | 0,1,2,1,2 788 | 0,1,2,2,0 789 | 0,1,2,2,1 790 | 0,1,2,2,2 791 | 0,2,0,0,0 792 | 0,2,0,0,1 793 | 0,2,0,0,2 794 | 0,2,0,1,0 795 | 0,2,0,1,1 796 | 0,2,0,1,2 797 | 0,2,0,2,0 798 | 0,2,0,2,1 799 | 0,2,0,2,2 800 | 0,2,1,0,0 801 | 0,2,1,0,1 802 | 0,2,1,0,2 803 | 0,2,1,1,0 804 | 0,2,1,1,1 805 | 0,2,1,1,2 806 | 0,2,1,2,0 807 | 0,2,1,2,1 808 | 0,2,1,2,2 809 | 0,2,2,0,0 810 | 0,2,2,0,1 811 | 0,2,2,0,2 812 | 0,2,2,1,0 813 | 0,2,2,1,1 814 | 0,2,2,1,2 815 | 0,2,2,2,0 816 | 0,2,2,2,1 817 | 0,2,2,2,2 818 | 1,0,0,0,0 819 | 1,0,0,0,1 820 | 1,0,0,0,2 821 | 1,0,0,1,0 822 | 1,0,0,1,1 823 | 1,0,0,1,2 824 | 1,0,0,2,0 825 | 1,0,0,2,1 826 | 1,0,0,2,2 827 | 1,0,1,0,0 828 | 1,0,1,0,1 829 | 1,0,1,0,2 830 | 1,0,1,1,0 831 | 1,0,1,1,1 832 | 
1,0,1,1,2 833 | 1,0,1,2,0 834 | 1,0,1,2,1 835 | 1,0,1,2,2 836 | 1,0,2,0,0 837 | 1,0,2,0,1 838 | 1,0,2,0,2 839 | 1,0,2,1,0 840 | 1,0,2,1,1 841 | 1,0,2,1,2 842 | 1,0,2,2,0 843 | 1,0,2,2,1 844 | 1,0,2,2,2 845 | 1,1,0,0,0 846 | 1,1,0,0,1 847 | 1,1,0,0,2 848 | 1,1,0,1,0 849 | 1,1,0,1,1 850 | 1,1,0,1,2 851 | 1,1,0,2,0 852 | 1,1,0,2,1 853 | 1,1,0,2,2 854 | 1,1,1,0,0 855 | 1,1,1,0,1 856 | 1,1,1,0,2 857 | 1,1,1,1,0 858 | 1,1,1,1,1 859 | 1,1,1,1,2 860 | 1,1,1,2,0 861 | 1,1,1,2,1 862 | 1,1,1,2,2 863 | 1,1,2,0,0 864 | 1,1,2,0,1 865 | 1,1,2,0,2 866 | 1,1,2,1,0 867 | 1,1,2,1,1 868 | 1,1,2,1,2 869 | 1,1,2,2,0 870 | 1,1,2,2,1 871 | 1,1,2,2,2 872 | 1,2,0,0,0 873 | 1,2,0,0,1 874 | 1,2,0,0,2 875 | 1,2,0,1,0 876 | 1,2,0,1,1 877 | 1,2,0,1,2 878 | 1,2,0,2,0 879 | 1,2,0,2,1 880 | 1,2,0,2,2 881 | 1,2,1,0,0 882 | 1,2,1,0,1 883 | 1,2,1,0,2 884 | 1,2,1,1,0 885 | 1,2,1,1,1 886 | 1,2,1,1,2 887 | 1,2,1,2,0 888 | 1,2,1,2,1 889 | 1,2,1,2,2 890 | 1,2,2,0,0 891 | 1,2,2,0,1 892 | 1,2,2,0,2 893 | 1,2,2,1,0 894 | 1,2,2,1,1 895 | 1,2,2,1,2 896 | 1,2,2,2,0 897 | 1,2,2,2,1 898 | 1,2,2,2,2 899 | 2,0,0,0,0 900 | 2,0,0,0,1 901 | 2,0,0,0,2 902 | 2,0,0,1,0 903 | 2,0,0,1,1 904 | 2,0,0,1,2 905 | 2,0,0,2,0 906 | 2,0,0,2,1 907 | 2,0,0,2,2 908 | 2,0,1,0,0 909 | 2,0,1,0,1 910 | 2,0,1,0,2 911 | 2,0,1,1,0 912 | 2,0,1,1,1 913 | 2,0,1,1,2 914 | 2,0,1,2,0 915 | 2,0,1,2,1 916 | 2,0,1,2,2 917 | 2,0,2,0,0 918 | 2,0,2,0,1 919 | 2,0,2,0,2 920 | 2,0,2,1,0 921 | 2,0,2,1,1 922 | 2,0,2,1,2 923 | 2,0,2,2,0 924 | 2,0,2,2,1 925 | 2,0,2,2,2 926 | 2,1,0,0,0 927 | 2,1,0,0,1 928 | 2,1,0,0,2 929 | 2,1,0,1,0 930 | 2,1,0,1,1 931 | 2,1,0,1,2 932 | 2,1,0,2,0 933 | 2,1,0,2,1 934 | 2,1,0,2,2 935 | 2,1,1,0,0 936 | 2,1,1,0,1 937 | 2,1,1,0,2 938 | 2,1,1,1,0 939 | 2,1,1,1,1 940 | 2,1,1,1,2 941 | 2,1,1,2,0 942 | 2,1,1,2,1 943 | 2,1,1,2,2 944 | 2,1,2,0,0 945 | 2,1,2,0,1 946 | 2,1,2,0,2 947 | 2,1,2,1,0 948 | 2,1,2,1,1 949 | 2,1,2,1,2 950 | 2,1,2,2,0 951 | 2,1,2,2,1 952 | 2,1,2,2,2 953 | 2,2,0,0,0 954 | 2,2,0,0,1 955 | 2,2,0,0,2 956 | 2,2,0,1,0 957 | 
2,2,0,1,1 958 | 2,2,0,1,2 959 | 2,2,0,2,0 960 | 2,2,0,2,1 961 | 2,2,0,2,2 962 | 2,2,1,0,0 963 | 2,2,1,0,1 964 | 2,2,1,0,2 965 | 2,2,1,1,0 966 | 2,2,1,1,1 967 | 2,2,1,1,2 968 | 2,2,1,2,0 969 | 2,2,1,2,1 970 | 2,2,1,2,2 971 | 2,2,2,0,0 972 | 2,2,2,0,1 973 | 2,2,2,0,2 974 | 2,2,2,1,0 975 | 2,2,2,1,1 976 | 2,2,2,1,2 977 | 2,2,2,2,0 978 | 2,2,2,2,1 979 | 2,2,2,2,2 980 | 210 981 | 0,1,2 982 | 3,4 983 | 984 | 0,1,2 985 | 3 986 | 4 987 | 0,1,2 988 | 4 989 | 3 990 | 0,1,2 991 | 992 | 3,4 993 | 0,1,3 994 | 2,4 995 | 996 | 0,1,3 997 | 2 998 | 4 999 | 0,1,4 1000 | 2,3 1001 | 1002 | 0,1 1003 | 2,3,4 1004 | 1005 | 0,1 1006 | 2,3 1007 | 4 1008 | 0,1,4 1009 | 2 1010 | 3 1011 | 0,1 1012 | 2,4 1013 | 3 1014 | 0,1 1015 | 2 1016 | 3,4 1017 | 0,1,3 1018 | 4 1019 | 2 1020 | 0,1,3 1021 | 1022 | 2,4 1023 | 0,1,4 1024 | 3 1025 | 2 1026 | 0,1 1027 | 3,4 1028 | 2 1029 | 0,1 1030 | 3 1031 | 2,4 1032 | 0,1,4 1033 | 1034 | 2,3 1035 | 0,1 1036 | 4 1037 | 2,3 1038 | 0,1 1039 | 1040 | 2,3,4 1041 | 0,2,3 1042 | 1,4 1043 | 1044 | 0,2,3 1045 | 1 1046 | 4 1047 | 0,2,4 1048 | 1,3 1049 | 1050 | 0,2 1051 | 1,3,4 1052 | 1053 | 0,2 1054 | 1,3 1055 | 4 1056 | 0,2,4 1057 | 1 1058 | 3 1059 | 0,2 1060 | 1,4 1061 | 3 1062 | 0,2 1063 | 1 1064 | 3,4 1065 | 0,3,4 1066 | 1,2 1067 | 1068 | 0,3 1069 | 1,2,4 1070 | 1071 | 0,3 1072 | 1,2 1073 | 4 1074 | 0,4 1075 | 1,2,3 1076 | 1077 | 0 1078 | 1,2,3 1079 | 4 1080 | 0,4 1081 | 1,2 1082 | 3 1083 | 0 1084 | 1,2,4 1085 | 3 1086 | 0 1087 | 1,2 1088 | 3,4 1089 | 0,3,4 1090 | 1 1091 | 2 1092 | 0,3 1093 | 1,4 1094 | 2 1095 | 0,3 1096 | 1 1097 | 2,4 1098 | 0,4 1099 | 1,3 1100 | 2 1101 | 0 1102 | 1,3,4 1103 | 2 1104 | 0 1105 | 1,3 1106 | 2,4 1107 | 0,4 1108 | 1 1109 | 2,3 1110 | 0 1111 | 1,4 1112 | 2,3 1113 | 0 1114 | 1 1115 | 2,3,4 1116 | 0,2,3 1117 | 4 1118 | 1 1119 | 0,2,3 1120 | 1121 | 1,4 1122 | 0,2,4 1123 | 3 1124 | 1 1125 | 0,2 1126 | 3,4 1127 | 1 1128 | 0,2 1129 | 3 1130 | 1,4 1131 | 0,2,4 1132 | 1133 | 1,3 1134 | 0,2 1135 | 4 1136 | 1,3 1137 | 0,2 1138 | 1139 | 1,3,4 
1140 | 0,3,4 1141 | 2 1142 | 1 1143 | 0,3 1144 | 2,4 1145 | 1 1146 | 0,3 1147 | 2 1148 | 1,4 1149 | 0,4 1150 | 2,3 1151 | 1 1152 | 0 1153 | 2,3,4 1154 | 1 1155 | 0 1156 | 2,3 1157 | 1,4 1158 | 0,4 1159 | 2 1160 | 1,3 1161 | 0 1162 | 2,4 1163 | 1,3 1164 | 0 1165 | 2 1166 | 1,3,4 1167 | 0,3,4 1168 | 1169 | 1,2 1170 | 0,3 1171 | 4 1172 | 1,2 1173 | 0,3 1174 | 1175 | 1,2,4 1176 | 0,4 1177 | 3 1178 | 1,2 1179 | 0 1180 | 3,4 1181 | 1,2 1182 | 0 1183 | 3 1184 | 1,2,4 1185 | 0,4 1186 | 1187 | 1,2,3 1188 | 0 1189 | 4 1190 | 1,2,3 1191 | 1,2,3 1192 | 0,4 1193 | 1194 | 1,2,3 1195 | 0 1196 | 4 1197 | 1,2,4 1198 | 0,3 1199 | 1200 | 1,2 1201 | 0,3,4 1202 | 1203 | 1,2 1204 | 0,3 1205 | 4 1206 | 1,2,4 1207 | 0 1208 | 3 1209 | 1,2 1210 | 0,4 1211 | 3 1212 | 1,2 1213 | 0 1214 | 3,4 1215 | 1,3,4 1216 | 0,2 1217 | 1218 | 1,3 1219 | 0,2,4 1220 | 1221 | 1,3 1222 | 0,2 1223 | 4 1224 | 1,4 1225 | 0,2,3 1226 | 1227 | 1 1228 | 0,2,3 1229 | 4 1230 | 1,4 1231 | 0,2 1232 | 3 1233 | 1 1234 | 0,2,4 1235 | 3 1236 | 1 1237 | 0,2 1238 | 3,4 1239 | 1,3,4 1240 | 0 1241 | 2 1242 | 1,3 1243 | 0,4 1244 | 2 1245 | 1,3 1246 | 0 1247 | 2,4 1248 | 1,4 1249 | 0,3 1250 | 2 1251 | 1 1252 | 0,3,4 1253 | 2 1254 | 1 1255 | 0,3 1256 | 2,4 1257 | 1,4 1258 | 0 1259 | 2,3 1260 | 1 1261 | 0,4 1262 | 2,3 1263 | 1 1264 | 0 1265 | 2,3,4 1266 | 2,3,4 1267 | 0,1 1268 | 1269 | 2,3 1270 | 0,1,4 1271 | 1272 | 2,3 1273 | 0,1 1274 | 4 1275 | 2,4 1276 | 0,1,3 1277 | 1278 | 2 1279 | 0,1,3 1280 | 4 1281 | 2,4 1282 | 0,1 1283 | 3 1284 | 2 1285 | 0,1,4 1286 | 3 1287 | 2 1288 | 0,1 1289 | 3,4 1290 | 3,4 1291 | 0,1,2 1292 | 1293 | 3 1294 | 0,1,2 1295 | 4 1296 | 4 1297 | 0,1,2 1298 | 3 1299 | 1300 | 0,1,2 1301 | 3,4 1302 | 3,4 1303 | 0,1 1304 | 2 1305 | 3 1306 | 0,1,4 1307 | 2 1308 | 3 1309 | 0,1 1310 | 2,4 1311 | 4 1312 | 0,1,3 1313 | 2 1314 | 1315 | 0,1,3 1316 | 2,4 1317 | 4 1318 | 0,1 1319 | 2,3 1320 | 1321 | 0,1,4 1322 | 2,3 1323 | 1324 | 0,1 1325 | 2,3,4 1326 | 2,3,4 1327 | 0 1328 | 1 1329 | 2,3 1330 | 0,4 1331 | 1 1332 | 2,3 1333 
| 0 1334 | 1,4 1335 | 2,4 1336 | 0,3 1337 | 1 1338 | 2 1339 | 0,3,4 1340 | 1 1341 | 2 1342 | 0,3 1343 | 1,4 1344 | 2,4 1345 | 0 1346 | 1,3 1347 | 2 1348 | 0,4 1349 | 1,3 1350 | 2 1351 | 0 1352 | 1,3,4 1353 | 3,4 1354 | 0,2 1355 | 1 1356 | 3 1357 | 0,2,4 1358 | 1 1359 | 3 1360 | 0,2 1361 | 1,4 1362 | 4 1363 | 0,2,3 1364 | 1 1365 | 1366 | 0,2,3 1367 | 1,4 1368 | 4 1369 | 0,2 1370 | 1,3 1371 | 1372 | 0,2,4 1373 | 1,3 1374 | 1375 | 0,2 1376 | 1,3,4 1377 | 3,4 1378 | 0 1379 | 1,2 1380 | 3 1381 | 0,4 1382 | 1,2 1383 | 3 1384 | 0 1385 | 1,2,4 1386 | 4 1387 | 0,3 1388 | 1,2 1389 | 1390 | 0,3,4 1391 | 1,2 1392 | 1393 | 0,3 1394 | 1,2,4 1395 | 4 1396 | 0 1397 | 1,2,3 1398 | 1399 | 0,4 1400 | 1,2,3 1401 | 1,2,3 1402 | 4 1403 | 0 1404 | 1,2,3 1405 | 1406 | 0,4 1407 | 1,2,4 1408 | 3 1409 | 0 1410 | 1,2 1411 | 3,4 1412 | 0 1413 | 1,2 1414 | 3 1415 | 0,4 1416 | 1,2,4 1417 | 1418 | 0,3 1419 | 1,2 1420 | 4 1421 | 0,3 1422 | 1,2 1423 | 1424 | 0,3,4 1425 | 1,3,4 1426 | 2 1427 | 0 1428 | 1,3 1429 | 2,4 1430 | 0 1431 | 1,3 1432 | 2 1433 | 0,4 1434 | 1,4 1435 | 2,3 1436 | 0 1437 | 1 1438 | 2,3,4 1439 | 0 1440 | 1 1441 | 2,3 1442 | 0,4 1443 | 1,4 1444 | 2 1445 | 0,3 1446 | 1 1447 | 2,4 1448 | 0,3 1449 | 1 1450 | 2 1451 | 0,3,4 1452 | 1,3,4 1453 | 1454 | 0,2 1455 | 1,3 1456 | 4 1457 | 0,2 1458 | 1,3 1459 | 1460 | 0,2,4 1461 | 1,4 1462 | 3 1463 | 0,2 1464 | 1 1465 | 3,4 1466 | 0,2 1467 | 1 1468 | 3 1469 | 0,2,4 1470 | 1,4 1471 | 1472 | 0,2,3 1473 | 1 1474 | 4 1475 | 0,2,3 1476 | 2,3,4 1477 | 1 1478 | 0 1479 | 2,3 1480 | 1,4 1481 | 0 1482 | 2,3 1483 | 1 1484 | 0,4 1485 | 2,4 1486 | 1,3 1487 | 0 1488 | 2 1489 | 1,3,4 1490 | 0 1491 | 2 1492 | 1,3 1493 | 0,4 1494 | 2,4 1495 | 1 1496 | 0,3 1497 | 2 1498 | 1,4 1499 | 0,3 1500 | 2 1501 | 1 1502 | 0,3,4 1503 | 3,4 1504 | 1,2 1505 | 0 1506 | 3 1507 | 1,2,4 1508 | 0 1509 | 3 1510 | 1,2 1511 | 0,4 1512 | 4 1513 | 1,2,3 1514 | 0 1515 | 1516 | 1,2,3 1517 | 0,4 1518 | 4 1519 | 1,2 1520 | 0,3 1521 | 1522 | 1,2,4 1523 | 0,3 1524 | 1525 | 1,2 1526 | 0,3,4 
1527 | 3,4 1528 | 1 1529 | 0,2 1530 | 3 1531 | 1,4 1532 | 0,2 1533 | 3 1534 | 1 1535 | 0,2,4 1536 | 4 1537 | 1,3 1538 | 0,2 1539 | 1540 | 1,3,4 1541 | 0,2 1542 | 1543 | 1,3 1544 | 0,2,4 1545 | 4 1546 | 1 1547 | 0,2,3 1548 | 1549 | 1,4 1550 | 0,2,3 1551 | 2,3,4 1552 | 1553 | 0,1 1554 | 2,3 1555 | 4 1556 | 0,1 1557 | 2,3 1558 | 1559 | 0,1,4 1560 | 2,4 1561 | 3 1562 | 0,1 1563 | 2 1564 | 3,4 1565 | 0,1 1566 | 2 1567 | 3 1568 | 0,1,4 1569 | 2,4 1570 | 1571 | 0,1,3 1572 | 2 1573 | 4 1574 | 0,1,3 1575 | 3,4 1576 | 2 1577 | 0,1 1578 | 3 1579 | 2,4 1580 | 0,1 1581 | 3 1582 | 2 1583 | 0,1,4 1584 | 4 1585 | 2,3 1586 | 0,1 1587 | 1588 | 2,3,4 1589 | 0,1 1590 | 1591 | 2,3 1592 | 0,1,4 1593 | 4 1594 | 2 1595 | 0,1,3 1596 | 1597 | 2,4 1598 | 0,1,3 1599 | 3,4 1600 | 1601 | 0,1,2 1602 | 3 1603 | 4 1604 | 0,1,2 1605 | 4 1606 | 3 1607 | 0,1,2 1608 | 1609 | 3,4 1610 | 0,1,2 1611 | 0,0,0,1,1 1612 | 0,0,0,1,2 1613 | 0,0,0,2,1 1614 | 0,0,0,2,2 1615 | 0,0,1,0,1 1616 | 0,0,1,0,2 1617 | 0,0,1,1,0 1618 | 0,0,1,1,1 1619 | 0,0,1,1,2 1620 | 0,0,1,2,0 1621 | 0,0,1,2,1 1622 | 0,0,1,2,2 1623 | 0,0,2,0,1 1624 | 0,0,2,0,2 1625 | 0,0,2,1,0 1626 | 0,0,2,1,1 1627 | 0,0,2,1,2 1628 | 0,0,2,2,0 1629 | 0,0,2,2,1 1630 | 0,0,2,2,2 1631 | 0,1,0,0,1 1632 | 0,1,0,0,2 1633 | 0,1,0,1,0 1634 | 0,1,0,1,1 1635 | 0,1,0,1,2 1636 | 0,1,0,2,0 1637 | 0,1,0,2,1 1638 | 0,1,0,2,2 1639 | 0,1,1,0,0 1640 | 0,1,1,0,1 1641 | 0,1,1,0,2 1642 | 0,1,1,1,0 1643 | 0,1,1,1,2 1644 | 0,1,1,2,0 1645 | 0,1,1,2,1 1646 | 0,1,1,2,2 1647 | 0,1,2,0,0 1648 | 0,1,2,0,1 1649 | 0,1,2,0,2 1650 | 0,1,2,1,0 1651 | 0,1,2,1,1 1652 | 0,1,2,1,2 1653 | 0,1,2,2,0 1654 | 0,1,2,2,1 1655 | 0,1,2,2,2 1656 | 0,2,0,0,1 1657 | 0,2,0,0,2 1658 | 0,2,0,1,0 1659 | 0,2,0,1,1 1660 | 0,2,0,1,2 1661 | 0,2,0,2,0 1662 | 0,2,0,2,1 1663 | 0,2,0,2,2 1664 | 0,2,1,0,0 1665 | 0,2,1,0,1 1666 | 0,2,1,0,2 1667 | 0,2,1,1,0 1668 | 0,2,1,1,1 1669 | 0,2,1,1,2 1670 | 0,2,1,2,0 1671 | 0,2,1,2,1 1672 | 0,2,1,2,2 1673 | 0,2,2,0,0 1674 | 0,2,2,0,1 1675 | 0,2,2,0,2 1676 | 0,2,2,1,0 1677 | 
0,2,2,1,1 1678 | 0,2,2,1,2 1679 | 0,2,2,2,0 1680 | 0,2,2,2,1 1681 | 1,0,0,0,1 1682 | 1,0,0,0,2 1683 | 1,0,0,1,0 1684 | 1,0,0,1,1 1685 | 1,0,0,1,2 1686 | 1,0,0,2,0 1687 | 1,0,0,2,1 1688 | 1,0,0,2,2 1689 | 1,0,1,0,0 1690 | 1,0,1,0,1 1691 | 1,0,1,0,2 1692 | 1,0,1,1,0 1693 | 1,0,1,1,2 1694 | 1,0,1,2,0 1695 | 1,0,1,2,1 1696 | 1,0,1,2,2 1697 | 1,0,2,0,0 1698 | 1,0,2,0,1 1699 | 1,0,2,0,2 1700 | 1,0,2,1,0 1701 | 1,0,2,1,1 1702 | 1,0,2,1,2 1703 | 1,0,2,2,0 1704 | 1,0,2,2,1 1705 | 1,0,2,2,2 1706 | 1,1,0,0,0 1707 | 1,1,0,0,1 1708 | 1,1,0,0,2 1709 | 1,1,0,1,0 1710 | 1,1,0,1,2 1711 | 1,1,0,2,0 1712 | 1,1,0,2,1 1713 | 1,1,0,2,2 1714 | 1,1,1,0,0 1715 | 1,1,1,0,2 1716 | 1,1,1,2,0 1717 | 1,1,1,2,2 1718 | 1,1,2,0,0 1719 | 1,1,2,0,1 1720 | 1,1,2,0,2 1721 | 1,1,2,1,0 1722 | 1,1,2,1,2 1723 | 1,1,2,2,0 1724 | 1,1,2,2,1 1725 | 1,1,2,2,2 1726 | 1,2,0,0,0 1727 | 1,2,0,0,1 1728 | 1,2,0,0,2 1729 | 1,2,0,1,0 1730 | 1,2,0,1,1 1731 | 1,2,0,1,2 1732 | 1,2,0,2,0 1733 | 1,2,0,2,1 1734 | 1,2,0,2,2 1735 | 1,2,1,0,0 1736 | 1,2,1,0,1 1737 | 1,2,1,0,2 1738 | 1,2,1,1,0 1739 | 1,2,1,1,2 1740 | 1,2,1,2,0 1741 | 1,2,1,2,1 1742 | 1,2,1,2,2 1743 | 1,2,2,0,0 1744 | 1,2,2,0,1 1745 | 1,2,2,0,2 1746 | 1,2,2,1,0 1747 | 1,2,2,1,1 1748 | 1,2,2,1,2 1749 | 1,2,2,2,0 1750 | 1,2,2,2,1 1751 | 2,0,0,0,1 1752 | 2,0,0,0,2 1753 | 2,0,0,1,0 1754 | 2,0,0,1,1 1755 | 2,0,0,1,2 1756 | 2,0,0,2,0 1757 | 2,0,0,2,1 1758 | 2,0,0,2,2 1759 | 2,0,1,0,0 1760 | 2,0,1,0,1 1761 | 2,0,1,0,2 1762 | 2,0,1,1,0 1763 | 2,0,1,1,1 1764 | 2,0,1,1,2 1765 | 2,0,1,2,0 1766 | 2,0,1,2,1 1767 | 2,0,1,2,2 1768 | 2,0,2,0,0 1769 | 2,0,2,0,1 1770 | 2,0,2,0,2 1771 | 2,0,2,1,0 1772 | 2,0,2,1,1 1773 | 2,0,2,1,2 1774 | 2,0,2,2,0 1775 | 2,0,2,2,1 1776 | 2,1,0,0,0 1777 | 2,1,0,0,1 1778 | 2,1,0,0,2 1779 | 2,1,0,1,0 1780 | 2,1,0,1,1 1781 | 2,1,0,1,2 1782 | 2,1,0,2,0 1783 | 2,1,0,2,1 1784 | 2,1,0,2,2 1785 | 2,1,1,0,0 1786 | 2,1,1,0,1 1787 | 2,1,1,0,2 1788 | 2,1,1,1,0 1789 | 2,1,1,1,2 1790 | 2,1,1,2,0 1791 | 2,1,1,2,1 1792 | 2,1,1,2,2 1793 | 2,1,2,0,0 1794 | 2,1,2,0,1 
1795 | 2,1,2,0,2 1796 | 2,1,2,1,0 1797 | 2,1,2,1,1 1798 | 2,1,2,1,2 1799 | 2,1,2,2,0 1800 | 2,1,2,2,1 1801 | 2,2,0,0,0 1802 | 2,2,0,0,1 1803 | 2,2,0,0,2 1804 | 2,2,0,1,0 1805 | 2,2,0,1,1 1806 | 2,2,0,1,2 1807 | 2,2,0,2,0 1808 | 2,2,0,2,1 1809 | 2,2,1,0,0 1810 | 2,2,1,0,1 1811 | 2,2,1,0,2 1812 | 2,2,1,1,0 1813 | 2,2,1,1,1 1814 | 2,2,1,1,2 1815 | 2,2,1,2,0 1816 | 2,2,1,2,1 1817 | 2,2,2,0,0 1818 | 2,2,2,0,1 1819 | 2,2,2,1,0 1820 | 2,2,2,1,1 1821 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning 2 | env.py can create the environment. 3 | DQN.py is the function code of DQN. 4 | train_DQN.py train the DQN model. 5 | test_DQN.py test the DQN model. 6 | pdqn.py is the function code of PQDN, and it can test the PDQN model. 7 | train_PDQN.py train the PDQN model. 8 | -------------------------------------------------------------------------------- /__pycache__/DQN.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/__pycache__/DQN.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/__pycache__/agent.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/env.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/__pycache__/env.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/pdqn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/__pycache__/pdqn.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/tool.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/__pycache__/tool.cpython-37.pyc -------------------------------------------------------------------------------- /agent.py: -------------------------------------------------------------------------------- 1 | #!python3 2 | class Agent(object): 3 | """ 4 | Defines a basic reinforcement learning agent for OpenAI Gym environments 5 | """ 6 | 7 | NAME = "Abstract Agent" 8 | 9 | def __init__(self, observation_space, action_space): 10 | super().__init__() 11 | self.observation_space = observation_space 12 | self.action_space = action_space 13 | 14 | def act(self, state): 15 | """ 16 | Determines the action to take in the given state. 17 | 18 | :param state: 19 | :return: 20 | """ 21 | raise NotImplementedError 22 | 23 | def step(self, state, action, reward, next_state, next_action, terminal, time_steps=1): 24 | """ 25 | Performs a learning step given a (s,a,r,s',a') sample. 
def __init__(self,nMBS=1,lambda1=0,lambda2=0,lambda3=0,MAXepisode=1500,n_baseline=6):
    """Set the fixed network/radio constants and allocate the debug logs.

    :param nMBS: accepted for interface compatibility; the value is not read
        (the number of MBSs is fixed to 1 below).
    :param lambda1: stored unchanged as ``self.lambda1``.
    :param lambda2: stored unchanged as ``self.lambda2``.
    :param lambda3: stored unchanged as ``self.lambda3``.
    :param MAXepisode: number of episode slots (keys '0', '1', ...) created
        in every debug log.
    :param n_baseline: every debug log gets ``n_baseline + 1`` method slots
        (keys '0' .. str(n_baseline)).
    """
    super(env_PowerAllocation, self).__init__()
    # ---- network / radio settings ------------------------------------
    self.nMBS = 1
    self.nSBS = 3                    # or J
    self.nTP = self.nMBS + self.nSBS
    self.nUE = 5                     # or K
    self.rMBS = 500                  # in m
    self.dmin = 10
    self.P_Max_MBS = 10**(4.3-3)     # about 19.95 W
    self.P_Max_SBS = 10**(2.4-3)     # about 0.25 W
    self.Pc_MBS = 130                # in W
    self.Pc_SBS = 0.5                # in W
    self.NT = 100
    self.Ng = 20
    self.N = 3                       # number of subchannels
    self.subB = 15000                # in Hz
    self.Noise = (10**(-17.4))*0.001 # W/Hz
    self.SINR_threshold = 1          # 0dB / 1dB / 2dB
    self.Throughput_UE_threshold = np.log2(1+self.SINR_threshold)
    self.lambda1 = lambda1
    self.lambda2 = lambda2
    self.lambda3 = lambda3
    self.ori_sizeTable = self.nSBS**self.nUE          # all possible associations
    self.s_dim = (self.nUE+1)*self.nSBS+self.nUE      # state dimension
    # ---- limits of the continuous parameters per discrete action -----
    self.parameter_min = [np.array([0 for i in range(self.nUE)])]
    self.parameter_max = [np.array([self.P_Max_SBS for i in range(self.nUE)])]
    # ---- clipping bounds used to avoid inf / division by zero --------
    self.delta_min = 10**(-20)
    self.delta_max = 10**(20)
    self.SINR_min = 10**(-5.5)       # NOTE(review): original note said -20dB; 10**(-5.5) is -55 dB
    self.SINR_max = 10**(5.5)        # NOTE(review): original note said 20dB; 10**(5.5) is 55 dB
    # ---- debug / analysis logs ----------------------------------------
    # Layout of every log: {method: {episode: container}}.
    episode_keys = [str(i) for i in range(MAXepisode)]
    method_keys = [str(i) for i in range(n_baseline+1)]

    def _log(make_container):
        # A fresh container per (method, episode) pair, so no two slots
        # alias each other.  The SBS-threshold log used to reuse one dict
        # for every method slot, unlike all the other logs.
        return {m: {ep: make_container() for ep in episode_keys} for m in method_keys}

    def _per_ue():
        return {'UE'+str(j): [] for j in range(self.nUE)}

    self.debug_I = _log(_per_ue)                  # I, intra-cluster, inter-cluster
    self.debug_UE_throughput = _log(list)         # each UE throughput
    self.debug_SBS_throughput = _log(list)
    self.debug_SBS_threshold = _log(list)
    self.debug_c = _log(list)                     # user association
    self.debug_p = _log(list)                     # power allocation
    self.debug_backhaul = _log(dict)              # backhaul-constraint violations
    self.debug_QoS = _log(dict)                   # QoS-constraint violations
    self.debug_system_throughput = _log(list)
    self.debug_system_energy = _log(list)
def load(self,name):
    """Rebuild a stored environment from '<name>.csv'.

    Reads the stored data with ``readCSV``, draws the layout with
    ``plotNetwork``, then derives the SBS-to-UE and MBS-to-SBS distances
    and the action space from the loaded coordinates and table size.

    :param name: passed unchanged to ``readCSV`` and ``plotNetwork``.
    """
    # 1) load, 2) plot
    self.readCSV(name)
    self.plotNetwork(name)
    # 3) distances used for the path loss
    ue_x, ue_y = np.array(self.xUE), np.array(self.yUE)
    sbs_x, sbs_y = np.array(self.xSBS), np.array(self.ySBS)
    sbs_to_ue = []
    mbs_to_sbs = []
    for sx, sy in zip(sbs_x, sbs_y):
        sbs_to_ue.append(((ue_x-sx)**2+(ue_y-sy)**2)**0.5)
        # the MBS sits at the origin
        mbs_to_sbs.append(((sx)**2+(sy)**2)**0.5)
    self.dSBS2UE = sbs_to_ue
    self.dMBS2SBS = mbs_to_sbs
    # 4) action space: (number of discrete actions, one (min, max) pair per action)
    lower, upper = self.parameter_min[0], self.parameter_max[0]
    self.action_space = (self.sizeTable, [(lower, upper) for _ in range(self.sizeTable)])
def plotNetwork(self,name):
    """Plot the MBS/SBS/UE layout, save it as '<name>.png', show it and print the coordinates.

    :param name: file name stem of the saved figure.
    """
    plt.figure(figsize=(5,5))
    # 1) one scatter layer per node type
    layers = (
        ([0], [0], 80, 'red', 'o', 'MBS'),
        (self.xSBS, self.ySBS, 50, 'green', 'D', 'SBS'),
        (self.xUE, self.yUE, 50, 'blue', '*', 'UE'),
    )
    for xs, ys, size, colour, shape, label in layers:
        plt.scatter(xs,ys,s=size,c=colour,marker=shape,alpha=0.5,label=label)
    # 2) index labels: "0" for the MBS, 1-based numbers for SBSs and UEs
    plt.annotate("0", xy=(0,0), xytext=(0, 0))
    for xs, ys in ((self.xSBS, self.ySBS), (self.xUE, self.yUE)):
        for number, (x, y) in enumerate(zip(xs, ys), start=1):
            plt.annotate("%s" % number, xy=(x,y), xytext=(x, y))
    margin = 50
    lower, upper = -self.rMBS-margin, self.rMBS+margin
    plt.xlim((lower, upper))
    plt.ylim((lower, upper))
    plt.title('Network Geometry ')
    plt.xlabel('Distance(m)')
    plt.ylabel('Distance(m)')
    plt.legend(loc='upper right')
    plt.savefig(name+'.png')
    plt.show()
    # 3) coordinate listing (0-based indices here)
    for heading, count, xs, ys in (('SBS Location', self.nSBS, self.xSBS, self.ySBS),
                                   ('UE Location', self.nUE, self.xUE, self.yUE)):
        print(heading)
        for i in range(count):
            print(i,' (',xs[i],',',ys[i],')')
############################################## 201 | writer.writerow([self.sizeTable]) 202 | # write TP2UE 203 | for key,lis in self.TP2UE.items(): 204 | for i in lis: 205 | writer.writerow(i) 206 | # write UE2TP 207 | for key,lis in self.UE2TP.items(): 208 | writer.writerow(lis) 209 | 210 | def readCSV(self,FileName): 211 | self.ori_TP2UE={i:[] for i in range(self.ori_sizeTable)} 212 | self.ori_UE2TP={} 213 | with open(FileName+'.csv', newline='') as csvfile: 214 | rows = csv.reader(csvfile) 215 | rows = list(rows) 216 | # read SBS, UE location 217 | self.xSBS,self.ySBS=[ float(i) for i in rows[0]],[ float(i) for i in rows[1]] 218 | self.xUE,self.yUE=[ float(i) for i in rows[2]],[ float(i) for i in rows[3]] 219 | # read channel gain 220 | self.G=np.array([float(i) for lis in rows[4:4+self.nSBS] for i in lis]).reshape(self.nSBS,self.nUE+1) 221 | # read ori_TP2UE 222 | cnt=4+self.nSBS 223 | for i in range(self.ori_sizeTable): 224 | for j in range(self.nSBS): 225 | self.ori_TP2UE[i].append([int(v) for v in rows[cnt]]) 226 | cnt=cnt+1 227 | # read ori_UE2TP 228 | for i in range(self.ori_sizeTable): 229 | self.ori_UE2TP[i]=[int(v) for v in rows[cnt]] 230 | cnt=cnt+1 231 | # read sizeTable ?? 
def index_list(self,l):
    """Group UE indices by the SBS they are associated with.

    Example (with ``self.nSBS == 6``)::

        l      = [1, 1, 2, 3, 5, 5]
        L_list = [[], [0, 1], [2], [3], [], [4, 5]]

    :param l: list whose entry at position k is the SBS index chosen for
        UE k.  The list is only read; it is not modified.
    :return: ``(L_list, invalid)`` where ``L_list[i]`` holds, in ascending
        order, the positions of ``l`` equal to ``i`` for every
        ``i < self.nSBS``, and ``invalid`` is True when any group holds
        more than ``self.N`` entries.
    """
    L_list = []
    invalid = False
    for i in range(self.nSBS):
        # One pass per SBS instead of repeated count()/index() calls that
        # also overwrote the caller's list with a sentinel value.
        loc = [pos for pos, tp in enumerate(l) if tp == i]
        if len(loc) > self.N:
            invalid = True
        L_list.append(loc)
    return L_list, invalid
def channel(self):
    """Compute the channel-gain matrix from the stored distances.

    Reads ``self.dSBS2UE`` and ``self.dMBS2SBS`` (divided by 1000 inside
    the log terms) and writes ``self.PL`` and ``self.G``: one row per SBS,
    one column per UE plus a last column for the MBS-SBS link.  Only the
    path loss is applied; the fading and shadowing terms are switched off,
    so ``self.G`` is the same array object as ``self.PL``.
    """
    # Shadowing is disabled; kept as explicit zero offsets.
    shadowing_ue = 0
    shadowing_sbs = 0
    # Path loss in dB for every SBS-UE pair, row by row.
    ue_terms = []
    for distances in self.dSBS2UE:
        for dist in distances:
            ue_terms.append(30.53+36.7*math.log10(dist/1000))
    loss_ue_db = np.array(ue_terms).reshape(self.nSBS,self.nUE) + shadowing_ue
    # Path loss in dB for every MBS-SBS link.
    sbs_terms = [19.77+3.91*math.log10(dist/1000) for dist in self.dMBS2SBS]
    loss_sbs_db = np.array(sbs_terms).reshape(self.nSBS,1) + shadowing_sbs
    loss_db = np.concatenate((loss_ue_db,loss_sbs_db),axis=1)
    # dB loss -> linear gain
    self.PL = 10**(-loss_db/10)
    self.G = self.PL
def mean_std(self,n,cflage,name):
    """Fill ``self.dic_mean`` / ``self.dic_std`` for the seven tracked metrics.

    Parameters
    ----------
    n : int
        Number of random samples to draw (only used when ``cflage`` is truthy).
    cflage : bool
        Truthy: sample, compute the statistics and write them to ``name``.
        Falsy: read previously saved statistics back from ``name``.
    name : str
        Path of the two-row CSV file (row 0 = means, row 1 = standard
        deviations, columns in the order of ``key`` below).
    """
    key = ['Energy Efficiency','Backhaul Cost','QoS Gurantee','QoS Bad','QoS Good','System Throughput','QoS Squared Difference']
    self.dic_mean = dict.fromkeys(key, 0)
    self.dic_std = dict.fromkeys(key, 0)

    # 2) load mean and standard deviation to use
    if not cflage:
        with open(name, newline='') as csvfile:
            rows = list(csv.reader(csvfile))
        for column, label in enumerate(key):
            self.dic_mean[label] = float(rows[0][column])
            self.dic_std[label] = float(rows[1][column])
        return

    # 1) calculate mean and standard deviation and save
    # Order in which step_mean_std() reports its values; it differs from
    # the column order in ``key``.
    reported = ('Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference')
    samples = {label: [] for label in key}
    for k in range(n):
        print(k,' steps.......')
        c = np.random.choice([i for i in range(self.ori_sizeTable)])
        P = np.random.uniform(0, self.P_Max_SBS, self.nUE)
        (efficiency, backhaul, qos_good, qos_gurantee,
         qos_bad, throughput, qos_squared) = self.step_mean_std(c, P)
        measured = (efficiency, backhaul, qos_good, qos_gurantee,
                    qos_bad, throughput, qos_squared)
        for label, value in zip(reported, measured):
            samples[label].append(value)

    for label in key:
        column = np.array(samples[label])
        self.dic_mean[label] = np.mean(column)
        self.dic_std[label] = np.std(column)

    with open(name,'w',newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow([self.dic_mean[label] for label in key])
        writer.writerow([self.dic_std[label] for label in key])
def step_train(self,chosen_c,P,f_ori_c,f_subc,f_debug,episode,timestep):
    """Evaluate one (association, power) action during training.

    Parameters
    ----------
    chosen_c
        Index into the association tables.
    P
        Per-UE transmit power, indexed by UE.  The debug branch evaluates
        ``P*1000``, so an ndarray is needed when ``f_debug`` is set.
    f_ori_c
        ``False`` selects ``self.TP2UE`` / ``self.UE2TP``; anything else
        selects the ``ori_`` tables.
    f_subc
        ``True`` re-runs ``SubchannelAllocation()`` for the chosen cluster.
    f_debug
        Truthy: use ``debug_Interference`` and append to the ``debug_*``
        traces under key ``'1'``.
    episode, timestep
        Only used (as strings) to index the debug dictionaries.

    Returns
    -------
    tuple
        ``(info, info_lis, s_, info_ori, done, debug_info, QoS_R)`` where
        ``info`` holds the reward followed by the standardised metrics,
        ``info_ori`` the raw metrics, ``s_`` the backhaul-corrected per-UE
        throughput (next state), ``done`` is True when the backhaul
        constraint is violated and ``QoS_R`` is 1 when no UE is below its
        QoS threshold.
    """
    done=False # True if violate backhaul constraint
    QoS_R=0    # 1 if satisfy all UEs' QoS requirement
    #0) Determine cluster--------------------------------------------------
    # Explicit comparison kept on purpose: only a value equal to False
    # selects the reduced tables.
    if f_ori_c == False:
        self.chosen_TP2UE=self.TP2UE[chosen_c]
        self.chosen_UE2TP=self.UE2TP[chosen_c]
    else:
        self.chosen_TP2UE=self.ori_TP2UE[chosen_c]
        self.chosen_UE2TP=self.ori_UE2TP[chosen_c]
    #1) channel is kept fixed within an episode, so it is not redrawn here
    #2) SubchannelAllocation for different cluster-------------------------
    if f_subc == True:
        self.SubchannelAllocation()
    #3) R------------------------------------------------------------------
    if f_debug:
        I = self.debug_Interference(P,episode,'1')
    else:
        I = self._Interference(P)
    SINR = self._SINR(I,P) #array
    # Floor the SINR before taking the log.  The previous
    # np.clip(SINR, delta_min, SINR) used the array as its own upper bound,
    # which makes the clip a no-op and lets log10(0) produce -inf.
    SINRdb = 10*np.log10(np.maximum(SINR,self.delta_min))
    Throughput_ori = self._Throughput(P,SINR)
    # number of SBSs that serve at least one UE
    n=self.nSBS-sum(1 for served in self.chosen_TP2UE if len(served)==0)
    # 3-1) check backhaul constraint
    Backhaul_difference = np.array(self.Throughput_BS)-np.array(self.Throughput_SBS_threshold)
    dic_backhaul={i:dif for i,dif in enumerate(Backhaul_difference) if dif>0}
    Backhaul_cost_ori=0
    # 3-2) correct throughput when violating backhaul constraint:
    #      scale every UE of an overloaded SBS by threshold / offered load
    c_Throughput_ori = copy.deepcopy(Throughput_ori)
    for i,excess in dic_backhaul.items():
        Backhaul_cost_ori=Backhaul_cost_ori+excess
        for k in self.chosen_TP2UE[i]:
            c_Throughput_ori[k]=Throughput_ori[k]*self.Throughput_SBS_threshold[i]/self.Throughput_BS[i]
    c_Throughput_ori=np.array(c_Throughput_ori)
    sum_Throughput_ori =sum(c_Throughput_ori)
    Energy_Efficiency_ori = sum_Throughput_ori/(n*self.Pc_SBS+sum(P))
    Energy_Efficiency_ori = np.clip(Energy_Efficiency_ori,-self.delta_max,self.delta_max)
    QoS_difference = c_Throughput_ori-self.Throughput_UE_threshold
    QoS_good_ori = sum([i for i in QoS_difference if i>0])
    QoS_bad_ori = sum([-i for i in QoS_difference if i<0])
    QoS_gurantee_ori=QoS_good_ori-QoS_bad_ori
    QoS_squaredD_ori = sum([i*i for i in QoS_difference])
    # 3-3) standardize (backhaul cost is deliberately left unscaled)
    Energy_Efficiency = (Energy_Efficiency_ori-self.dic_mean['Energy Efficiency'])/self.dic_std['Energy Efficiency']
    Backhaul_cost = Backhaul_cost_ori
    QoS_gurantee= (QoS_gurantee_ori-self.dic_mean['QoS Gurantee'])/self.dic_std['QoS Gurantee']
    QoS_bad = (QoS_bad_ori-self.dic_mean['QoS Bad'])/self.dic_std['QoS Bad']
    QoS_good =(QoS_good_ori-self.dic_mean['QoS Good'])/self.dic_std['QoS Good']
    sum_Throughput =(sum_Throughput_ori-self.dic_mean['System Throughput'])/self.dic_std['System Throughput']
    QoS_squaredD =(QoS_squaredD_ori-self.dic_mean['QoS Squared Difference'])/self.dic_std['QoS Squared Difference']
    # check QoS
    if QoS_bad_ori==0:
        QoS_R=1
    else:
        # NOTE(review): this records the UEs that are ABOVE their threshold
        # (i>0) in the branch taken when some UE is below it - confirm
        # whether i<0 was intended.  Behaviour left unchanged.
        self.debug_QoS['1'][str(episode)][str(timestep)]=[i_UE for i_UE,i in enumerate(QoS_difference) if i>0]
    # 3-4) reward, with a fixed 0.1 penalty when the backhaul is violated
    R = self.lambda1*Energy_Efficiency-self.lambda2*QoS_squaredD
    if Backhaul_cost_ori>0:
        done = True
        self.debug_backhaul['1'][str(episode)][str(timestep)]=[i for i in dic_backhaul if dic_backhaul[i]>0]
        R = self.lambda1*Energy_Efficiency-self.lambda2*QoS_squaredD - 0.1
    # 4) next state--------------------------------------------------------
    s_ = copy.deepcopy(c_Throughput_ori)
    # 5) info--------------------------------------------------------------
    info = (R,Energy_Efficiency,Backhaul_cost,QoS_good,QoS_gurantee,QoS_bad,sum_Throughput,QoS_squaredD)
    info_lis=(list(Backhaul_difference),list(SINRdb),list(QoS_difference),list(c_Throughput_ori))
    info_ori=(Energy_Efficiency_ori,Backhaul_cost_ori,QoS_good_ori,QoS_gurantee_ori,QoS_bad_ori,sum_Throughput_ori,QoS_squaredD_ori)
    debug_info=(self.Throughput_SBS_threshold,self.Throughput_BS)
    # 6) debug-------------------------------------------------------------
    if f_debug:
        self.debug_UE_throughput['1'][str(episode)].append(c_Throughput_ori)
        self.debug_SBS_throughput['1'][str(episode)].append([ sum(c_Throughput_ori[BS]) for BS in self.chosen_TP2UE ] )
        self.debug_SBS_threshold['1'][str(episode)].append(self.Throughput_SBS_threshold )
        self.debug_c['1'][str(episode)].append(self.chosen_UE2TP )
        self.debug_p['1'][str(episode)].append(10*np.log10(P*1000))
    return info,info_lis,s_,info_ori,done,debug_info,QoS_R
self.chosen_UE2TP=self.ori_UE2TP[chosen_c] 522 | #1) channel for same channel in 1 episode------------------------------ 523 | #self.channel() 524 | #2) SubchannelAllocation for different cluster------------------------- 525 | if f_subc == True: 526 | self.SubchannelAllocation() 527 | #3) R------------------------------------------------------------------ 528 | I = self.debug_Interference(P,episode,baseline) 529 | SINR = self._SINR(I,P) #array 530 | SINRdb = 10*np.log10(np.clip(SINR,self.delta_min,SINR)) # cannot np.clip(SINR,self.delta_min,self.delta_max) 531 | Throughput_ori = self._Throughput(P,SINR) 532 | n=self.nSBS-sum([1 for i in self.chosen_TP2UE if len(i)==0]) 533 | # 3-1) backhaul 534 | Backhaul_difference = np.array(self.Throughput_BS)-np.array(self.Throughput_SBS_threshold) 535 | dic_backhaul={i:dif for i,dif in enumerate(Backhaul_difference) if dif>0} 536 | Backhaul_cost_ori=0 537 | # 3-2) correct throughput 538 | c_Throughput_ori = copy.deepcopy(Throughput_ori) 539 | for i in dic_backhaul: 540 | Backhaul_cost_ori=Backhaul_cost_ori+dic_backhaul[i] 541 | if dic_backhaul[i]>0: 542 | i_UE = self.chosen_TP2UE[i] 543 | for k in i_UE: 544 | c_Throughput_ori[k]=Throughput_ori[k]*self.Throughput_SBS_threshold[i]/self.Throughput_BS[i] 545 | c_Throughput_ori=np.array(c_Throughput_ori) 546 | sum_Throughput_ori =sum(c_Throughput_ori) 547 | Energy_Efficiency_ori = sum(c_Throughput_ori)/(n*self.Pc_SBS+sum(P)) 548 | self.debug_system_throughput[baseline][str(episode)].append(sum(c_Throughput_ori)) 549 | self.debug_system_energy[baseline][str(episode)].append([n*self.Pc_SBS+sum(P),n*self.Pc_SBS,sum(P)]) # overall,operation,transmit 550 | Energy_Efficiency_ori = np.clip(Energy_Efficiency_ori,-self.delta_max,self.delta_max) 551 | QoS_difference = c_Throughput_ori-self.Throughput_UE_threshold 552 | QoS_good_ori = sum([i for i in QoS_difference if i>0]) 553 | QoS_bad_ori = sum([-i for i in QoS_difference if i<0 ]) 554 | 555 | QoS_gurantee_ori=QoS_good_ori-QoS_bad_ori 556 
| QoS_squaredD_ori = sum([i*i for i in QoS_difference]) 557 | # 3-3) standardize 558 | Energy_Efficiency = (Energy_Efficiency_ori-self.dic_mean['Energy Efficiency'])/self.dic_std['Energy Efficiency'] 559 | Backhaul_cost = Backhaul_cost_ori#(Backhaul_cost_ori-self.dic_mean['Backhaul Cost'])/self.dic_std['Backhaul Cost'] 560 | QoS_gurantee= (QoS_gurantee_ori-self.dic_mean['QoS Gurantee'])/self.dic_std['QoS Gurantee'] 561 | QoS_bad = (QoS_bad_ori-self.dic_mean['QoS Bad'])/self.dic_std['QoS Bad'] 562 | QoS_good =(QoS_good_ori-self.dic_mean['QoS Good'])/self.dic_std['QoS Good'] 563 | sum_Throughput =(sum_Throughput_ori-self.dic_mean['System Throughput'])/self.dic_std['System Throughput'] 564 | QoS_squaredD =(QoS_squaredD_ori-self.dic_mean['QoS Squared Difference'])/self.dic_std['QoS Squared Difference'] 565 | # check QoS 566 | if QoS_bad_ori==0: 567 | QoS_R=1 568 | else: 569 | self.debug_QoS['1'][str(episode)][str(timestep)]=[i_UE for i_UE,i in enumerate(QoS_difference) if i>0] 570 | # check Backhaul 571 | if Backhaul_cost_ori>0: 572 | done = True 573 | self.debug_backhaul['1'][str(episode)][str(timestep)]=[i for i in dic_backhaul if dic_backhaul[i]>0] 574 | # 3-4) reward 575 | if Backhaul_cost_ori>0: 576 | done = True 577 | R = self.lambda1*Energy_Efficiency-self.lambda2*QoS_squaredD - 0.1 578 | else: 579 | R=self.lambda1*Energy_Efficiency-self.lambda2*QoS_squaredD 580 | # 4) next state-------------------------------------------------------- 581 | Ths_=copy.deepcopy(c_Throughput_ori) 582 | #Ths_=np.clip((Ths_-np.mean(Ths_))/(Ths_.var()**0.5),self.delta_min,self.delta_max) 583 | #Gs_ = self.G.T.flatten() 584 | #s_ = np.concatenate((Ths_, Gs_),axis=0) 585 | s_ = Ths_ 586 | # 5) info-------------------------------------------------------------- 587 | info = (R,Energy_Efficiency,Backhaul_cost,QoS_good,QoS_gurantee,QoS_bad,sum_Throughput,QoS_squaredD) 588 | info_lis=(list(Backhaul_difference),list(SINRdb),list(QoS_difference),list(c_Throughput_ori)) 589 | 
info_ori=(Energy_Efficiency_ori,Backhaul_cost_ori,QoS_good_ori,QoS_gurantee_ori,QoS_bad_ori,sum_Throughput_ori,QoS_squaredD_ori) 590 | debug_info=(self.Throughput_SBS_threshold,self.Throughput_BS) 591 | # 6) debug-------------------------------------------------------------- 592 | self.debug_UE_throughput[baseline][str(episode)].append(c_Throughput_ori) 593 | self.debug_SBS_throughput[baseline][str(episode)].append([ sum(c_Throughput_ori[BS]) for BS in self.chosen_TP2UE ] ) 594 | self.debug_SBS_threshold[baseline][str(episode)].append(self.Throughput_SBS_threshold ) 595 | self.debug_c[baseline][str(episode)].append(self.chosen_UE2TP ) 596 | self.debug_p[baseline][str(episode)].append(10*np.log10(P*1000)) 597 | return info,info_lis,s_,info_ori,done,debug_info,QoS_R 598 | 599 | def _pInterference(self,P,iUE,k): 600 | #UEs use the same subchannel 601 | iUE=[key for key in iUE if self.B_UE2B[key]==self.B_UE2B[k] ] 602 | #3)iG 603 | iG=[self.G[self.chosen_UE2TP[j],k] for j in iUE] 604 | #4)iP 605 | iP=[P[i] for i in iUE] 606 | #5)I 607 | interference = np.sum( np.array(iG)*np.array(iP) ) 608 | return interference 609 | 610 | def _Interference(self,P): 611 | """Interference for ALL UEs""" 612 | I=[] 613 | for k in range(self.nUE): 614 | """1)inter-cell interference""" 615 | #1)iTP 616 | iTP=self.chosen_UE2TP[k] 617 | #2)iUE 618 | #UEs in different clusters 619 | inter_iUE=[i for i in range(self.nUE)] 620 | for i in self.chosen_TP2UE[iTP]: 621 | inter_iUE.remove(i) 622 | inter_interference=self._pInterference(P,inter_iUE,k) 623 | """2)intra-cell interference""" 624 | #UEs in same clusters 625 | intra_iUE = self.chosen_TP2UE[iTP].copy() 626 | intra_iUE.remove(k) 627 | intra_interference=self._pInterference(P,intra_iUE,k) 628 | """3)interference""" 629 | interference = inter_interference+intra_interference 630 | I.append(interference) 631 | return I 632 | 633 | def debug_Interference(self,P,episode,baseline): 634 | """Interference for ALL UEs""" 635 | I=[] 636 | for k in 
range(self.nUE): 637 | """1)inter-cell interference""" 638 | #1)iTP 639 | iTP=self.chosen_UE2TP[k] 640 | #2)iUE 641 | #UEs in different clusters 642 | inter_iUE=[i for i in range(self.nUE)] 643 | for i in self.chosen_TP2UE[iTP]: 644 | inter_iUE.remove(i) 645 | inter_interference=self._pInterference(P,inter_iUE,k) 646 | """2)intra-cell interference""" 647 | #UEs in same clusters 648 | intra_iUE = self.chosen_TP2UE[iTP].copy() 649 | intra_iUE.remove(k) 650 | intra_interference=self._pInterference(P,intra_iUE,k) 651 | """3)interference""" 652 | interference = inter_interference+intra_interference 653 | I.append(interference) 654 | ########################## 655 | self.debug_I[baseline][str(episode)]['UE'+str(k)].append([interference,intra_interference,inter_interference]) 656 | return I 657 | 658 | def _SINR(self,I,P): 659 | G_UE=[self.G[self.chosen_UE2TP[i],i] for i in range(self.nUE)] 660 | SINR=np.array(G_UE)*np.array(P)/(self.Noise*self.subB+np.array(I))#np.clip(np.array(G_UE)*np.array(P)/(self.Noise*self.subB+np.array(I)),-self.delta_max,self.delta_max) 661 | #signal_part=np.array(G_UE)*np.array(P) 662 | return SINR 663 | 664 | def _Throughput(self,P,SINR): 665 | # Method 2. 
def baseline1(self):
    """Baseline 1: every UE associates with its nearest SBS.

    Returns the key of ``self.ori_UE2TP`` whose association equals the
    nearest-SBS assignment.  If several keys match, the last one wins, as
    before.

    Raises
    ------
    ValueError
        If no entry of ``self.ori_UE2TP`` matches.  Previously this
        surfaced as an ``UnboundLocalError`` on the return statement.
    """
    xSBS = np.array(self.xSBS)
    ySBS = np.array(self.ySBS)
    # Euclidean distance from each UE to every SBS
    dUE2SBS = [((xSBS-x)**2+(ySBS-y)**2)**0.5 for x,y in zip(self.xUE,self.yUE)]
    chosen_UE2TP = [np.argmin(d) for d in dUE2SBS]
    matches = [key for key,value in self.ori_UE2TP.items() if value==chosen_UE2TP]
    if not matches:
        raise ValueError('baseline1: nearest-SBS association %r is not in ori_UE2TP' % (chosen_UE2TP,))
    return matches[-1]

def baseline2(self):
    """Baseline 2: every UE associates with the SBS of highest channel gain.

    Returns the key of ``self.ori_UE2TP`` whose association equals the
    per-UE argmax over the rows of ``self.G``.  If several keys match, the
    last one wins, as before.

    Raises
    ------
    ValueError
        If no entry of ``self.ori_UE2TP`` matches.  Previously this
        surfaced as an ``UnboundLocalError``.
    """
    chosen_UE2TP = [np.argmax(self.G[:,i]) for i in range(self.nUE)]
    matches = [key for key,value in self.ori_UE2TP.items() if value==chosen_UE2TP]
    if not matches:
        raise ValueError('baseline2: best-channel association %r is not in ori_UE2TP' % (chosen_UE2TP,))
    return matches[-1]

def checkBackhaul(self,P):
    """Return True when power vector ``P`` violates the backhaul constraint.

    Uses the currently selected association (``chosen_TP2UE`` /
    ``chosen_UE2TP``).  The constraint is violated when any SBS has a sum
    rate strictly above its threshold.
    """
    I = self._Interference(P)
    SINR = self._SINR(I,P) #array
    # called for its side effect: it refreshes self.Throughput_BS and
    # self.Throughput_SBS_threshold
    self._Throughput(P,SINR)
    Backhaul_difference = np.array(self.Throughput_BS)-np.array(self.Throughput_SBS_threshold)
    # A positive total excess is the same as at least one positive entry.
    return bool(np.any(Backhaul_difference>0))
def randomP(self,chosen_c,f_ori_c):
    """Draw a random power vector, trying to satisfy the backhaul constraint.

    Selects association ``chosen_c`` (reduced tables when ``f_ori_c`` equals
    False, ``ori_`` tables otherwise) and re-allocates sub-channels.  It
    then samples ``P ~ U(0, P_Max_SBS * p_limit)`` up to 101 times per power
    cap, lowering the cap by 0.1 after each unsuccessful round.

    The search is best effort.  Once the cap drops below zero the last
    sampled vector is returned even though it still violates the constraint.
    """
    #0) Determine cluster--------------------------------------------------
    use_reduced_tables = (f_ori_c == False)
    tp2ue, ue2tp = (self.TP2UE, self.UE2TP) if use_reduced_tables else (self.ori_TP2UE, self.ori_UE2TP)
    self.chosen_TP2UE = tp2ue[chosen_c]
    self.chosen_UE2TP = ue2tp[chosen_c]
    # 1) SubchannelAllocation----------------------------------------------
    self.SubchannelAllocation()
    # 2) checkBackhaul-----------------------------------------------------
    violate = True
    p_limit = 1
    while violate and not p_limit < 0:
        for _ in range(101):  # at most 101 draws per power cap
            P = np.random.uniform(0, self.P_Max_SBS*p_limit, self.nUE)
            violate = self.checkBackhaul(P)
            if not violate:
                break
        p_limit = p_limit - 0.1
    return P

def randomC(self,P):
    """Pick a random association that satisfies the backhaul constraint for ``P``.

    Re-allocates sub-channels, then tries the ``ori_`` associations in
    random order without repetition and returns the first feasible index.
    When none is feasible, a uniformly random index is returned instead.
    """
    # 1) SubchannelAllocation----------------------------------------------
    self.SubchannelAllocation()
    # 2) checkBackhaul-----------------------------------------------------
    untried = [i for i in range(self.ori_sizeTable)]
    while untried:
        chosen_c = np.random.choice(untried)
        untried.remove(chosen_c)
        self.chosen_TP2UE = self.ori_TP2UE[chosen_c]
        self.chosen_UE2TP = self.ori_UE2TP[chosen_c]
        if not self.checkBackhaul(P):
            return chosen_c
    # every association violates the constraint: fall back to any of them
    return np.random.choice([i for i in range(self.ori_sizeTable)])
773 | mean_name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv'#'mean_std_cc_ct_0dB_s3_nv21_oldChannel_nobackhaul.csv' 774 | scenario_name = 'EnvInfo_3' 775 | mean_flage=True 776 | env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3) 777 | env.load(name=scenario_name) 778 | #env.new(name=scenario_name) 779 | env.channel() 780 | env.writeCSV(scenario_name) 781 | env.mean_std(10**6,mean_flage,mean_name) 782 | #%% 2) load (a)the SBS-UE distribution and (b) mean and standard deviation 783 | #lambda1=1#0.53#1 784 | #lambda2=0#0.05#0.42#0.8 785 | #lambda3=0#0.1#0.3#0 786 | #mean_name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5.csv'#'mean_std_cc_ct_0dB_s3_nv21_oldChannel_nobackhaul.csv' 787 | #scenario_name = 'EnvInfo_11' 788 | #mean_flage=False 789 | #env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3) 790 | #env.load(name=scenario_name) 791 | #env.mean_std(10**6,mean_flage,mean_name) 792 | 793 | -------------------------------------------------------------------------------- /mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv: -------------------------------------------------------------------------------- 1 | 14.27290812882412,0.0,21.789321971591978,1.4636380189483575,23.252959990540347,26.78932197159197,351.89412811801566 2 | 5.142182013770176,0.0,8.94766154843589,0.7804016158448511,8.679482600019545,8.947661548435889,152.70494406459784 3 | -------------------------------------------------------------------------------- /memory/__init__.py: -------------------------------------------------------------------------------- 1 | from memory.memory import Memory 2 | 3 | __all__ = ["Memory"] 4 | -------------------------------------------------------------------------------- /memory/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
class RingBuffer(object):
    """Fixed-capacity circular buffer of equally shaped array entries.

    Once ``maxlen`` entries are stored, each ``append`` overwrites the
    oldest one.  Index 0 always refers to the oldest stored entry.
    """

    def __init__(self, maxlen, shape, dtype='float32'):
        self.maxlen = maxlen
        self.start = 0   # physical position of logical index 0
        self.length = 0  # number of valid entries
        # allocate directly in the target dtype instead of zeros().astype()
        self.data = np.zeros((maxlen,) + shape, dtype=dtype)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        # KeyError (not IndexError) is the historical contract of this
        # class and is kept for callers that rely on it.
        if idx < 0 or idx >= self.length:
            raise KeyError()
        return self.data[(self.start + idx) % self.maxlen]

    def get_batch(self, idxs):
        """Return the entries at the logical indices ``idxs`` (no bounds check)."""
        return self.data[(self.start + idxs) % self.maxlen]

    def append(self, v):
        if self.length < self.maxlen:
            # We have space, simply increase the length.
            self.length += 1
        elif self.length == self.maxlen:
            # No space, "remove" the first item.
            self.start = (self.start + 1) % self.maxlen
        else:
            # This should never happen.
            raise RuntimeError()
        self.data[(self.start + self.length - 1) % self.maxlen] = v

    def clear(self):
        self.start = 0
        self.length = 0
        self.data[:] = 0  # unnecessary, not freeing any memory, could be slow


def array_min2d(x):
    """Return ``x`` as an array with at least two dimensions (1-D -> column)."""
    x = np.array(x)
    if x.ndim >= 2:
        return x
    return x.reshape(-1, 1)


class Memory(object):
    """Replay memory of (state, action, reward, next_state[, next_action], terminal).

    ``limit`` is the transition capacity.  ``next_actions=True`` adds a
    buffer for the follow-up action, which is then also returned by
    ``sample``.
    """

    def __init__(self, limit, observation_shape, action_shape, next_actions=False):
        self.limit = limit  # replay_memory_size

        self.states = RingBuffer(limit, shape=observation_shape)
        self.actions = RingBuffer(limit, shape=action_shape)
        self.rewards = RingBuffer(limit, shape=(1,))
        self.next_states = RingBuffer(limit, shape=observation_shape)
        self.next_actions = RingBuffer(limit, shape=action_shape) if next_actions else None
        self.terminals = RingBuffer(limit, shape=(1,))

    def sample(self, batch_size, random_machine=np.random):
        """Draw ``batch_size`` stored transitions uniformly, with replacement.

        Returns a 5-tuple, or a 6-tuple with ``next_actions`` placed before
        ``terminals`` when that buffer is enabled.
        """
        # randint's upper bound is exclusive, so this draws from
        # [0, nb_entries - 1] exactly like the deprecated
        # random_integers(low=0, high=nb_entries - 1) did.
        batch_idxs = random_machine.randint(0, self.nb_entries, size=batch_size)

        states_batch = self.states.get_batch(batch_idxs)
        actions_batch = self.actions.get_batch(batch_idxs)
        rewards_batch = self.rewards.get_batch(batch_idxs)
        next_states_batch = self.next_states.get_batch(batch_idxs)
        next_actions = self.next_actions.get_batch(batch_idxs) if self.next_actions is not None else None
        terminals_batch = self.terminals.get_batch(batch_idxs)

        if next_actions is not None:
            return states_batch, actions_batch, rewards_batch, next_states_batch, next_actions, terminals_batch
        else:
            return states_batch, actions_batch, rewards_batch, next_states_batch, terminals_batch

    def append(self, state, action, reward, next_state, next_action=None, terminal=False, training=True):
        """Store one transition; does nothing when ``training`` is falsy."""
        if not training:
            return

        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)
        self.next_states.append(next_state)
        # Must be an identity test: an empty RingBuffer has len() == 0 and
        # is therefore falsy, so `if self.next_actions:` never stored the
        # first next_action and the buffer stayed empty forever.
        if self.next_actions is not None:
            self.next_actions.append(next_action)
        self.terminals.append(terminal)

    def clear(self):
        self.states.clear()
        self.actions.clear()
        self.rewards.clear()
        self.next_states.clear()
        # the next-action buffer is optional
        if self.next_actions is not None:
            self.next_actions.clear()
        self.terminals.clear()

    @property
    def nb_entries(self):
        return len(self.states)
time_steps=False): 110 | self.limit = limit 111 | 112 | self.states = RingBuffer(limit, shape=observation_shape) 113 | self.actions = RingBuffer(limit, shape=action_shape) 114 | self.rewards = RingBuffer(limit, shape=(1,)) 115 | self.next_states = RingBuffer(limit, shape=observation_shape) 116 | self.next_actions = RingBuffer(limit, shape=action_shape) if next_actions else None 117 | self.time_steps = RingBuffer(limit, shape=(1,)) if time_steps else None 118 | self.terminals = RingBuffer(limit, shape=(1,)) 119 | 120 | def sample(self, batch_size, random_machine=np.random): 121 | # Draw such that we always have a proceeding element. 122 | #batch_idxs = random_machine.random_integers(self.nb_entries - 2, size=batch_size) 123 | batch_idxs = random_machine.choice(self.nb_entries, size=batch_size) 124 | # batch_idxs = random_machine.choice(self.nb_entries, weights=[i/self.nb_entries for i in range(self.nb_entries)], size=batch_size) 125 | 126 | '''states_batch = array_min2d(self.states.get_batch(batch_idxs)) 127 | actions_batch = array_min2d(self.actions.get_batch(batch_idxs)) 128 | rewards_batch = array_min2d(self.rewards.get_batch(batch_idxs)) 129 | next_states_batch = array_min2d(self.next_states.get_batch(batch_idxs)) 130 | terminals_batch = array_min2d(self.terminals.get_batch(batch_idxs))''' 131 | states_batch = self.states.get_batch(batch_idxs) 132 | actions_batch = self.actions.get_batch(batch_idxs) 133 | rewards_batch = self.rewards.get_batch(batch_idxs) 134 | next_states_batch = self.next_states.get_batch(batch_idxs) 135 | next_actions = self.next_actions.get_batch(batch_idxs) if self.next_actions is not None else None 136 | terminals_batch = self.terminals.get_batch(batch_idxs) 137 | time_steps = self.time_steps.get_batch(batch_idxs) if self.time_steps is not None else None 138 | 139 | ret = [states_batch, actions_batch, rewards_batch, next_states_batch] 140 | if next_actions is not None: 141 | ret.append(next_actions) 142 | ret.append(terminals_batch) 143 | 
class MemoryNStepReturns(object):
    """Replay memory with optional next-action, time-step and n-step-return buffers.

    The mandatory buffers are states, actions, rewards, next states and
    terminal flags.  Each optional buffer exists only when its constructor
    flag is truthy and is ``None`` otherwise.
    """

    def __init__(self, limit, observation_shape, action_shape, next_actions=False, time_steps=False, n_step_returns=False):
        self.limit = limit

        scalar = (1,)
        self.states = RingBuffer(limit, shape=observation_shape)
        self.actions = RingBuffer(limit, shape=action_shape)
        self.rewards = RingBuffer(limit, shape=scalar)
        self.next_states = RingBuffer(limit, shape=observation_shape)
        self.next_actions = None
        if next_actions:
            self.next_actions = RingBuffer(limit, shape=action_shape)
        self.time_steps = None
        if time_steps:
            self.time_steps = RingBuffer(limit, shape=scalar)
        self.terminals = RingBuffer(limit, shape=scalar)
        self.n_step_returns = None
        if n_step_returns:
            self.n_step_returns = RingBuffer(limit, shape=scalar)

    def sample(self, batch_size, random_machine=np.random):
        """Draw ``batch_size`` stored transitions via ``random_machine.choice``.

        Returns a tuple ordered as: states, actions, rewards, next_states,
        [next_actions], terminals, [time_steps], [n_step_returns].  The
        bracketed items are present only when the matching buffer is enabled.
        """
        batch_idxs = random_machine.choice(self.nb_entries, size=batch_size)

        def gather(buffer):
            # optional buffers may be disabled (None)
            return None if buffer is None else buffer.get_batch(batch_idxs)

        ret = [
            self.states.get_batch(batch_idxs),
            self.actions.get_batch(batch_idxs),
            self.rewards.get_batch(batch_idxs),
            self.next_states.get_batch(batch_idxs),
        ]
        next_actions = gather(self.next_actions)
        if next_actions is not None:
            ret.append(next_actions)
        ret.append(self.terminals.get_batch(batch_idxs))
        for optional in (gather(self.time_steps), gather(self.n_step_returns)):
            if optional is not None:
                ret.append(optional)
        return tuple(ret)

    def append(self, state, action, reward, next_state, next_action=None, terminal=False, time_steps=None,
               n_step_return=None):
        """Store one transition.

        ``time_steps`` / ``n_step_return`` are asserted to be non-None when
        the corresponding buffer is enabled.
        """
        mandatory = (
            (self.states, state),
            (self.actions, action),
            (self.rewards, reward),
            (self.next_states, next_state),
        )
        for buffer, value in mandatory:
            buffer.append(value)
        if self.next_actions is not None:
            self.next_actions.append(next_action)
        self.terminals.append(terminal)
        if self.time_steps is not None:
            assert time_steps is not None
            self.time_steps.append(time_steps)
        if self.n_step_returns is not None:
            assert n_step_return is not None
            self.n_step_returns.append(n_step_return)

    @property
    def nb_entries(self):
        return len(self.states)
# (continues the body of the ``if __name__ == '__main__':`` evaluation script)
# Hyper-parameters shared by the PDQN and DQN agents.  Most of them are
# forwarded as keyword arguments when the two agents are constructed below.
replay_memory_size=20000 # Replay memory transition capacity
epsilon_initial=1
epsilon_steps=1000 # Number of episodes over which to linearly anneal epsilon
epsilon_final=0.01 # Final epsilon value
gamma=0.95
clip_grad=1 # Parameter gradient clipping limit
use_ornstein_noise= False # False: Uniformly sample parameters & add noise to taken parameters / True: greedy parameters
inverting_gradients= True # Use inverting gradients scheme instead of squashing function
seed=0 # Random seed
save_freq = 100#0 # How often to save models (0 = never)
# 1) ParamActor------------------------------------------------------------
learning_rate_actor_param=0.00001
tau_actor_param=0.001
"""loss func for actor_parameter """
# The four flags below select the loss variant of the parameter actor; all are
# off here.
average=False # Average weighted loss function
weighted=False # Naive weighted loss function
random_weighted=False # Randomly weighted loss function
indexed=False # Indexed loss function
zero_index_gradients=False # Whether to zero all gradients for action-parameters not corresponding to the chosen action
# 2) Actor-----------------------------------------------------------------
tau_actor=0.1
learning_rate_actor=0.00001#0.0001#0.001 # reducing the learning rate can avoid NaN outputs
action_input_layer=0# Which layer receives the action parameters -- possibly unused (original author's note: "useless?")
# (continues the body of the ``if __name__ == '__main__':`` evaluation script)
#--------------------------------------------------------------------------
# Performance bookkeeping.  Every dict is keyed by a method id '1'..'6'
# (1: PDQN, 2: nearest SBS + random power, 3: best channel + random power,
#  4: RL clustering + random power, 5: random clustering + RL power, 6: DQN)
# and maps each metric name to the list of per-step values.
dic_info_key = ['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference','Backhaul Difference','SINRdb','QoS Difference','Throughput']
dic_info={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in ['1','2','3','4','5','6'] }
# Same layout, but only filled on steps where method 1 did not report `done1`.
dic_info_no_back={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in ['1','2','3','4','5','6'] }
dic_info_ori_key = ['Energy Efficiency','Backhaul Cost','QoS Good', 'QoS Gurantee', 'QoS Bad','System Throughput','QoS Squared Difference']
dic_info_ori={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in ['1','2','3','4','5','6'] }
dic_info_ori_no_back={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in ['1','2','3','4','5','6'] }
a_info={'c':[],'P':[]}
dic_store={'a':[],'ddpg_s':[],'r':[],'dqn_s':[],'dqn_Q':[]}
dic_NN_output={'actor':[],'critic':[],'dqn_q_eval':[],'dqn_q_target':[]}
num_back=0  # number of steps on which method 1 returned done1 (backhaul violation)
debug_QoSr={i:[] for i in ['1','2','3','4','5','6']}
#--------------------------------------------------------------------------
# debug
debug_PNN=[]
debug_backhaul=[]
debug_BSbackhaul=[]
debug_channel_episode=[]
debug_episode_back=[]
debug_s=[]

#%% Need to modify
###########################################################################
scale_actions = True # True
initialise_params = False#True#False # True: add pass-through layer to ActorParam and initialise it / False: no pass-through layer
MAXepisode = 100#1000
MAXepisode_train = 1000
MAXstep = 100#10#150
realization=100#20  # chunk size used when averaging the per-step histories
title="PDQN1"#"PDQN_backhaul" # Prefix of output files
#save_dir ="results" #Output directory
n_baseline=6
load_dir ="results_PDQN_5v3/PDQN_cc_s11_r11_0dB_N3_10"#PDQN_cc_s3_r9_1dB_new4_rebuild40" # directory the PDQN weights are loaded from
load_num="_done"#"400"#
load_dirDQN ="results_DQN_5v3/PDQN_cc_s11_r11_0dB_N3_10"#PDQN_cc_s3_r9_1dB_new4_rebuild40" # directory the DQN weights are loaded from
load_numDQN="_done"#"400"#
layers_actor=[512,128,16] # 1055-- --5 # # Hidden layers
actor_kwargs={'hidden_layers': layers_actor, 'output_layer_init_std': 1e-5,'action_input_layer': action_input_layer,'activation': "relu"}
layers_actor_param =[256]#[64,256] # 5-- --1050
actor_param_kwargs={'hidden_layers': layers_actor_param, 'output_layer_init_std': 1e-5,'squashing_function': False,'activation': "relu"}
name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv'#'mean_std_cc_nct.csv'
scenario_name='EnvInfo_3'
lambda1=0.43#0.53#1
lambda2=0.16#0.05#0.42#0.8
lambda3=0#0.1#0.3#0
# Prefix (directory + file stem) for every plot/CSV written further down.
result_save=load_dirDQN+'/test_testChannel_block_fading'#'/test_all_'#'/test_testChannel'#'/test_last2000_'
###########################################################################
#%% ENV
env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3,MAXepisode=MAXepisode,n_baseline=n_baseline)
#-------------------------------------------------------------------------- Choose Network Geometry
#env.reset() # create a new one
env.load(name=scenario_name) # use the previous one
#-------------------------------------------------------------------------- mean_std
env.mean_std(10**5,False,name)#calculate(True) or load(False)
num_actions = env.action_space[0]
s_dim = env.nUE
# use the same channel gain to test
read_train_channel_episode = t.readCSI('CSI',env.nSBS,env.nUE,MAXepisode)

#%% PDQN
agent_class = PDQNAgent
agent = agent_class(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space,
                    batch_size=batch_size,learning_rate_actor=learning_rate_actor,learning_rate_actor_param=learning_rate_actor_param, # 0.001
                    epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma,
                    clip_grad=clip_grad,indexed=indexed,average=average,
                    random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted,
                    tau_actor_param=tau_actor_param,initial_memory_threshold=initial_memory_threshold,
                    use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients,
                    actor_kwargs=actor_kwargs,actor_param_kwargs=actor_param_kwargs,
                    zero_index_gradients=zero_index_gradients,seed=seed)

power_level=5
agent_classDQN = DQNAgent
agentDQN = agent_classDQN(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space,
                          power_level=power_level,batch_size=batch_size,learning_rate_actor=learning_rate_actor, # 0.001
                          epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma,
                          clip_grad=clip_grad,indexed=indexed,average=average,
                          random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted,
                          initial_memory_threshold=initial_memory_threshold,
                          use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients,
                          actor_kwargs=actor_kwargs,
                          zero_index_gradients=zero_index_gradients,seed=seed)
# load the trained weights of both agents
agent.load_models(prefix = os.path.join(load_dir, load_num))
agentDQN.load_models(prefix = os.path.join(load_dirDQN, load_numDQN))
start_time = time.time()
total_step=0
done1 = True
s = env.reset()
# NOTE(review): under NumPy >= 2.0 `copy=False` raises ValueError whenever a
# copy is unavoidable (converting a list always copies); `np.asarray(...,
# dtype=np.float32)` is the version-independent spelling -- confirm the NumPy
# version this script is run with.
s = np.array(list(s), dtype=np.float32, copy=False)


for episode in range(MAXepisode):
    print(episode, 'episode-----------')

    #env.G=read_train_channel_episode[episode]

    for timestep in range(MAXstep):
        total_step = total_step + 1
        print('Iteration '+str(total_step)+'=======================================')
        #==================================================================
        # 1 PDQN agent (the only method whose next state drives the rollout)
        """ 1) take an action--------------------------------------------"""
        c1, PNN1, all_action_parameters = agent._act(s) # array
        P1 = t.p_normalize(env.P_Max_SBS,PNN1)
        """ 2) step -- next state, reward, done--------------------------"""
        info1, lis_info1, s_, info_ori1, done1,debug_info1,QoS_R1 = env.step(c1,P1,False,True,'1',episode,timestep)
        debug_QoSr['1'].append(QoS_R1)
        s_ = np.array(list(s_), dtype=np.float32, copy=False)
        R1,Energy_Efficiency1,Backhaul_cost1,QoS_good1,QoS_gurantee1,QoS_bad1,sum_c_Throughput1,QoS_squaredD1, =info1
        Energy_Efficiency_ori1,Backhaul_cost_ori1,QoS_good_ori1,QoS_gurantee_ori1,QoS_bad_ori1,sum_c_Throughput_ori1,QoS_squaredD1_ori1 =info_ori1
        Backhaul_difference1,SINRdb1,QoS_difference1,c_Throughput_ori1 = lis_info1
        Throughput_SBS_threshold,Throughput_BS = debug_info1
        debug_backhaul.append(Throughput_SBS_threshold)
        debug_BSbackhaul.append(Throughput_BS)
        # 2 the nearest SBS + random power allocation-------------------------------------------
        c2 = int(env.baseline1())
        a2 = env.randomP(c2,True)
        info2, lis_info2,_,info_ori2,_ ,_,QoS_R2 = env.step(c2,a2,True,False,'2',episode,timestep)
        debug_QoSr['2'].append(QoS_R2)
        # 3 the best channel + random power allocation ----------------------------------------
        c3 = int(env.baseline2())
        a3 = env.randomP(c3,True)
        info3, lis_info3,_,info_ori3,_,_,QoS_R3= env.step(c3,a3,True,False,'3',episode,timestep)
        debug_QoSr['3'].append(QoS_R3)
        # 4 RL clustering + random power allocation--------------------------------------------
        c4 = copy.deepcopy(c1)
        a4 = env.randomP(c4,False)
        info4, lis_info4,_,info_ori4,_,_,QoS_R4 = env.step(c4,a4,False,False,'4',episode,timestep)
        debug_QoSr['4'].append(QoS_R4)
        # 5 random clustering + RL power---------------------------------------------------
        a5 = copy.deepcopy(P1)
        c5 = env.randomC(a5)
        info5, lis_info5,_,info_ori5,_,_,QoS_R5= env.step(c5,a5,True,False,'5',episode,timestep)
        debug_QoSr['5'].append(QoS_R5)
        # 6 DQN
        a6 = agentDQN.act(s)
        c6,P6=agentDQN.action_decoder(a6, env.P_Max_SBS)
        info6, lis_info6, _, info_ori6,_,_,QoS_R6 = env.step(c6,P6,False,True,'6',episode,timestep)
        debug_QoSr['6'].append(QoS_R6)
        #==================================================================

        """ 3) Print and store info--------------------------------------"""
        # info=(R,Energy_Efficiency,Backhaul_cost,QoS_good,QoS_gurantee,QoS_bad,SINRdb)
        # lis_info1=(list(Backhaul_difference),list(SINRdb),list(QoS_difference))
        key_info=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference']
        key_info_lis=['Backhaul Difference','SINRdb','QoS Difference','Throughput']

        dic_info = t.test_inst_info(dic_info,(key_info,key_info_lis),((info1,info2,info3,info4,info5,info6),(lis_info1,lis_info2,lis_info3,lis_info4,lis_info5,lis_info6)),1)
        dic_info_ori = t.test_inst_info(dic_info_ori,dic_info_ori_key,(info_ori1,info_ori2,info_ori3,info_ori4,info_ori5,info_ori6),0)

        t.test_print_info(((env.UE2TP[c1],P1),(env.ori_UE2TP[c2],a2),(env.ori_UE2TP[c3],a3),(env.UE2TP[c4],a4),(env.ori_UE2TP[c5],a5),(env.ori_UE2TP[c6],P6)),s) # print p in dBm
        a_info['c'].append(env.UE2TP[c1])
        a_info['P'].append(10*np.log10(P1*1000))
        """ 4) update state ---------------------------------------------"""
        s = s_

        # the episode is not ended at the test phase; a `done1` step is only
        # counted and excluded from the *_no_back histories
        if done1:
            num_back=num_back+1
            debug_episode_back.append(episode)
            print('violate backhaul')
        else:
            dic_info_no_back = t.test_inst_info(dic_info_no_back,(key_info,key_info_lis),((info1,info2,info3,info4,info5,info6),(lis_info1,lis_info2,lis_info3,lis_info4,lis_info5,lis_info6)),1)
            dic_info_ori_no_back = t.test_inst_info(dic_info_ori_no_back,dic_info_ori_key,(info_ori1,info_ori2,info_ori3,info_ori4,info_ori5,info_ori6),0)


end_time = time.time()
print('num_back=',num_back,'/',total_step,' ',num_back/total_step*100,'%')
# NOTE(review): this script evaluates loaded models, so the "Training took"
# wording below looks like a leftover from the training script -- confirm.
print("Training took %.2f seconds" % (end_time - start_time))
# Share of steps on which each method's QoS indicator equalled 1.
for i in debug_QoSr:
    num_QoS=sum([1 for k in debug_QoSr[i] if k==1 ])
    print('[',i,']satify Qos',num_QoS,'/',total_step,' ',num_QoS/total_step*100,'%')


#%% debug for constraints about backhaul
#t.plot_constraint(MAXepisode,debug_episode_back,'test',result_save,0)
#t.writeConstraintHistory(result_save+'test_',MAXepisode,debug_episode_back,0)
#t.plot_constraint(MAXepisode,debug_QoSr['1'],'test',result_save,1)
#t.writeConstraintHistory_v2(result_save+'test_',MAXepisode,debug_QoSr,1)

#%%
# 7) Average per realization steps and Save --------------------------------
key_avg=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference']
dic_avg_info = t.test_avg_info(dic_info,key_avg,realization)
t.test_plot_avg(dic_avg_info,key_avg,realization,'normalize',result_save)
#---------------------------------------------------------------------------
dic_avg_info_ori = t.test_avg_info(dic_info_ori,dic_info_ori_key,realization)
t.test_plot_avg(dic_avg_info_ori,dic_info_ori_key,realization,'original',result_save)
#---------------------------------------------------------------------------
dic_avg_info_ori_no_back = t.test_avg_info(dic_info_ori_no_back,dic_info_ori_key,realization)
t.test_plot_avg(dic_avg_info_ori_no_back,dic_info_ori_key,realization,'no_back_original',result_save)
# (continues the body of the ``if __name__ == '__main__':`` evaluation script)

#%%
# The per-entity plotting helper used to be re-defined here as a line-for-line
# copy of ``tool.test_plot_individual``.  Bind the shared implementation
# instead so the two cannot drift apart; the local name is kept for any code
# that still refers to it.
test_plot_individual = t.test_plot_individual

#realization=20
# 8) plot results of each SBS or UE, e.g. Backhaul_difference-SBS, SINR-UE --
key_individual=['Backhaul Difference','SINRdb','QoS Difference', 'Throughput']
save=[]      # per method: chunk-averaged series of every SBS/UE
ori_save=[]  # per method: the raw series behind those averages
for i in ['1','2','3','4','5','6']:
    save_avg,save_ori=test_plot_individual(env,dic_info[i],i,key_individual,realization,result_save)
    save.append(save_avg)
    ori_save.append(save_ori)
# avg info
t.writeCSV(dic_avg_info,dic_avg_info_ori,save,dic_info_ori_key,key_individual,key_avg,result_save,1)
# original
t.writeCSV(dic_info,dic_info_ori,ori_save,dic_info_ori_key,key_individual,key_avg,result_save+'_original',1)
#%%
# no_backhaul + original -- don't use individual info
t.writeCSV_nobackhaul(dic_info_no_back,dic_avg_info_ori_no_back,dic_info_ori_key,key_avg,result_save+'_original_nobackhaul')
#%% write CSI
#t.writeCSI('Rayleigh_CSIforTest_100episode_100timestep_s10',debug_channel_episode)
#read = readBackhaulHistory('test_HistoryforBackhaulViolation')

#%% debug
# Snapshot the environment's debug recorders before they are flattened below.
test_debug_I = env.debug_I
test_debug_UE_throughput = env.debug_UE_throughput # each UE throughput
test_debug_SBS_throughput = env.debug_SBS_throughput
test_debug_SBS_threshold = env.debug_SBS_threshold
test_debug_c = env.debug_c
test_debug_p = env.debug_p
test_debug_backhaul = env.debug_backhaul
test_debug_QoS = env.debug_QoS # which episode and step violate QoS & UE index
test_debug_system_throughput = env.debug_system_throughput
test_debug_system_energy = env.debug_system_energy
# 1) EE
debug_dic_info_EE_key = ['System Power','Operational Power','Transmit Power','System Throughput']
debug_dic_info_EE = {key_dic_info:{name_EE:[] for name_EE in debug_dic_info_EE_key} for key_dic_info in ['1','2','3','4','5','6'] }

for iMETHOD in ['1','2','3','4','5','6']:
    # The first three keys are read positionally from each recorded energy
    # entry; the histories are flattened over all episodes.
    for index,nameEE in enumerate(debug_dic_info_EE_key[:3]):
        debug_dic_info_EE[iMETHOD][nameEE]=[episode_EE[index] for episode in range(MAXepisode) for episode_EE in test_debug_system_energy[iMETHOD][str(episode)] ]
    debug_dic_info_EE[iMETHOD]['System Throughput']=[episode_EE for episode in range(MAXepisode) for episode_EE in test_debug_system_throughput[iMETHOD][str(episode)] ]

t.writeEE(debug_dic_info_EE,debug_dic_info_EE_key,result_save) 330 | 331 | # 2) Interference 332 | debug_dic_info_I_key = ['Interference','Intra-cluster Interference','Inter-cluster Interference'] 333 | debug_I={i:{'UE'+str(j):[] for j in range(env.nUE)} for i in debug_dic_info_I_key} # I, intra-cluster, inter-cluster 334 | debug_dic_info_I = {key_dic_info:copy.deepcopy(debug_I) for key_dic_info in ['1','2','3','4','5','6'] } 335 | 336 | for index,name_I in enumerate(debug_dic_info_I_key): 337 | for iUE in ['UE'+str(i) for i in range(env.nUE) ]: 338 | for iMETHOD in [str(k+1) for k in range(6)]: 339 | debug_dic_info_I[iMETHOD][name_I][iUE]=[episode_I[index] for episode in range(MAXepisode) for episode_I in copy.deepcopy(test_debug_I[iMETHOD][str(episode)][iUE])] 340 | 341 | t.writeI(debug_dic_info_I,debug_dic_info_I_key,env.nUE,result_save) 342 | 343 | # 3) action 344 | debug_dic_info_action_key = ['Association','Power Allocation'] 345 | debug_dic_info_action = {key_dic_info:{name_action:[] for name_action in debug_dic_info_action_key} for key_dic_info in [str(i+1) for i in range(n_baseline)] } 346 | 347 | for iMETHOD in [str(i+1) for i in range(n_baseline)] : 348 | debug_dic_info_action[iMETHOD]['Association'] = [ test_debug_c[iMETHOD][str(episode)] for episode in range(MAXepisode) ] 349 | debug_dic_info_action[iMETHOD]['Power Allocation'] = [ test_debug_p[iMETHOD][str(episode)] for episode in range(MAXepisode) ] 350 | 351 | t.writeAction(debug_dic_info_action,debug_dic_info_action_key,result_save) 352 | -------------------------------------------------------------------------------- /tool.py: -------------------------------------------------------------------------------- 1 | #!python3 2 | #!/usr/bin/env python3 3 | # -*- coding: utf-8 -*- 4 | """ 5 | Created on Mon Jun 3 21:27:58 2019 6 | 7 | @author: kuokuo 8 | """ 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | import math 12 | import csv 13 | 14 | 15 | 16 | 17 | 18 | def 
def _method_ids(count):
    """Return the method labels ``'1' .. str(count)`` used as dict keys."""
    return [str(i + 1) for i in range(count)]


def _append_per_method(dic_info, keys, per_method_values):
    """Append the i-th method's values to ``dic_info[str(i+1)][keys[j]]``."""
    for method, values in zip(_method_ids(len(per_method_values)), per_method_values):
        history = dic_info[method]
        # Index instead of zip so that surplus values still fail loudly with
        # an IndexError: keys and data must be in the same order.
        for position, value in enumerate(values):
            history[keys[position]].append(value)


def _chunk_means(values, n):
    """Return the mean of every complete, consecutive chunk of *n* values."""
    return [sum(values[i * n:(i + 1) * n]) / n for i in range(len(values) // n)]


def _per_entity_series(history, count):
    """Transpose a per-step history into one time series per SBS/UE index."""
    return [[step[entity] for step in history] for entity in range(count)]


# Line colours, cycled when there are more entities than colours.
_PLOT_COLORS = ['r', 'b', 'g', 'c', 'm', 'y', 'k', 'b']


def inst_info(dic_info, key, data, op):
    """Append one training step's measurements to *dic_info* and return it.

    op == 1 (normalized): ``key`` is ``(key_info, key_info_lis)`` and ``data``
    is ``(info, lis_info1)``; ``info`` holds one value tuple per method, while
    ``lis_info1`` holds the list-valued metrics of method ``'1'`` only.
    Otherwise (original): ``key`` is a flat key list and ``data`` holds one
    value tuple per method.  Keys and values must be in the same order.
    """
    if op == 1:  # normalized
        key_info, key_info_lis = key
        info, lis_info1 = data
        _append_per_method(dic_info, key_info, info)
        for name, value in zip(key_info_lis, lis_info1):
            dic_info['1'][name].append(value)
    else:  # original
        _append_per_method(dic_info, key, data)
    return dic_info


def test_inst_info(dic_info, key, data, op):
    """Evaluation-phase variant of :func:`inst_info`.

    Identical except that, for ``op == 1``, ``data[1]`` carries the
    list-valued metrics of *every* method (one tuple per method).
    """
    if op == 1:  # normalized
        key_info, key_info_lis = key
        info, lis_info = data
        _append_per_method(dic_info, key_info, info)
        for method, content in zip(_method_ids(len(info)), lis_info):
            for name, value in zip(key_info_lis, content):
                dic_info[method][name].append(value)
    else:  # original
        _append_per_method(dic_info, key, data)
    return dic_info


def train_avg_info(dic_info, key_avg, n):
    """Average method ``'1'``'s histories over consecutive chunks of *n* steps."""
    history = dic_info['1']
    return {'1': {key: _chunk_means(history[key], n) for key in key_avg}}


def test_avg_info(dic_info, key_avg, n):
    """Average every method's histories over consecutive chunks of *n* steps.

    The result is keyed like *dic_info*, so method labels other than
    ``'1'..'n'`` are handled as well.
    """
    return {method: {key: _chunk_means(history[key], n) for key in key_avg}
            for method, history in dic_info.items()}


def _plot_avg(dic_avg_info, key_avg, realization, name, save_dir, n_methods):
    """Draw and save one figure per key with a curve for each plotted method."""
    xlabel = 'Training Steps (x' + str(realization) + ')'
    labels = _method_ids(n_methods)
    for key in key_avg:
        title = '(' + name + ')Average ' + key + ' with ' + str(realization) + ' Realizations'
        for label in labels:
            series = dic_avg_info[label][key]
            plt.plot(np.arange(len(series)), series, label=label)
        plt.legend(loc='upper right')
        plt.title(title)
        plt.ylabel('Average ' + key)
        plt.xlabel(xlabel)
        plt.savefig(save_dir + title + '.png')
        plt.show()


def train_plot_avg(dic_avg_info, key_avg, realization, name, save_dir):
    """Plot the averaged training curves of method ``'1'`` and save them as PNG."""
    _plot_avg(dic_avg_info, key_avg, realization, name, save_dir, 1)


def test_plot_avg(dic_avg_info, key_avg, realization, name, save_dir):
    """Plot the averaged curves of all methods in *dic_avg_info*, saving PNGs."""
    _plot_avg(dic_avg_info, key_avg, realization, name, save_dir, len(dic_avg_info))


def plot(start, lisRL, n, title, ylabel, xlabel):
    """Plot the chunk-of-*n* means of ``lisRL[start:]``.

    The previous slice bound ``(i+1*n)`` evaluated to ``i + n``, so every
    chunk after the first was truncated or empty; chunks now cover
    ``[i*n, (i+1)*n)``.
    """
    avg = _chunk_means(lisRL[start:], n)
    plt.plot(np.arange(len(avg)), avg)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    #plt.savefig(title+'.png')
    plt.show()


def plot_individual(env, dic_info, key_plot, n):
    """Plot method ``'1'``'s per-SBS / per-UE curves, averaged over *n* steps.

    ``key_plot`` is expected to list, in order, one per-SBS metric followed by
    two per-UE metrics.  Returns ``{key: [averaged series per entity]}``.
    """
    n_term = [env.nSBS, env.nUE, env.nUE]
    labels = ['(DDPG1)SBS', '(DDPG1)UE', '(DDPG1)UE']
    xlabel = 'Training Steps (x' + str(n) + ')'
    save = {name: [[] for _ in range(num)] for name, num in zip(key_plot, n_term)}
    for i, key in enumerate(key_plot):
        for entity, series in enumerate(_per_entity_series(dic_info['1'][key], n_term[i])):
            means = _chunk_means(series, n)
            save[key][entity] = means
            plt.plot(np.arange(len(means)), means, label=labels[i] + str(entity),
                     color=_PLOT_COLORS[entity % len(_PLOT_COLORS)], linestyle='-')
        plt.legend(loc='upper right')
        plt.title(key + ' with ' + str(n) + ' Realizations')
        plt.ylabel(key)
        plt.xlabel(xlabel)
        #plt.savefig(title[i]+'.png')
        plt.show()
    return save


def test_plot_individual(env, dic_info, method_index, key_plot, n, save_dir):
    """Plot one method's per-SBS / per-UE curves and save each figure as PNG.

    ``dic_info`` is the history dict of a single method.  ``key_plot`` lists
    one per-SBS metric followed by three per-UE metrics.  Returns
    ``(save, save_ori)``: the chunk-averaged series and the raw series
    truncated to a whole number of chunks, both as
    ``{key: [series per entity]}``.
    """
    n_term = [env.nSBS, env.nUE, env.nUE, env.nUE]
    labels = ['SBS', 'UE', 'UE', 'UE']
    xlabel = 'Training Steps (x' + str(n) + ')'
    save = {name: [[] for _ in range(num)] for name, num in zip(key_plot, n_term)}
    save_ori = {name: [[] for _ in range(num)] for name, num in zip(key_plot, n_term)}
    for i, key in enumerate(key_plot):
        title = '[' + method_index + ']' + key + ' with ' + str(n) + ' Realizations'
        for entity, series in enumerate(_per_entity_series(dic_info[key], n_term[i])):
            means = _chunk_means(series, n)
            save[key][entity] = means
            # keep only the samples that actually entered an average
            save_ori[key][entity] = series[:len(means) * n]
            plt.plot(np.arange(len(means)), means, label=labels[i] + str(entity),
                     color=_PLOT_COLORS[entity % len(_PLOT_COLORS)], linestyle='-')
        plt.legend(loc='upper right')
        plt.title(title)
        plt.ylabel(key)
        plt.xlabel(xlabel)
        plt.savefig(save_dir + title + '.png')
        plt.show()
    return save, save_ori


def print_info(info, s):
    """Print the state and one ``(clustering, power)`` pair, power in dBm."""
    c, P = info
    print('s ', s)
    print('c = ', c, ' P in dBm=', 10 * np.log10(P * 1000))


def test_print_info(info, s):
    """Print the state and every method's ``(clustering, power)`` pair in dBm."""
    print('s ', s)
    for i, data in enumerate(info):
        c, P = data
        print('[', i + 1, '] c = ', c, ' P in dBm=', 10 * np.log10(P * 1000))


def _write_average_rows(writer, tag, keys, method_ids, averages):
    """Write one CSV row per (key, method): ``[method, tag+key, *values]``."""
    for key in keys:
        for method in method_ids:
            writer.writerow([method, tag + key] + list(averages[method][key]))


def writeCSV(dic_avg_info, dic_avg_info_ori, save, dic_info_ori_key, key_individual, key_avg, title, op):
    """Write normalized, original and per-entity histories to ``<title>_history.csv``.

    With ``op == 1`` *save* is a list holding one ``{key: [series]}`` dict per
    method; otherwise it is a single such dict.
    """
    methods = _method_ids(len(dic_avg_info))
    with open(title + '_history.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # 1) normalized -- the misspelt tag is kept verbatim because existing
        #    result files (and whatever parses them) already use it
        _write_average_rows(writer, '(normlaized)Average ', key_avg, methods, dic_avg_info)
        # 2) original
        _write_average_rows(writer, '(original)Average ', dic_info_ori_key, methods, dic_avg_info_ori)
        # 3) individual
        if op == 1:  # test mode, all baselines
            for n_model, content in enumerate(save[:len(methods)]):
                for key in key_individual:
                    for i, v in enumerate(content[key]):
                        writer.writerow(['[' + str(n_model + 1) + ']' + str(i), key] + list(v))
        else:
            for key in key_individual:
                for i, v in enumerate(save[key]):
                    writer.writerow([i, key] + list(v))


def writeCSV_nobackhaul(dic_avg_info, dic_avg_info_ori, dic_info_ori_key, key_avg, title):
    """Write the normalized and original histories to ``<title>_history.csv``."""
    methods = _method_ids(len(dic_avg_info))
    with open(title + '_history.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        _write_average_rows(writer, '(normlaized)Average ', key_avg, methods, dic_avg_info)
        _write_average_rows(writer, '(original)Average ', dic_info_ori_key, methods, dic_avg_info_ori)


def writeEE(debug_dic_info_EE, debug_dic_info_EE_key, title):
    """Write the energy histories to ``<title>_EE.csv``, one row per (key, method)."""
    methods = _method_ids(len(debug_dic_info_EE))
    with open(title + '_EE.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for key in debug_dic_info_EE_key:
            for method in methods:
                writer.writerow([method, key] + list(debug_dic_info_EE[method][key]))


def writeI(debug_dic_info_I, debug_dic_info_I_key, nUE, title):
    """Write the interference histories to ``<title>_I.csv``, one row per (key, method, UE)."""
    methods = _method_ids(len(debug_dic_info_I))
    with open(title + '_I.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for key in debug_dic_info_I_key:
            for method in methods:
                for ue in ['UE' + str(k) for k in range(nUE)]:
                    writer.writerow([key, method, ue] + list(debug_dic_info_I[method][key][ue]))


def writeAction(debug_dic_info_action, debug_dic_info_action_key, title):
    """Write each step's association and power allocation to ``<title>_Action.csv``.

    ``debug_dic_info_action_key`` is accepted for interface compatibility; the
    two row kinds written are fixed.
    """
    methods = _method_ids(len(debug_dic_info_action))
    with open(title + '_Action.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for method in methods:
            associations = debug_dic_info_action[method]['Association']
            powers = debug_dic_info_action[method]['Power Allocation']
            for episode, content in enumerate(associations):
                for tstep, association in enumerate(content):
                    writer.writerow([method, 'Association', episode, tstep] + list(association))
                    writer.writerow([method, 'Power Allocation', episode, tstep] + list(powers[episode][tstep]))


def findinf_list(lis):
    """Split *lis* into the indices of infinite entries and the finite values."""
    dic = {'inf_index_list': [], 'non_inf_list': []}
    for i, value in enumerate(lis):
        # go through str() so that string-like entries such as 'inf' parse too
        if math.isinf(float(str(value))):
            dic['inf_index_list'].append(i)
        else:
            dic['non_inf_list'].append(value)
    return dic


def writeCSI(name, train_channel_episode):
    """Write every row of every channel-gain matrix to ``<name>.csv``."""
    with open(name + '.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for G in train_channel_episode:
            writer.writerows(G)


def readCSI(name, nSBS, nUE, episode):
    """Read *episode* channel matrices back from ``<name>.csv``.

    Each matrix is built from *nSBS* consecutive CSV rows and cut to its first
    ``nUE + 1`` columns.

    Raises:
        ValueError: if the file holds fewer than ``episode * nSBS`` rows.
    """
    with open(name + '.csv', newline='') as csvfile:
        rows = list(csv.reader(csvfile))
    lis_G = []
    for index in range(episode):
        block = rows[index * nSBS:(index + 1) * nSBS]
        if len(block) < nSBS:
            raise ValueError(
                'CSI file %s.csv has too few rows for episode %d' % (name, index))
        G = np.array([float(value) for row in block for value in row]).reshape(nSBS, -1)
        lis_G.append(G[:, 0:nUE + 1])
    return lis_G


def _backhaul_flags(episode, debug_episode_back):
    """Return a 0/1 vector of length *episode* with ones at the violating indices."""
    back = np.zeros((episode,))
    back[debug_episode_back] = 1
    return back


def _write_backhaul_history(name, episode, debug_episode_back):
    """Write the violation flags as one row of ``<name>_HistoryforBackhaulViolation.csv``."""
    with open(name + '_HistoryforBackhaulViolation.csv', 'w', newline='') as csvfile:
        csv.writer(csvfile).writerow(_backhaul_flags(episode, debug_episode_back))


def writeConstraintHistory(name, episode, debug_episode_back, mode):
    """Persist a constraint history.

    mode == 0: *debug_episode_back* lists the violating episode indices.
    Otherwise it is the QoS history itself and is written as a single row.
    """
    if mode == 0:  # backhaul
        _write_backhaul_history(name, episode, debug_episode_back)
    else:  # QoS
        with open(name + '_HistoryforQoSsatisfication.csv', 'w', newline='') as csvfile:
            csv.writer(csvfile).writerow(debug_episode_back)


def writeConstraintHistory_v2(name, episode, debug_episode_back, mode):
    """Like :func:`writeConstraintHistory`, but in QoS mode *debug_episode_back*
    is a dict and one row is written per entry, in iteration order."""
    if mode == 0:  # backhaul
        _write_backhaul_history(name, episode, debug_episode_back)
    else:  # QoS
        with open(name + '_HistoryforQoSsatisfication.csv', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            for method in debug_episode_back:
                writer.writerow(debug_episode_back[method])


def readConstraintHistory(name, mode):
    """Return the first row of a stored constraint history as floats."""
    if mode == 0:  # backhaul
        filename = name + '_HistoryforBackhaulViolation'
    else:  # QoS
        filename = name + '_HistoryforQoSsatisfication'
    with open(filename + '.csv', newline='') as csvfile:
        first_row = list(csv.reader(csvfile))[0]
    return [float(value) for value in first_row]


def plot_constraint(episode, debug_episode_back, name, save_dir, mode):
    """Plot a backhaul-violation (mode 0) or QoS history and save it as PNG."""
    if mode == 0:  # backhaul
        title = '(' + name + ')History of Backhaul Constraint Violation'
        plt.plot(np.arange(episode), _backhaul_flags(episode, debug_episode_back))
    else:  # QoS
        title = '(' + name + ')History of QoS Satisfication'
        plt.plot(np.arange(len(debug_episode_back)), debug_episode_back)
    plt.title(title)
    plt.xlabel('Steps')
    plt.savefig(save_dir + title + '.png')
    plt.show()


def p_normalize(clip, P_NN):
    """Scale each network output by *clip* and clamp the result to ``[0, clip]``."""
    # for sigmoid: noise has already been added to P_NN
    return np.array([np.clip(power * clip, 0, clip) for power in P_NN])


# The ``test_*`` helpers above are evaluation-phase utilities, not unit tests.
# Mark them so pytest/nose do not try to collect them when this module's
# names end up in a test file.
for _helper in (test_inst_info, test_avg_info, test_plot_avg,
                test_plot_individual, test_print_info):
    _helper.__test__ = False
del _helper

# --------------------------------------------------------------------------------
# /train_DQN.py:
# --------------------------------------------------------------------------------
#!python3
Created on Sat Jun 1 16:54:41 2019 4 | 5 | @author: kuo 6 | """ 7 | 8 | import time 9 | import numpy as np 10 | import os 11 | import scipy.stats as st 12 | import copy 13 | os.chdir('/home/chan/PDQN/') 14 | from pdqn import PDQNAgent 15 | from DQN import DQNAgent 16 | from env import env_PowerAllocation 17 | import tool as t 18 | 19 | #import tool 20 | os.environ['CUDA_VISIBLE_DEVICES']='0' 21 | 22 | 23 | 24 | 25 | #%% 26 | if __name__ == '__main__': 27 | # PDQN===================================================================== 28 | batch_size=128 29 | initial_memory_threshold=128 #1000 # Number of transitions required to start learning. 30 | replay_memory_size=20000 # Replay memory transition capacity 31 | epsilon_initial=1 32 | epsilon_steps=1000 # Number of episodes over which to linearly anneal epsilon 33 | epsilon_final=0.01 # Final epsilon value 34 | gamma=0.95 35 | clip_grad=1 # Parameter gradient clipping limit 36 | inverting_gradients=True # Use inverting gradients scheme instead of squashing function 37 | seed=0 #0 #Random seed 38 | # 1) ParamActor------------------------------------------------------------ 39 | layers_actor_param =[256]#[64,256]#(256,) # 5-- --1050 40 | actor_param_kwargs={'hidden_layers': layers_actor_param, 'output_layer_init_std': 1e-5,'squashing_function': False,'activation': "relu"} 41 | learning_rate_actor_param=0.00001 42 | tau_actor_param=0.001 43 | """loss func for actor_parameter """ 44 | average=False # Average weighted loss function 45 | weighted=False # Naive weighted loss function 46 | random_weighted=False # Randomly weighted loss function 47 | indexed=False # Indexed loss function 48 | zero_index_gradients=False # Whether to zero all gradients for action-parameters not corresponding to the chosen action 49 | # 2) Actor----------------------------------------------------------------- 50 | tau_actor=0.1 51 | learning_rate_actor=0.00001#0.0001#0.001 # reduce lr can avoid nan output 52 | action_input_layer=0# Which layer 
to input action parameters-- useless? 53 | layers_actor=[512,128,16]#(256,)# # 1055-- --5 # # Hidden layers 54 | actor_kwargs={'hidden_layers': layers_actor, 'output_layer_init_std': 1e-5,'action_input_layer': action_input_layer,'activation': "relu"} 55 | #-------------------------------------------------------------------------- 56 | # Performance 57 | dic_info_key = ['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference','Backhaul Difference','SINRdb','QoS Difference','Throughput'] 58 | dic_info={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in ['1','2','3','4','5','6','7','8'] } 59 | dic_info_ori_key = ['Energy Efficiency','Backhaul Cost','QoS Good', 'QoS Gurantee', 'QoS Bad','System Throughput','QoS Squared Difference'] 60 | dic_info_ori={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in ['1','2','3','4','5','6','7','8'] } 61 | 62 | a_info={'c':[],'P':[]} 63 | dic_store={'a':[],'ddpg_s':[],'r':[],'dqn_s':[],'dqn_Q':[]} 64 | dic_NN_output={'actor':[],'critic':[],'dqn_q_eval':[],'dqn_q_target':[]} 65 | num_back=0 66 | QoS_R=[] 67 | #-------------------------------------------------------------------------- 68 | # debug 69 | debug_PNN=[] 70 | debug_backhaul=[] 71 | debug_BSbackhaul=[] 72 | debug_episode_back=[] 73 | #train_channel_episode=[] 74 | ############################################################################ change this 75 | scale_actions = True 76 | initialise_params = False # True:add pass-through layer to ActorParam and initilize them / False: not add pass-through layer to ActorParam 77 | use_ornstein_noise=True#True # False: Uniformly sample parameters & add noise to taken parameters / True: greedy parameters 78 | save_freq = 100#0 # How often to save models (0 = never) 79 | title="PDQN_cc_s11_r11_0dB_N3_1"#"PDQN2"#"PDQN_backhaul" # Prefix of output files 80 | save_dir ="results_DQN_5v3" #Output directory 81 | load = False 82 | load_dir 
="results/"+title+"0" 83 | load_num="999" 84 | threshold = 0.005#1e-3 85 | start_episode=0 86 | MAXepisode = 80000#600#20000 87 | MAXstep = 100#150 88 | # evaluation_episodes=1000 # Episodes over which to evaluate after training 89 | realization=500#100 90 | lambda1=0.43#0.53#1 91 | lambda2=0.16#0.05#0.42#0.8 92 | lambda3=0#0.1#0.3#0 93 | mean_name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv'#'mean_std_cc_ct_0dB_s3_nv21_oldChannel_nobackhaul.csv' 94 | scenario_name = 'EnvInfo_3' 95 | mean_flage=False 96 | ########################################################################### 97 | #%% ENV 98 | env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3,MAXepisode=MAXepisode,n_baseline=1) 99 | #-------------------------------------------------------------------------- Choose Network Geometry 100 | env.load(name=scenario_name) # use the previous one 101 | #-------------------------------------------------------------------------- mean_std 102 | env.mean_std(10**6,mean_flage,mean_name)#calculate(True) or load(False) 103 | num_actions = env.action_space[0] 104 | s_dim = env.nUE 105 | #%% PDQN 106 | # save model -------------------------------------------------------------- 107 | if save_freq > 0 and save_dir: 108 | save_dir = os.path.join(save_dir, title + "{}".format(str(seed))) 109 | os.makedirs(save_dir, exist_ok=True) 110 | 111 | agent_class = PDQNAgent 112 | agent = agent_class(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space, 113 | batch_size=batch_size,learning_rate_actor=learning_rate_actor,learning_rate_actor_param=learning_rate_actor_param, # 0.001 114 | epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma, 115 | clip_grad=clip_grad,indexed=indexed,average=average, 116 | random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted, 117 | 
tau_actor_param=tau_actor_param,initial_memory_threshold=initial_memory_threshold, 118 | use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients, 119 | actor_kwargs=actor_kwargs,actor_param_kwargs=actor_param_kwargs, 120 | zero_index_gradients=zero_index_gradients,seed=seed) 121 | 122 | power_level=5 123 | agent_classDQN = DQNAgent 124 | agentDQN = agent_classDQN(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space, 125 | power_level=power_level,batch_size=batch_size,learning_rate_actor=learning_rate_actor, # 0.001 126 | epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma, 127 | clip_grad=clip_grad,indexed=indexed,average=average, 128 | random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted, 129 | initial_memory_threshold=initial_memory_threshold, 130 | use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients, 131 | actor_kwargs=actor_kwargs, 132 | zero_index_gradients=zero_index_gradients,seed=seed) 133 | #agentDQN.action_decoder(5, env.P_Max_SBS) 134 | 135 | # 0) add bias to ActorParm by initialize bias of paaathrough -------------- 136 | # desired bias 137 | initial_params_ = list(np.random.uniform(0,env.P_Max_SBS,num_actions*5)) 138 | # change the original parameter range to [-1,1] 139 | if scale_actions: 140 | for a in range (num_actions*5): 141 | initial_params_[a] = 2. * (initial_params_[a] - 0) / (env.P_Max_SBS - 0) - 1. 
142 | # initilize bias 143 | if initialise_params: 144 | initial_weights = np.zeros((num_actions*5,s_dim))#np.zeros((env.action_space.spaces[0].n, env.observation_space.spaces[0].shape[0])) 145 | initial_bias = np.zeros(num_actions*5)#np.zeros(env.action_space.spaces[0].n) 146 | for a in range (num_actions*5):#(env.action_space.spaces[0].n): 147 | initial_bias[a] = initial_params_[a] 148 | agent.set_action_parameter_passthrough_weights(initial_weights, initial_bias) 149 | 150 | 151 | start_time = time.time() 152 | total_step=start_episode*MAXstep 153 | cont = True 154 | episode=0 155 | episode_r_list=[] 156 | #=========================================================================== load existing model to train 157 | #load_dir='results_53/PDQN_cc_s11_r9_0dB_N3_20' 158 | #load_num='1_done' 159 | #agent.load_models(prefix = os.path.join(load_dir, load_num)) 160 | #=========================================================================== 161 | while cont: # episode 162 | episode=episode+1 163 | print(episode, 'episode--------------------------') 164 | # save model 165 | if save_freq > 0 and save_dir and episode % save_freq == 0: 166 | agentDQN.save_models(os.path.join(save_dir, str(episode))) 167 | # reset 168 | s = env.reset() 169 | s = np.array(list(s), dtype=np.float32, copy=False) 170 | # 1) take an action---------------------------------------------------- 171 | a1 = agentDQN.act(s) 172 | c1,P1=agentDQN.action_decoder(a1, env.P_Max_SBS) 173 | 174 | #train_channel_episode.append(env.G) 175 | episode_r=[] 176 | tstep=0 177 | if total_step>100000: 178 | break 179 | while True: # step 180 | tstep = tstep + 1 181 | total_step = total_step + 1 182 | print('Iteration '+str(total_step)+'=======================================') 183 | # 2) step -- next state, reward, done------------------------------ 184 | #================================================================== 185 | info1, lis_info1, s_, info_ori1, done1,debug_info1,QoS_R1 = 
env.step_train(c1,P1,False,True,True,episode-1,tstep-1) 186 | s_ = np.array(list(s_), dtype=np.float32, copy=False) 187 | R1,Energy_Efficiency1,Backhaul_cost1,QoS_good1,QoS_gurantee1,QoS_bad1,sum_c_Throughput1,QoS_squaredD1 =info1 188 | Energy_Efficiency_ori1,Backhaul_cost_ori1,QoS_good_ori1,QoS_gurantee_ori1,QoS_bad_ori1,sum_c_Throughput_ori1,QoS_squaredD_ori1=info_ori1 189 | Backhaul_difference1,SINRdb1,QoS_difference1,c_Throughput_ori1 = lis_info1 190 | Throughput_SBS_threshold,Throughput_BS = debug_info1 191 | debug_backhaul.append(Throughput_SBS_threshold) 192 | debug_BSbackhaul.append(Throughput_BS) 193 | QoS_R.append(QoS_R1) 194 | #================================================================== 195 | # 3) take an action------------------------------------------------ 196 | a1_ = agentDQN.act(s_) 197 | c1_,P1_=agentDQN.action_decoder(a1_, env.P_Max_SBS) 198 | # 4) learn--------------------------------------------------------- 199 | agentDQN.step(s, (a1), R1, s_, (a1_), done1 ) 200 | dic_store['a'].append([c1]+P1) 201 | dic_store['r'].append(R1) 202 | dic_store['ddpg_s'].append(s) 203 | 204 | # 5) Print and store info ------------------------------------------ 205 | key_info=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference'] 206 | key_info_lis=['Backhaul Difference','SINRdb','QoS Difference','Throughput'] 207 | 208 | dic_info = t.inst_info(dic_info,(key_info,key_info_lis),((info1,info1),lis_info1),1) 209 | dic_info_ori = t.inst_info(dic_info_ori,dic_info_ori_key,(info_ori1,info_ori1),0) 210 | 211 | key_inst=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference'] 212 | t.print_info((env.UE2TP[c1],P1),s) # print p in dB 213 | a_info['c'].append(env.UE2TP[c1]) 214 | a_info['P'].append(10*np.log10(P1*1000)) 215 | 216 | episode_r.append(R1) 217 | 218 | # 6) update -------------------------------------------------------- 219 | 
c1, P1 = c1_, P1_ 220 | s = s_ 221 | # number of backhaul constraint violation 222 | if done1: 223 | num_back=num_back+1 224 | debug_episode_back.append(episode) 225 | # check if end the episode 226 | if (tstep>=MAXstep) or done1: 227 | break 228 | 229 | agentDQN.end_episode() 230 | episode_r_list.append(np.mean([episode_r])) 231 | # check if end the training 232 | if (episode>=MAXepisode) : 233 | print('MAXepisode') 234 | cont=False 235 | if (episode>100): 236 | m = np.mean([episode_r_list[episode-100:episode-1]]) 237 | not_convergence = [1 for i in episode_r_list[episode-100:episode-1] if abs(i-m)> threshold] 238 | if sum(not_convergence)==0: 239 | print('Convergence') 240 | cont=False 241 | #%% end training 242 | end_time = time.time() 243 | if episode>=MAXepisode: 244 | print('MAXepisode') 245 | else: 246 | print('episode=',episode) 247 | 248 | print("Training took %.2f seconds" % (end_time - start_time)) 249 | print('(violate)num_back=',num_back,'/',total_step,' ',num_back/total_step*100,'%') 250 | num_QoS=sum([1 for k in QoS_R if k==1 ]) 251 | print('(follow) Qos',num_QoS,'/',total_step,' ',num_QoS/total_step*100,'%') 252 | 253 | # debug for constraints about backhaul 254 | #debug_episode_back = [i-1 for i in debug_episode_back] 255 | #t.plot_constraint(MAXepisode,debug_episode_back,'train',save_dir+'/',0) 256 | #t.writeConstraintHistory(save_dir+'/train_',MAXepisode,debug_episode_back,0) 257 | #t.plot_constraint(MAXepisode,QoS_R,'train',save_dir+'/',1) 258 | #t.writeConstraintHistory(save_dir+'/train_',MAXepisode,QoS_R,1) 259 | 260 | #%% 261 | # save model 262 | if save_freq > 0 and save_dir: 263 | agentDQN.save_models(os.path.join(save_dir, '_done')) 264 | 265 | #%% 266 | # 7) Average per realization steps and Save -------------------------------- 267 | key_avg=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference'] 268 | dic_avg_info = t.train_avg_info(dic_info,key_avg,realization) 269 | 
t.train_plot_avg(dic_avg_info,key_avg,realization,'normalize',save_dir+'/train_') 270 | #------------------------------------------------------------------------- 271 | dic_avg_info_ori = t.train_avg_info(dic_info_ori,dic_info_ori_key,realization) 272 | t.train_plot_avg(dic_avg_info_ori,dic_info_ori_key,realization,'original',save_dir+'/train_') 273 | 274 | 275 | 276 | #%% 277 | import matplotlib.pyplot as plt 278 | def test_plot_individual(env,dic_info,method_index,key_plot,n,save_dir): 279 | #key_plot=['Backhaul Difference','SINR','QoS Difference','Throughput'] 280 | nTerm = [env.nSBS, env.nUE, env.nUE, env.nUE] 281 | title=['['+method_index+']'+i+' with '+str(n)+' Realizations' for i in key_plot] 282 | ylabel=key_plot 283 | xlabel='Training Steps (x'+str(n)+')' 284 | label=['SBS','UE','UE','UE'] 285 | color=['r','b','g','c','m','y','k','b'] 286 | linestyle=['-','--',':',':','-'] 287 | 288 | save={name:[[] for i in range(num)] for name,num in zip(key_plot,nTerm)} 289 | save_ori={name:[[] for i in range(num)] for name,num in zip(key_plot,nTerm)} 290 | for i,key in enumerate(key_plot):#i:0-2 same term(backhaul,SINR,QoS) different methods 291 | difference = dic_info[key] 292 | temp_list=[] 293 | for y in range(nTerm[i]): 294 | temp_list.append([difference[x][y] for x in range(len(difference))]) 295 | for length in range(len(temp_list)): 296 | Bl=[sum(temp_list[length][j*n:(j+1)*n])/n for j in range(int(len(temp_list[length])/n))] 297 | save[key][length]=Bl 298 | Bl_ori=[temp_list[length][j*n:(j+1)*n] for j in range(int(len(temp_list[length])/n))] 299 | for ori in Bl_ori: 300 | save_ori[key][length]=save_ori[key][length]+ori 301 | plt.plot(np.arange(len(Bl)), Bl,label=label[i]+str(length),color=color[length],linestyle=linestyle[0]) 302 | 303 | plt.legend(loc='upper right') 304 | plt.title(title[i]) 305 | plt.ylabel(ylabel[i]) 306 | plt.xlabel(xlabel) 307 | plt.savefig(save_dir+title[i]+'.png') 308 | plt.show() 309 | return save,save_ori 310 | 311 | # 8) plot results 
of each SBS or UE, e.g.Backhaul_difference-SBS, SINR-UE-------------- 312 | key_individual=['Backhaul Difference','SINRdb','QoS Difference', 'Throughput'] 313 | save,save_ori= test_plot_individual(env,dic_info['1'],'1',key_individual,realization,save_dir+'/train_') 314 | # 9) write info ------------------------------------------------------------ 315 | # average info 316 | t.writeCSV(dic_avg_info,dic_avg_info_ori,save,dic_info_ori_key,key_individual,key_avg,save_dir+'/train',0) 317 | # original 318 | #t.writeCSV(dic_info,dic_info_ori,save_ori,dic_info_ori_key,key_individual,key_avg,save_dir+'/train_original',0) 319 | 320 | #%% debug 321 | #t.writeCSI(save_dir+'/CSI',train_channel_episode) 322 | debug_I = env.debug_I 323 | debug_UE_throughput = env.debug_UE_throughput # each UE throughput 324 | debug_SBS_throughput = env.debug_SBS_throughput 325 | debug_SBS_threshold = env.debug_SBS_threshold 326 | debug_c = env.debug_c 327 | debug_p = env.debug_p 328 | debug_backhaul = env.debug_backhaul 329 | debug_QoS = env.debug_QoS # which episode and step violate QoS & UE index 330 | #%% test actual converage range 331 | threshold=7.5 332 | m = np.mean([episode_r_list[episode-100:episode-1]]) 333 | not_convergence = [1 for i in episode_r_list[episode-100:episode-1] if abs(i-m)> threshold] 334 | if sum(not_convergence)==0: 335 | print('Convergence') 336 | else: 337 | print('not') -------------------------------------------------------------------------------- /train_PDQN.py: -------------------------------------------------------------------------------- 1 | #!python3 2 | """ 3 | Created on Sat Jun 1 16:54:41 2019 4 | 5 | @author: kuo 6 | """ 7 | 8 | import time 9 | import numpy as np 10 | import os 11 | import scipy.stats as st 12 | import copy 13 | os.chdir('/home/chan/PDQN/') 14 | from pdqn import PDQNAgent 15 | from env import env_PowerAllocation 16 | import tool as t 17 | 18 | #import tool 19 | os.environ['CUDA_VISIBLE_DEVICES']='0' 20 | 21 | 22 | 23 | 24 | #%% 25 | if 
__name__ == '__main__': 26 | # PDQN===================================================================== 27 | batch_size=128 28 | initial_memory_threshold=128 #1000 # Number of transitions required to start learning. 29 | replay_memory_size=20000 # Replay memory transition capacity 30 | epsilon_initial=1 31 | epsilon_steps=1000 # Number of episodes over which to linearly anneal epsilon 32 | epsilon_final=0.01 # Final epsilon value 33 | gamma=0.95 34 | clip_grad=1 # Parameter gradient clipping limit 35 | inverting_gradients=True # Use inverting gradients scheme instead of squashing function 36 | seed=0 #0 #Random seed 37 | # 1) ParamActor------------------------------------------------------------ 38 | layers_actor_param =[256]#[64,256]#(256,) # 5-- --1050 39 | actor_param_kwargs={'hidden_layers': layers_actor_param, 'output_layer_init_std': 1e-5,'squashing_function': False,'activation': "relu"} 40 | learning_rate_actor_param=0.00001 41 | tau_actor_param=0.001 42 | """loss func for actor_parameter """ 43 | average=False # Average weighted loss function 44 | weighted=False # Naive weighted loss function 45 | random_weighted=False # Randomly weighted loss function 46 | indexed=False # Indexed loss function 47 | zero_index_gradients=False # Whether to zero all gradients for action-parameters not corresponding to the chosen action 48 | # 2) Actor----------------------------------------------------------------- 49 | tau_actor=0.1 50 | learning_rate_actor=0.00001#0.0001#0.001 # reduce lr can avoid nan output 51 | action_input_layer=0# Which layer to input action parameters-- useless? 
52 | layers_actor=[512,128,16]#(256,)# # 1055-- --5 # # Hidden layers 53 | actor_kwargs={'hidden_layers': layers_actor, 'output_layer_init_std': 1e-5,'action_input_layer': action_input_layer,'activation': "relu"} 54 | #-------------------------------------------------------------------------- 55 | # Performance 56 | dic_info_key = ['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference','Backhaul Difference','SINRdb','QoS Difference','Throughput'] 57 | dic_info={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in ['1','2','3','4','5','6','7','8'] } 58 | dic_info_ori_key = ['Energy Efficiency','Backhaul Cost','QoS Good', 'QoS Gurantee', 'QoS Bad','System Throughput','QoS Squared Difference'] 59 | dic_info_ori={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in ['1','2','3','4','5','6','7','8'] } 60 | 61 | a_info={'c':[],'P':[]} 62 | dic_store={'a':[],'ddpg_s':[],'r':[],'dqn_s':[],'dqn_Q':[]} 63 | dic_NN_output={'actor':[],'critic':[],'dqn_q_eval':[],'dqn_q_target':[]} 64 | num_back=0 65 | QoS_R=[] 66 | #-------------------------------------------------------------------------- 67 | # debug 68 | debug_PNN=[] 69 | debug_backhaul=[] 70 | debug_BSbackhaul=[] 71 | debug_episode_back=[] 72 | train_channel_episode=[] 73 | ############################################################################ change this 74 | scale_actions = True 75 | initialise_params = False # True:add pass-through layer to ActorParam and initilize them / False: not add pass-through layer to ActorParam 76 | use_ornstein_noise=True#True # False: Uniformly sample parameters & add noise to taken parameters / True: greedy parameters 77 | save_freq = 100#0 # How often to save models (0 = never) 78 | title="PDQN_cc_s11_r11_0dB_N3_1"#"PDQN2"#"PDQN_backhaul" # Prefix of output files 79 | save_dir ="results_PDQN_5v3" #Output directory 80 | load = False 81 | load_dir ="results/"+title+"0" 82 | 
load_num="999" 83 | threshold = 0.005#1e-3 84 | start_episode=0 85 | MAXepisode = 100000#600#20000 86 | MAXstep = 100#150 87 | # evaluation_episodes=1000 # Episodes over which to evaluate after training 88 | realization=500#100 89 | lambda1=0.43#0.53#1 90 | lambda2=0.16#0.05#0.42#0.8 91 | lambda3=0#0.1#0.3#0 92 | mean_name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv'#'mean_std_cc_ct_0dB_s3_nv21_oldChannel_nobackhaul.csv' 93 | scenario_name = 'EnvInfo_3' 94 | mean_flage=False 95 | ########################################################################### 96 | #%% ENV 97 | env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3,MAXepisode=MAXepisode,n_baseline=1) 98 | #-------------------------------------------------------------------------- Choose Network Geometry 99 | env.load(name=scenario_name) # use the previous one 100 | #-------------------------------------------------------------------------- mean_std 101 | env.mean_std(10**6,mean_flage,mean_name)#calculate(True) or load(False) 102 | num_actions = env.action_space[0] 103 | s_dim = env.nUE 104 | #%% PDQN 105 | # save model -------------------------------------------------------------- 106 | if save_freq > 0 and save_dir: 107 | save_dir = os.path.join(save_dir, title + "{}".format(str(seed))) 108 | os.makedirs(save_dir, exist_ok=True) 109 | 110 | agent_class = PDQNAgent 111 | agent = agent_class(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space, 112 | batch_size=batch_size,learning_rate_actor=learning_rate_actor,learning_rate_actor_param=learning_rate_actor_param, # 0.001 113 | epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma, 114 | clip_grad=clip_grad,indexed=indexed,average=average, 115 | random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted, 116 | tau_actor_param=tau_actor_param,initial_memory_threshold=initial_memory_threshold, 117 | 
use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients, 118 | actor_kwargs=actor_kwargs,actor_param_kwargs=actor_param_kwargs, 119 | zero_index_gradients=zero_index_gradients,seed=seed) 120 | 121 | 122 | # 0) add bias to ActorParm by initialize bias of paaathrough -------------- 123 | # desired bias 124 | initial_params_ = list(np.random.uniform(0,env.P_Max_SBS,num_actions*5)) 125 | # change the original parameter range to [-1,1] 126 | if scale_actions: 127 | for a in range (num_actions*5): 128 | initial_params_[a] = 2. * (initial_params_[a] - 0) / (env.P_Max_SBS - 0) - 1. 129 | # initilize bias 130 | if initialise_params: 131 | initial_weights = np.zeros((num_actions*5,s_dim))#np.zeros((env.action_space.spaces[0].n, env.observation_space.spaces[0].shape[0])) 132 | initial_bias = np.zeros(num_actions*5)#np.zeros(env.action_space.spaces[0].n) 133 | for a in range (num_actions*5):#(env.action_space.spaces[0].n): 134 | initial_bias[a] = initial_params_[a] 135 | agent.set_action_parameter_passthrough_weights(initial_weights, initial_bias) 136 | 137 | 138 | start_time = time.time() 139 | total_step=start_episode*MAXstep 140 | cont = True 141 | episode=0 142 | episode_r_list=[] 143 | #=========================================================================== load existing model to train 144 | #load_dir='results_53/PDQN_cc_s11_r9_0dB_N3_20' 145 | #load_num='1_done' 146 | #agent.load_models(prefix = os.path.join(load_dir, load_num)) 147 | #=========================================================================== 148 | while cont: # episode 149 | 150 | episode=episode+1 151 | print(episode, 'episode--------------------------') 152 | # save model 153 | if save_freq > 0 and save_dir and episode % save_freq == 0: 154 | agent.save_models(os.path.join(save_dir, str(episode))) 155 | # reset 156 | s = env.reset() 157 | s = np.array(list(s), dtype=np.float32, copy=False) 158 | # 1) take an 
action---------------------------------------------------- 159 | c1, PNN1, all_action_parameters = agent.act(s) 160 | P1 = t.p_normalize(env.P_Max_SBS,PNN1) 161 | 162 | train_channel_episode.append(env.G) 163 | episode_r=[] 164 | tstep=0 165 | if total_step>50000: 166 | break 167 | while True: # step 168 | tstep = tstep + 1 169 | total_step = total_step + 1 170 | print('Iteration '+str(total_step)+'=======================================') 171 | # 2) step -- next state, reward, done------------------------------ 172 | #================================================================== 173 | info1, lis_info1, s_, info_ori1, done1,debug_info1,QoS_R1 = env.step_train(c1,P1,False,True,True,episode-1,tstep-1) 174 | s_ = np.array(list(s_), dtype=np.float32, copy=False) 175 | R1,Energy_Efficiency1,Backhaul_cost1,QoS_good1,QoS_gurantee1,QoS_bad1,sum_c_Throughput1,QoS_squaredD1 =info1 176 | Energy_Efficiency_ori1,Backhaul_cost_ori1,QoS_good_ori1,QoS_gurantee_ori1,QoS_bad_ori1,sum_c_Throughput_ori1,QoS_squaredD_ori1=info_ori1 177 | Backhaul_difference1,SINRdb1,QoS_difference1,c_Throughput_ori1 = lis_info1 178 | Throughput_SBS_threshold,Throughput_BS = debug_info1 179 | debug_backhaul.append(Throughput_SBS_threshold) 180 | debug_BSbackhaul.append(Throughput_BS) 181 | QoS_R.append(QoS_R1) 182 | #================================================================== 183 | # 3) take an action------------------------------------------------ 184 | c1_, PNN1_, all_action_parameters_ = agent.act(s_) 185 | P1_ = t.p_normalize(env.P_Max_SBS,PNN1_) 186 | # 4) learn--------------------------------------------------------- 187 | agent.step(s, (c1, all_action_parameters), R1, s_, (c1_, all_action_parameters_), done1 ) 188 | dic_store['a'].append([c1]+P1) 189 | dic_store['r'].append(R1) 190 | dic_store['ddpg_s'].append(s) 191 | debug_PNN.append(PNN1) 192 | # 5) Print and store info ------------------------------------------ 193 | key_info=['R','Energy Efficiency','Backhaul Cost','QoS 
Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference'] 194 | key_info_lis=['Backhaul Difference','SINRdb','QoS Difference','Throughput'] 195 | 196 | dic_info = t.inst_info(dic_info,(key_info,key_info_lis),((info1,info1),lis_info1),1) 197 | dic_info_ori = t.inst_info(dic_info_ori,dic_info_ori_key,(info_ori1,info_ori1),0) 198 | 199 | key_inst=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference'] 200 | t.print_info((env.UE2TP[c1],P1),s) # print p in dB 201 | a_info['c'].append(env.UE2TP[c1]) 202 | a_info['P'].append(10*np.log10(P1*1000)) 203 | 204 | episode_r.append(R1) 205 | 206 | # 6) update -------------------------------------------------------- 207 | c1, P1, all_action_parameters = c1_, P1_, all_action_parameters_ 208 | s = s_ 209 | # number of backhaul constraint violation 210 | if done1: 211 | num_back=num_back+1 212 | debug_episode_back.append(episode) 213 | # check if end the episode 214 | if (tstep>=MAXstep) or done1: 215 | break 216 | 217 | agent.end_episode() 218 | episode_r_list.append(np.mean([episode_r])) 219 | # check if end the training 220 | if (episode>=MAXepisode) : 221 | print('MAXepisode') 222 | cont=False 223 | if (episode>100): 224 | m = np.mean([episode_r_list[episode-100:episode-1]]) 225 | not_convergence = [1 for i in episode_r_list[episode-100:episode-1] if abs(i-m)> threshold] 226 | if sum(not_convergence)==0: 227 | print('Convergence') 228 | cont=False 229 | #%% end training 230 | end_time = time.time() 231 | if episode>=MAXepisode: 232 | print('MAXepisode') 233 | else: 234 | print('episode=',episode) 235 | 236 | print("Training took %.2f seconds" % (end_time - start_time)) 237 | print('(violate)num_back=',num_back,'/',total_step,' ',num_back/total_step*100,'%') 238 | num_QoS=sum([1 for k in QoS_R if k==1 ]) 239 | print('(follow) Qos',num_QoS,'/',total_step,' ',num_QoS/total_step*100,'%') 240 | 241 | # debug for constraints about backhaul 242 | 
#debug_episode_back = [i-1 for i in debug_episode_back] 243 | #t.plot_constraint(MAXepisode,debug_episode_back,'train',save_dir+'/',0) 244 | #t.writeConstraintHistory(save_dir+'/train_',MAXepisode,debug_episode_back,0) 245 | #t.plot_constraint(MAXepisode,QoS_R,'train',save_dir+'/',1) 246 | #t.writeConstraintHistory(save_dir+'/train_',MAXepisode,QoS_R,1) 247 | 248 | #%% 249 | # save model 250 | if save_freq > 0 and save_dir: 251 | agent.save_models(os.path.join(save_dir, '_done')) 252 | 253 | #%% 254 | # 7) Average per realization steps and Save -------------------------------- 255 | key_avg=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference'] 256 | dic_avg_info = t.train_avg_info(dic_info,key_avg,realization) 257 | t.train_plot_avg(dic_avg_info,key_avg,realization,'normalize',save_dir+'/train_') 258 | #------------------------------------------------------------------------- 259 | dic_avg_info_ori = t.train_avg_info(dic_info_ori,dic_info_ori_key,realization) 260 | t.train_plot_avg(dic_avg_info_ori,dic_info_ori_key,realization,'original',save_dir+'/train_') 261 | 262 | 263 | 264 | #%% 265 | import matplotlib.pyplot as plt 266 | def test_plot_individual(env,dic_info,method_index,key_plot,n,save_dir): 267 | #key_plot=['Backhaul Difference','SINR','QoS Difference','Throughput'] 268 | nTerm = [env.nSBS, env.nUE, env.nUE, env.nUE] 269 | title=['['+method_index+']'+i+' with '+str(n)+' Realizations' for i in key_plot] 270 | ylabel=key_plot 271 | xlabel='Training Steps (x'+str(n)+')' 272 | label=['SBS','UE','UE','UE'] 273 | color=['r','b','g','c','m','y','k','b'] 274 | linestyle=['-','--',':',':','-'] 275 | 276 | save={name:[[] for i in range(num)] for name,num in zip(key_plot,nTerm)} 277 | save_ori={name:[[] for i in range(num)] for name,num in zip(key_plot,nTerm)} 278 | for i,key in enumerate(key_plot):#i:0-2 same term(backhaul,SINR,QoS) different methods 279 | difference = dic_info[key] 280 | 
temp_list=[] 281 | for y in range(nTerm[i]): 282 | temp_list.append([difference[x][y] for x in range(len(difference))]) 283 | for length in range(len(temp_list)): 284 | Bl=[sum(temp_list[length][j*n:(j+1)*n])/n for j in range(int(len(temp_list[length])/n))] 285 | save[key][length]=Bl 286 | Bl_ori=[temp_list[length][j*n:(j+1)*n] for j in range(int(len(temp_list[length])/n))] 287 | for ori in Bl_ori: 288 | save_ori[key][length]=save_ori[key][length]+ori 289 | plt.plot(np.arange(len(Bl)), Bl,label=label[i]+str(length),color=color[length],linestyle=linestyle[0]) 290 | 291 | plt.legend(loc='upper right') 292 | plt.title(title[i]) 293 | plt.ylabel(ylabel[i]) 294 | plt.xlabel(xlabel) 295 | plt.savefig(save_dir+title[i]+'.png') 296 | plt.show() 297 | return save,save_ori 298 | 299 | # 8) plot results of each SBS or UE, e.g.Backhaul_difference-SBS, SINR-UE-------------- 300 | key_individual=['Backhaul Difference','SINRdb','QoS Difference', 'Throughput'] 301 | save,save_ori= test_plot_individual(env,dic_info['1'],'1',key_individual,realization,save_dir+'/train_') 302 | # 9) write info ------------------------------------------------------------ 303 | # average info 304 | t.writeCSV(dic_avg_info,dic_avg_info_ori,save,dic_info_ori_key,key_individual,key_avg,save_dir+'/train',0) 305 | # original 306 | #t.writeCSV(dic_info,dic_info_ori,save_ori,dic_info_ori_key,key_individual,key_avg,save_dir+'/train_original',0) 307 | 308 | #%% debug 309 | t.writeCSI(save_dir+'/CSI',train_channel_episode) 310 | debug_I = env.debug_I 311 | debug_UE_throughput = env.debug_UE_throughput # each UE throughput 312 | debug_SBS_throughput = env.debug_SBS_throughput 313 | debug_SBS_threshold = env.debug_SBS_threshold 314 | debug_c = env.debug_c 315 | debug_p = env.debug_p 316 | debug_backhaul = env.debug_backhaul 317 | debug_QoS = env.debug_QoS # which episode and step violate QoS & UE index 318 | #%% test actual converage range 319 | threshold=7.5 320 | m = 
np.mean([episode_r_list[episode-100:episode-1]]) 321 | not_convergence = [1 for i in episode_r_list[episode-100:episode-1] if abs(i-m)> threshold] 322 | if sum(not_convergence)==0: 323 | print('Convergence') 324 | else: 325 | print('not') -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | def soft_update_target_network(source_network, target_network, tau): 2 | for target_param, param in zip(target_network.parameters(), source_network.parameters()): 3 | target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data) 4 | 5 | 6 | def hard_update_target_network(source_network, target_network): 7 | for target_param, param in zip(target_network.parameters(), source_network.parameters()): 8 | target_param.data.copy_(param.data) 9 | -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/noise.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/utils/__pycache__/noise.cpython-37.pyc -------------------------------------------------------------------------------- /utils/noise.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class OrnsteinUhlenbeckActionNoise(object): 5 | """ 6 | Based on 
http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab 7 | Source: https://github.com/vy007vikas/PyTorch-ActorCriticRL/blob/master/utils.py 8 | """ 9 | 10 | def __init__(self, action_dim, mu=0, theta=0.15, sigma=0.2, random_machine=np.random): 11 | super(OrnsteinUhlenbeckActionNoise, self).__init__() 12 | self.random = random_machine 13 | self.action_dim = action_dim 14 | self.mu = mu 15 | self.theta = theta 16 | self.sigma = sigma 17 | self.X = np.ones(self.action_dim) * self.mu 18 | 19 | def reset(self): 20 | self.X = np.ones(self.action_dim) * self.mu 21 | 22 | def sample(self): 23 | dx = self.theta * (self.mu - self.X) 24 | dx = dx + self.sigma * self.random.randn(len(self.X)) 25 | self.X = self.X + dx 26 | return self.X 27 | --------------------------------------------------------------------------------