├── CSI.csv
├── DQN.py
├── EnvInfo_3.csv
├── README.md
├── __pycache__
    ├── DQN.cpython-37.pyc
    ├── agent.cpython-37.pyc
    ├── env.cpython-37.pyc
    ├── pdqn.cpython-37.pyc
    └── tool.cpython-37.pyc
├── agent.py
├── env.py
├── mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv
├── memory
    ├── __init__.py
    ├── __pycache__
    │   ├── __init__.cpython-37.pyc
    │   └── memory.cpython-37.pyc
    └── memory.py
├── pdqn.py
├── test_DQN.py
├── tool.py
├── train_DQN.py
├── train_PDQN.py
└── utils
    ├── __init__.py
    ├── __pycache__
        ├── __init__.cpython-37.pyc
        └── noise.cpython-37.pyc
    └── noise.py


/DQN.py:
--------------------------------------------------------------------------------
  1 | #!python3
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | import torch.optim as optim
  6 | import numpy as np
  7 | import random
  8 | from collections import Counter
  9 | from torch.autograd import Variable
 10 | import time
 11 | import scipy.stats as st
 12 | import copy
 13 | import matplotlib.pyplot as plt
 14 | import os 
 15 | os.chdir('/home/chan/PDQN/') 
 16 | os.environ['CUDA_VISIBLE_DEVICES']='1'
 17 | from agent import Agent
 18 | from memory.memory import Memory
 19 | #from memory import Memory
 20 | from utils import soft_update_target_network, hard_update_target_network
 21 | from utils.noise import OrnsteinUhlenbeckActionNoise
 22 | from env import env_PowerAllocation
 23 | import tool as t
 24 | 
 25 | 
 26 | 
 27 | class DQNActor(nn.Module):
 28 | 
 29 |     def __init__(self, state_size, action_size, power_level, hidden_layers=(100,),
 30 |                  output_layer_init_std=None, activation="relu", **kwargs):
 31 |         super(DQNActor, self).__init__()
 32 |         self.state_size = state_size
 33 |         self.action_size = action_size
 34 |         self.activation = activation
 35 |         self.power_level=power_level
 36 |         #self.state_size-----------------------
 37 |         # version 1 (hidden layer >= 2)
 38 |         # create layers -------------------------------------------------------
 39 |         self.layers = nn.ModuleList()
 40 |         # 1-0) state input layer - 1st hidden layer    
 41 |         self.state_input_layer = nn.Linear(self.state_size , hidden_layers[0])      
 42 |         # 1-1) action input layer - 2nd hidden layer 
 43 | 
 44 |         # 1-2) all hidden layer
 45 |         nh = len(hidden_layers)
 46 |         for i in range(1,nh):
 47 |             self.layers.append(nn.Linear(hidden_layers[i - 1], hidden_layers[i]))
 48 |         # 1-3) the last hidden layer - output layer (action_size) -- A(s,a)
 49 |         self.layers.append(nn.Linear(hidden_layers[nh-1], self.action_size))
 50 |         # 1-4) the last hidden layer - output layer (1) -- V(s)
 51 |         self.value_layer = nn.Linear(hidden_layers[nh-1], self.action_size)
 52 |         
 53 |         # initialise layer weights --------------------------------------------
 54 |         # 1-0) all layers except the last layer -- He initialization / zero initialzation
 55 |         nn.init.kaiming_normal_(self.state_input_layer.weight, nonlinearity=activation)
 56 |         nn.init.zeros_(self.state_input_layer.bias)
 57 |         for i in range(0, len(self.layers) - 1):
 58 |             nn.init.kaiming_normal_(self.layers[i].weight, nonlinearity=activation)
 59 |             nn.init.zeros_(self.layers[i].bias)
 60 |         # 1-1) the last layer for A(s,a) -- normal initialzation / zero initialzation
 61 |         nn.init.normal_(self.layers[-1].weight, mean=0., std=output_layer_init_std)
 62 |         nn.init.zeros_(self.layers[-1].bias)
 63 |         # 1-2) the last layer for V(s) -- normal initialzation / zero initialzation
 64 |         nn.init.normal_(self.value_layer.weight, mean=0., std=output_layer_init_std)
 65 |         nn.init.zeros_(self.value_layer.bias)
 66 |         
 67 |         '''
 68 |         # version 0
 69 |         # create layers -------------------------------------------------------
 70 |         self.layers = nn.ModuleList()
 71 |         inputSize = self.state_size + self.action_parameter_size #5+210*5
 72 |         if hidden_layers is not None:
 73 |             nh = len(hidden_layers)
 74 |             # 1-0) input layer (inputSize) - 1st hidden layer
 75 |             self.layers.append(nn.Linear(inputSize, hidden_layers[0]))
 76 |             # 1-1) all hidden layer
 77 |             for i in range(1, nh):
 78 |                 self.layers.append(nn.Linear(hidden_layers[i - 1], hidden_layers[i]))
 79 |             # 1-2) the last hidden layer - output layer (action_size)  
 80 |             lastHiddenLayerSize = hidden_layers[nh - 1]
 81 |         self.layers.append(nn.Linear(lastHiddenLayerSize, self.action_size))
 82 | 
 83 |         # initialise layer weights --------------------------------------------
 84 |         # 1-0) all layers except the last layer -- He initialization / zero initialzation
 85 |         for i in range(0, len(self.layers) - 1):
 86 |             nn.init.kaiming_normal_(self.layers[i].weight, nonlinearity=activation)
 87 |             nn.init.zeros_(self.layers[i].bias)
 88 |         # 1-1) the last layer -- normal initialzation / zero initialzation
 89 |         nn.init.normal_(self.layers[-1].weight, mean=0., std=output_layer_init_std)
 90 |         nn.init.zeros_(self.layers[-1].bias)
 91 |         '''
 92 |     def forward(self, state):
 93 |         negative_slope = 0.01 # slope for leaky_relu
 94 | 
 95 |         # version 1
 96 |         num_layers = len(self.layers)
 97 |         if self.activation == "relu":
 98 |             # 0-0) state input layer - 1st hidden layer
 99 |             x= F.relu(self.state_input_layer(state))
100 |             x = F.relu(self.layers[0](x))
101 |             # 0-1) action input layer + 1st hidden layer
102 |         elif self.activation == "leaky_relu":
103 |             # 0-0) state input layer - 1st hidden layer
104 |             x= F.leaky_relu(self.state_input_layer(state),negative_slope)
105 |             x = F.leaky_relu(self.layers[0](x),negative_slope)
106 |             # 0-1) action input layer + 1st hidden layer
107 |         else:
108 |             raise ValueError("Unknown activation function "+str(self.activation))
109 |         # 0-2) (action input layer + 1st hidden layer) - other hidden layers        
110 |         for i in range(1, num_layers - 1):
111 |             if self.activation == "relu":
112 |                 x = F.relu(self.layers[i](x))
113 |             elif self.activation == "leaky_relu":
114 |                 x = F.leaky_relu(self.layers[i](x), negative_slope)
115 |             else:
116 |                 raise ValueError("Unknown activation function "+str(self.activation))
117 |         # 0-3)  the last hidden layer - output layer ( not pass through activation function  ) 
118 |         V = self.value_layer(x)
119 |         """
120 |         # version 0
121 |         # 1-0) all layers except the last layer -- pass through activation function
122 |         x = torch.cat((state, action_parameters), dim=1)
123 |         num_layers = len(self.layers)
124 |         for i in range(0, num_layers - 1):
125 |             if self.activation == "relu":
126 |                 x = F.relu(self.layers[i](x))
127 |             elif self.activation == "leaky_relu":
128 |                 x = F.leaky_relu(self.layers[i](x), negative_slope)
129 |             else:
130 |                 raise ValueError("Unknown activation function "+str(self.activation))
131 |         # 1-1) the last layer -- not pass through activation function
132 |         Q = self.layers[-1](x)
133 |         """
134 |         return V
135 | 
136 | #%%
137 | 
138 | """   
139 | num_actions=210
140 | action_parameter_size=210*5
141 | s_dim=5
142 | action_input_layer=0# Which layer to input action parameters
143 | layers=[32,16]#(256,)# # Hidden layers 
144 | actor_param_kwargs={'hidden_layers': layers, 'output_layer_init_std': 1e-5,'squashing_function': False}
145 | actor_param = ParamActor(s_dim, num_actions, action_parameter_size, **actor_param_kwargs)
146 | print(actor_param)
147 | """
148 | #%%
149 | class DQNAgent(Agent):
150 |     #DDPG actor-critic agent for parameterised action spaces [Hausknecht and Stone 2016]
151 | 
152 |     NAME = "DQN Agent"
153 | 
154 |     def __init__(self,
155 |                  s_dim,#observation_space,
156 |                  action_space,
157 |                  nUE, power_level,
158 |                  actor_class=DQNActor,
159 |                  actor_kwargs={},
160 |                  epsilon_initial=1.0,
161 |                  epsilon_final=0.05,
162 |                  epsilon_steps=10000,
163 |                  batch_size=64,
164 |                  gamma=0.99,
165 |                  tau_actor=0.01,  # Polyak averaging factor for copying target weights
166 |                  replay_memory_size=1000000,
167 |                  learning_rate_actor=0.0001,
168 |                  initial_memory_threshold=0,
169 |                  use_ornstein_noise=False,  # if false, uses epsilon-greedy with uniform-random action-parameter exploration
170 |                  loss_func=F.mse_loss, # F.mse_loss
171 |                  clip_grad=10,
172 |                  inverting_gradients=False,
173 |                  zero_index_gradients=False,
174 |                  indexed=False,
175 |                  weighted=False,
176 |                  average=False,
177 |                  random_weighted=False,
178 |                  device="cuda" if torch.cuda.is_available() else "cpu",
179 |                  seed=None):
180 |         super(DQNAgent, self).__init__(s_dim, action_space)#observation_space, action_space)
181 |         self.device = torch.device(device)
182 |         self.nUE=nUE
183 |         """
184 |         parameter_min[i] -- np.array
185 |         action_space=(num_action, [(parameter_min[i],parameter_max[i]) for i in range(num_action)])
186 |         """
187 |         self.power_level=power_level
188 |         self.num_actions = self.action_space[0]*(self.power_level**self.nUE) # number of discrete actions
189 |         self.action_max = torch.from_numpy(np.ones((self.num_actions,))).float().to(device)##
190 |         self.action_min = -self.action_max.detach()##
191 |         self.action_range = (self.action_max-self.action_min).detach()##
192 |         #print([self.action_space.spaces[i].high for i in range(1,self.num_actions+1)])
193 |         self.epsilon = epsilon_initial
194 |         self.epsilon_initial = epsilon_initial
195 |         self.epsilon_final = epsilon_final
196 |         self.epsilon_steps = epsilon_steps
197 |         self.indexed = indexed
198 |         self.weighted = weighted
199 |         self.average = average
200 |         self.random_weighted = random_weighted
201 |         assert (weighted ^ average ^ random_weighted) or not (weighted or average or random_weighted)
202 |         #??
203 |         self.batch_size = batch_size
204 |         self.gamma = gamma
205 |         self.replay_memory_size = replay_memory_size
206 |         self.initial_memory_threshold = initial_memory_threshold
207 |         self.learning_rate_actor = learning_rate_actor
208 |         self.inverting_gradients = inverting_gradients
209 |         self.tau_actor = tau_actor
210 |         self._step = 0
211 |         self._episode = 0
212 |         self.updates = 0
213 |         self.clip_grad = clip_grad
214 |         self.zero_index_gradients = zero_index_gradients
215 | 
216 |         self.np_random = None
217 |         self.seed = seed
218 |         self._seed(seed)
219 |         #??
220 |         self.use_ornstein_noise = use_ornstein_noise
221 | 
222 |         #print(self.num_actions+self.action_parameter_size)
223 |         """
224 |         observation_space=np.array([Qos_difference of UE 0])
225 |         """
226 |         # 0) Memory
227 |         self.replay_memory =  Memory(replay_memory_size, (s_dim,), (1,), next_actions=False)## #Memory(replay_memory_size, observation_space.shape, (1+self.action_parameter_size,), next_actions=False)
228 |         # 1-1) Actor-eval
229 |         self.actor = actor_class(s_dim, self.num_actions, power_level , **actor_kwargs).to(device)#self.actor = actor_class(self.observation_space.shape[0], self.num_actions, self.action_parameter_size, **actor_kwargs).to(device)
230 |         # 2-2) Actor-target
231 |         self.actor_target = actor_class(s_dim, self.num_actions, power_level, **actor_kwargs).to(device)#self.actor_target = actor_class(self.observation_space.shape[0], self.num_actions, self.action_parameter_size, **actor_kwargs).to(device)
232 |         hard_update_target_network(self.actor, self.actor_target) # directly copy without ratio
233 |         self.actor_target.eval()
234 |         # 2-3) Actor parameter
235 |         # 2-4) Actor Loss Function
236 |         self.loss_func = loss_func  # l1_smooth_loss performs better but original paper used MSE
237 | 
238 |         # Original DDPG paper [Lillicrap et al. 2016] used a weight decay of 0.01 for Q (critic)
239 |         # but setting weight_decay=0.01 on the critic_optimiser seems to perform worse...
240 |         # using AMSgrad ("fixed" version of Adam, amsgrad=True) doesn't seem to help either...
241 |         self.actor_optimiser = optim.Adam(self.actor.parameters(), lr=self.learning_rate_actor) #, betas=(0.95, 0.999))
242 | 
243 |     def __str__(self):
244 |         desc = super().__str__() + "\n"
245 |         desc += "Actor Network {}\n".format(self.actor) + \
246 |                 "Actor Alpha: {}\n".format(self.learning_rate_actor) + \
247 |                 "Gamma: {}\n".format(self.gamma) + \
248 |                 "Tau (actor): {}\n".format(self.tau_actor) + \
249 |                 "Inverting Gradients: {}\n".format(self.inverting_gradients) + \
250 |                 "Replay Memory: {}\n".format(self.replay_memory_size) + \
251 |                 "Batch Size: {}\n".format(self.batch_size) + \
252 |                 "Initial memory: {}\n".format(self.initial_memory_threshold) + \
253 |                 "epsilon_initial: {}\n".format(self.epsilon_initial) + \
254 |                 "epsilon_final: {}\n".format(self.epsilon_final) + \
255 |                 "epsilon_steps: {}\n".format(self.epsilon_steps) + \
256 |                 "Clip Grad: {}\n".format(self.clip_grad) + \
257 |                 "Ornstein Noise?: {}\n".format(self.use_ornstein_noise) + \
258 |                 "Zero Index Grads?: {}\n".format(self.zero_index_gradients) + \
259 |                 "Seed: {}\n".format(self.seed)
260 |         return desc
261 |     
262 |     # initialize parameter(passthrough layer of ActorParam) by user
263 | 
264 | 
265 |     def _seed(self, seed=None):
266 |         """
267 |         NOTE: this will not reset the randomly initialised weights; use the seed parameter in the constructor instead.
268 | 
269 |         :param seed:
270 |         :return:
271 |         """
272 |         self.seed = seed
273 |         random.seed(seed)
274 |         np.random.seed(seed)
275 |         self.np_random = np.random.RandomState(seed=seed)
276 |         if seed is not None:
277 |             torch.manual_seed(seed)
278 |             if self.device == torch.device("cuda"):
279 |                 torch.cuda.manual_seed(seed)
280 | 
281 |     def start_episode(self):
282 |         pass
283 | 
284 |     def end_episode(self):
285 |         # adjust epsilon for epsilon-greedy
286 |         self._episode += 1
287 |         ep = self._episode
288 |         if ep < self.epsilon_steps:
289 |             self.epsilon = self.epsilon_initial - (self.epsilon_initial - self.epsilon_final) * (
290 |                     ep / self.epsilon_steps)
291 |         else:
292 |             self.epsilon = self.epsilon_final
293 |             
294 |     # take an action for train =================================================
295 |     def act(self, state):
296 |         with torch.no_grad():
297 |             state = torch.from_numpy(state).to(self.device)
298 |             # 0) get action parameters-----------------------------------------
299 |             
300 |             # 1) get discrete action-------------------------------------------
301 |             # Hausknecht and Stone [2016] use epsilon greedy actions with uniform random action-parameter exploration
302 |             rnd = self.np_random.uniform()
303 |             if rnd < self.epsilon:
304 |                 action = self.np_random.choice(self.num_actions)
305 | 
306 |             else:
307 |                 # select maximum action
308 |                 Q_a = self.actor.forward(state.unsqueeze(0))
309 |                 Q_a = Q_a.detach().cpu().data.numpy()
310 |                 action = np.argmax(Q_a)
311 |             # 3) add noise-----------------------------------------------------
312 |             # add noise only to parameters of chosen action
313 |             #print('action=',action)
314 |             #print('all_action_parameters=',all_action_parameters.shape)
315 | 
316 |                 #noise = self.noise.sample().reshape(self.num_actions,5)[action,:]
317 |                 #action_parameters = action_parameters + noise
318 |                 
319 |             
320 |         return action
321 |     
322 |     # take the deterministic action for test ===================================
323 |     def _act(self, state):
324 |         with torch.no_grad():
325 |             state = torch.from_numpy(state).to(self.device)
326 |             # 0) get all action parameters-------------------------------------
327 |             # 1) get discrete action (select maximum action)-------------------
328 |             Q_a = self.actor.forward(state.unsqueeze(0))
329 |             Q_a = Q_a.detach().cpu().data.numpy()
330 |             action = np.argmax(Q_a)
331 |             # 3) get action parameters-----------------------------------------          
332 | 
333 |             #print('act all_action_parameters.shape=',action_parameters.shape)
334 |         return action
335 | 
336 |     def action_decoder(self, action, max_power):
337 |         cluster=int(action/(self.power_level**self.nUE))
338 |         power=[0 for i in range(self.nUE)]
339 |         temppower=action%(self.power_level**self.nUE)
340 |         idx=self.nUE-1
341 |         while True:
342 |           if idx>0:
343 |             #power[idx]=(temppower%self.power_level+1)/self.power_level*max_power
344 |             power[idx]=1/10**(temppower%self.power_level)*max_power       
345 |             temppower=int(temppower/self.power_level)
346 |             idx=idx-1
347 |           else:
348 |             #power[idx]=(temppower/self.power_level+1)/self.power_level*max_power
349 |             power[idx]=1/10**(temppower/self.power_level)*max_power
350 |             break
351 |             
352 |         power=np.array(power)
353 |         
354 |         return cluster,power
355 | 
356 | 
357 | 
358 |     def step(self, state, action, reward, next_state, next_action, terminal):
359 |         #c1,P1
360 |         act = action
361 |         self._step += 1 # number of agent.step
362 |         #self._step = _step
363 |         # self._add_sample(state, np.concatenate((all_actions.data, all_action_parameters.data)).ravel(), reward, next_state, terminal)
364 |         # 1) Memory -----------------------------------------------------------
365 |         self._add_sample(state, np.array([act]), reward, next_state, np.array([next_action]), terminal=terminal)
366 |         # 2) Update -----------------------------------------------------------
367 |         if self._step >= self.batch_size and self._step >= self.initial_memory_threshold:
368 |             self._optimize_td_loss()
369 |             self.updates += 1
370 |             #self.update = update
371 | 
372 |     def _add_sample(self, state, action, reward, next_state, next_action, terminal):
373 |         assert len(action) == 1 
374 |         self.replay_memory.append(state, action, reward, next_state, terminal=terminal)
375 | 
376 |     def _optimize_td_loss(self):
377 |         if self._step < self.batch_size or self._step < self.initial_memory_threshold:
378 |             return
379 |         # 2-1) Sample a batch from replay memory
380 |         states, actions, rewards, next_states, terminals = self.replay_memory.sample(self.batch_size, random_machine=self.np_random)
381 |         # 2-2) form
382 |         states = torch.from_numpy(states).to(self.device)
383 |         actions_combined = torch.from_numpy(actions).to(self.device)  # make sure to separate actions and parameters
384 |         actions = actions_combined.long()
385 |         rewards = torch.from_numpy(rewards).to(self.device).squeeze()
386 |         next_states = torch.from_numpy(next_states).to(self.device)
387 |         terminals = torch.from_numpy(terminals).to(self.device).squeeze()
388 |         # 2-3) Update parameters
389 |         # ---------------------- optimize actor ----------------------
390 |         with torch.no_grad():
391 |             pred_Q_a = self.actor_target(next_states)
392 |             Qprime = torch.max(pred_Q_a, 1, keepdim=True)[0].squeeze()
393 |             # Compute the TD error
394 |             target = rewards + (1 - terminals) * self.gamma * Qprime
395 | 
396 |         # Compute current Q-values using policy network
397 |         q_values = self.actor(states)
398 |         y_predicted = q_values.gather(1, actions.view(-1, 1)).squeeze()
399 |         y_expected = target
400 |         loss_Q = self.loss_func(y_predicted, y_expected)
401 | 
402 |         self.actor_optimiser.zero_grad() # 1
403 |         loss_Q.backward() # 2 
404 |         if self.clip_grad > 0:
405 |             torch.nn.utils.clip_grad_norm(self.actor.parameters(), self.clip_grad)
406 |         self.actor_optimiser.step() # 3
407 |         # ---------------------- optimize actor-parameter ----------------------
408 | 
409 | 
410 |         # ---------------------- update target-network ------------------------
411 |         soft_update_target_network(self.actor, self.actor_target, self.tau_actor)
412 | 
413 |     def save_models(self, prefix):
414 |         """
415 |         saves the target actor and critic models
416 |         :param prefix: the count of episodes iterated
417 |         :return:
418 |         """
419 |         torch.save(self.actor.state_dict(), prefix + '_actor.pt')
420 |         print('Models saved successfully')
421 | 
422 |     def load_models(self, prefix):
423 |         """
424 |         loads the target actor and critic models, and copies them onto actor and critic models
425 |         :param prefix: the count of episodes iterated (used to find the file name)
426 |         :param target: whether to load the target newtwork too (not necessary for evaluation)
427 |         :return:
428 |         """
429 |         # also try load on CPU if no GPU available?
430 |         self.actor.load_state_dict(torch.load(prefix + '_actor.pt', map_location='cpu'))
431 |         print('Models loaded successfully')     
432 | 
# Script entry point.  It defines the experiment settings, creates the
# environment, loads a stored scenario and normalisation statistics, reads a
# set of test channels, builds a DQNAgent and decodes one sample action.
# No training or evaluation loop is run in this block.
if __name__ == '__main__':
    # --- agent hyper-parameters ------------------------------------------------
    batch_size=128#32
    initial_memory_threshold=128#1000 # Number of transitions required to start learning.
    replay_memory_size=20000 # Replay memory transition capacity 
    # NOTE(review): epsilon_initial is not passed to DQNAgent below, so the
    # constructor default is used -- confirm this is intended.
    epsilon_initial=1
    epsilon_steps=1000 # Number of episodes over which to linearly anneal epsilon
    epsilon_final=0.01 # Final epsilon value
    gamma=0.95
    clip_grad=1 # Parameter gradient clipping limit 
    use_ornstein_noise= False # False: Uniformly sample parameters & add noise to taken parameters / True: greedy parameters 
    inverting_gradients= True # Use inverting gradients scheme instead of squashing function
    seed=0 #Random seed
    save_freq = 100#0 # How often to save models (0 = never)
    # 1) ParamActor------------------------------------------------------------   
    # These two values are not referenced again in this block.
    learning_rate_actor_param=0.00001
    tau_actor_param=0.001
    """loss func for actor_parameter """
    average=False # Average weighted loss function  
    weighted=False # Naive weighted loss function
    random_weighted=False # Randomly weighted loss function
    indexed=False # Indexed loss function
    zero_index_gradients=False # Whether to zero all gradients for action-parameters not corresponding to the chosen action
    # 2) Actor-----------------------------------------------------------------
    tau_actor=0.1  
    learning_rate_actor=0.00001#0.0001#0.001 # reduce lr can avoid nan output
    action_input_layer=0# Which layer to input action parameters-- useless?  
    #--------------------------------------------------------------------------
    # Performance 
    # Containers for per-metric result lists, keyed '1'..'5'; they are only
    # created here and not filled in this block.
    dic_info_key = ['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference','Backhaul Difference','SINRdb','QoS Difference','Throughput']
    dic_info={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in ['1','2','3','4','5']  }
    dic_info_no_back={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in ['1','2','3','4','5']  }
    dic_info_ori_key = ['Energy Efficiency','Backhaul Cost','QoS Good', 'QoS Gurantee', 'QoS Bad','System Throughput','QoS Squared Difference']
    dic_info_ori={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in ['1','2','3','4','5'] }
    dic_info_ori_no_back={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in ['1','2','3','4','5'] }
    a_info={'c':[],'P':[]}
    dic_store={'a':[],'ddpg_s':[],'r':[],'dqn_s':[],'dqn_Q':[]}
    dic_NN_output={'actor':[],'critic':[],'dqn_q_eval':[],'dqn_q_target':[]}
    num_back=0
    debug_QoSr={i:[] for i in ['1','2','3','4','5']}
    #--------------------------------------------------------------------------
    # debug
    debug_PNN=[]  
    debug_backhaul=[]
    debug_BSbackhaul=[]
    debug_channel_episode=[]
    debug_episode_back=[]
    debug_s=[]
    
    #%% Need to modify
    ###########################################################################
    scale_actions = True # True
    initialise_params = False#True#False # True:add pass-through layer to ActorParam and initilize them / False: not add pass-through layer to ActorParam
    MAXepisode = 100#1000
    MAXepisode_train = 1000
    MAXstep = 100#10#150
    realization=100#20
    title="PDQN1"#"PDQN_backhaul" # Prefix of output files
    #save_dir ="results" #Output directory 
    n_baseline=5
    load_dir ="results_53/PDQN_cc_s11_r11_0dB_N3_10"#PDQN_cc_s3_r9_1dB_new4_rebuild40" #Output directory 
    load_num="_done"#"400"#
    layers_actor=[512,128,16] # 1055-- --5  # # Hidden layers
    # 'action_input_layer' is absorbed by DQNActor's **kwargs.
    actor_kwargs={'hidden_layers': layers_actor, 'output_layer_init_std': 1e-5,'action_input_layer': action_input_layer,'activation': "relu"}
    layers_actor_param =[256]#[64,256] # 5-- --1050
    # actor_param_kwargs is not referenced again in this block.
    actor_param_kwargs={'hidden_layers': layers_actor_param, 'output_layer_init_std': 1e-5,'squashing_function': False,'activation': "relu"}
    name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE8.csv'#'mean_std_cc_nct.csv'
    scenario_name='EnvInfo_11'
    # Weights passed to the environment constructor.
    lambda1=0.2#0.53#1
    lambda2=0.8#0.05#0.42#0.8
    lambda3=0#0.1#0.3#0
    result_save=load_dir+'/test_testChannel_block_fading'#'/test_all_'#'/test_testChannel'#'/test_last2000_'
    ###########################################################################
    #%% ENV
    env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3,MAXepisode=MAXepisode,n_baseline=n_baseline)
    #-------------------------------------------------------------------------- Choose Network Geometry
    #env.reset() # create a new one
    env.load(name=scenario_name) # use the previous one 
    #-------------------------------------------------------------------------- mean_std   
    env.mean_std(10**5,False,name)#calculate(True) or load(False)
    num_actions = env.action_space[0]
    s_dim = env.nUE
    # use the same channel gain to test
    read_train_channel_episode = t.readCSI('Rayleigh_CSIforTest_100episode_100timestep_s11',env.nSBS,env.nUE,MAXepisode)

    #%% DQN
    power_level=2
    agent_classDQN = DQNAgent
    agentDQN = agent_classDQN(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space,
                        power_level=power_level,batch_size=batch_size,learning_rate_actor=learning_rate_actor,  # 0.001
                        epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma,
                        clip_grad=clip_grad,indexed=indexed,average=average,
                        random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted,
                        initial_memory_threshold=initial_memory_threshold,
                        use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients,
                        actor_kwargs=actor_kwargs,
                        zero_index_gradients=zero_index_gradients,seed=seed)
    # Decode one sample action index; the returned (cluster, power) pair is discarded.
    agentDQN.action_decoder(5, env.P_Max_SBS)
530 | 
531 | #%%
532 | 
533 | 
534 | 


--------------------------------------------------------------------------------
/EnvInfo_3.csv:
--------------------------------------------------------------------------------
   1 | 130.66766715061817,6.103612044283106,149.13216990439463
   2 | 112.77970089450794,-222.92821834528178,327.19287066463886
   3 | -224.61027764370075,346.2031408013222,-406.9085457985689,291.44245949621796,301.39543513106366
   4 | -407.87410382359803,13.87531070132368,-4.2654563033904305,-113.8322325759938,324.45315913531704
   5 | 0.004815172076881084,0.174060647665381,0.007931849219840477,0.09732104116184383,0.10530752181166962,0.020956068593393933
   6 | 0.07744932935443383,0.02243851774429035,0.01443773216963905,0.06872183236607807,0.005057240946593546,0.01895849440144912
   7 | 0.0017961313331011153,0.03396952573879356,0.00436666596957212,0.014889589929801985,0.884337151418975,0.0157284306455081
   8 | 0,1,2,3,4
   9 | 
  10 | 
  11 | 0,1,2,3
  12 | 4
  13 | 
  14 | 0,1,2,3
  15 | 
  16 | 4
  17 | 0,1,2,4
  18 | 3
  19 | 
  20 | 0,1,2
  21 | 3,4
  22 | 
  23 | 0,1,2
  24 | 3
  25 | 4
  26 | 0,1,2,4
  27 | 
  28 | 3
  29 | 0,1,2
  30 | 4
  31 | 3
  32 | 0,1,2
  33 | 
  34 | 3,4
  35 | 0,1,3,4
  36 | 2
  37 | 
  38 | 0,1,3
  39 | 2,4
  40 | 
  41 | 0,1,3
  42 | 2
  43 | 4
  44 | 0,1,4
  45 | 2,3
  46 | 
  47 | 0,1
  48 | 2,3,4
  49 | 
  50 | 0,1
  51 | 2,3
  52 | 4
  53 | 0,1,4
  54 | 2
  55 | 3
  56 | 0,1
  57 | 2,4
  58 | 3
  59 | 0,1
  60 | 2
  61 | 3,4
  62 | 0,1,3,4
  63 | 
  64 | 2
  65 | 0,1,3
  66 | 4
  67 | 2
  68 | 0,1,3
  69 | 
  70 | 2,4
  71 | 0,1,4
  72 | 3
  73 | 2
  74 | 0,1
  75 | 3,4
  76 | 2
  77 | 0,1
  78 | 3
  79 | 2,4
  80 | 0,1,4
  81 | 
  82 | 2,3
  83 | 0,1
  84 | 4
  85 | 2,3
  86 | 0,1
  87 | 
  88 | 2,3,4
  89 | 0,2,3,4
  90 | 1
  91 | 
  92 | 0,2,3
  93 | 1,4
  94 | 
  95 | 0,2,3
  96 | 1
  97 | 4
  98 | 0,2,4
  99 | 1,3
 100 | 
 101 | 0,2
 102 | 1,3,4
 103 | 
 104 | 0,2
 105 | 1,3
 106 | 4
 107 | 0,2,4
 108 | 1
 109 | 3
 110 | 0,2
 111 | 1,4
 112 | 3
 113 | 0,2
 114 | 1
 115 | 3,4
 116 | 0,3,4
 117 | 1,2
 118 | 
 119 | 0,3
 120 | 1,2,4
 121 | 
 122 | 0,3
 123 | 1,2
 124 | 4
 125 | 0,4
 126 | 1,2,3
 127 | 
 128 | 0
 129 | 1,2,3,4
 130 | 
 131 | 0
 132 | 1,2,3
 133 | 4
 134 | 0,4
 135 | 1,2
 136 | 3
 137 | 0
 138 | 1,2,4
 139 | 3
 140 | 0
 141 | 1,2
 142 | 3,4
 143 | 0,3,4
 144 | 1
 145 | 2
 146 | 0,3
 147 | 1,4
 148 | 2
 149 | 0,3
 150 | 1
 151 | 2,4
 152 | 0,4
 153 | 1,3
 154 | 2
 155 | 0
 156 | 1,3,4
 157 | 2
 158 | 0
 159 | 1,3
 160 | 2,4
 161 | 0,4
 162 | 1
 163 | 2,3
 164 | 0
 165 | 1,4
 166 | 2,3
 167 | 0
 168 | 1
 169 | 2,3,4
 170 | 0,2,3,4
 171 | 
 172 | 1
 173 | 0,2,3
 174 | 4
 175 | 1
 176 | 0,2,3
 177 | 
 178 | 1,4
 179 | 0,2,4
 180 | 3
 181 | 1
 182 | 0,2
 183 | 3,4
 184 | 1
 185 | 0,2
 186 | 3
 187 | 1,4
 188 | 0,2,4
 189 | 
 190 | 1,3
 191 | 0,2
 192 | 4
 193 | 1,3
 194 | 0,2
 195 | 
 196 | 1,3,4
 197 | 0,3,4
 198 | 2
 199 | 1
 200 | 0,3
 201 | 2,4
 202 | 1
 203 | 0,3
 204 | 2
 205 | 1,4
 206 | 0,4
 207 | 2,3
 208 | 1
 209 | 0
 210 | 2,3,4
 211 | 1
 212 | 0
 213 | 2,3
 214 | 1,4
 215 | 0,4
 216 | 2
 217 | 1,3
 218 | 0
 219 | 2,4
 220 | 1,3
 221 | 0
 222 | 2
 223 | 1,3,4
 224 | 0,3,4
 225 | 
 226 | 1,2
 227 | 0,3
 228 | 4
 229 | 1,2
 230 | 0,3
 231 | 
 232 | 1,2,4
 233 | 0,4
 234 | 3
 235 | 1,2
 236 | 0
 237 | 3,4
 238 | 1,2
 239 | 0
 240 | 3
 241 | 1,2,4
 242 | 0,4
 243 | 
 244 | 1,2,3
 245 | 0
 246 | 4
 247 | 1,2,3
 248 | 0
 249 | 
 250 | 1,2,3,4
 251 | 1,2,3,4
 252 | 0
 253 | 
 254 | 1,2,3
 255 | 0,4
 256 | 
 257 | 1,2,3
 258 | 0
 259 | 4
 260 | 1,2,4
 261 | 0,3
 262 | 
 263 | 1,2
 264 | 0,3,4
 265 | 
 266 | 1,2
 267 | 0,3
 268 | 4
 269 | 1,2,4
 270 | 0
 271 | 3
 272 | 1,2
 273 | 0,4
 274 | 3
 275 | 1,2
 276 | 0
 277 | 3,4
 278 | 1,3,4
 279 | 0,2
 280 | 
 281 | 1,3
 282 | 0,2,4
 283 | 
 284 | 1,3
 285 | 0,2
 286 | 4
 287 | 1,4
 288 | 0,2,3
 289 | 
 290 | 1
 291 | 0,2,3,4
 292 | 
 293 | 1
 294 | 0,2,3
 295 | 4
 296 | 1,4
 297 | 0,2
 298 | 3
 299 | 1
 300 | 0,2,4
 301 | 3
 302 | 1
 303 | 0,2
 304 | 3,4
 305 | 1,3,4
 306 | 0
 307 | 2
 308 | 1,3
 309 | 0,4
 310 | 2
 311 | 1,3
 312 | 0
 313 | 2,4
 314 | 1,4
 315 | 0,3
 316 | 2
 317 | 1
 318 | 0,3,4
 319 | 2
 320 | 1
 321 | 0,3
 322 | 2,4
 323 | 1,4
 324 | 0
 325 | 2,3
 326 | 1
 327 | 0,4
 328 | 2,3
 329 | 1
 330 | 0
 331 | 2,3,4
 332 | 2,3,4
 333 | 0,1
 334 | 
 335 | 2,3
 336 | 0,1,4
 337 | 
 338 | 2,3
 339 | 0,1
 340 | 4
 341 | 2,4
 342 | 0,1,3
 343 | 
 344 | 2
 345 | 0,1,3,4
 346 | 
 347 | 2
 348 | 0,1,3
 349 | 4
 350 | 2,4
 351 | 0,1
 352 | 3
 353 | 2
 354 | 0,1,4
 355 | 3
 356 | 2
 357 | 0,1
 358 | 3,4
 359 | 3,4
 360 | 0,1,2
 361 | 
 362 | 3
 363 | 0,1,2,4
 364 | 
 365 | 3
 366 | 0,1,2
 367 | 4
 368 | 4
 369 | 0,1,2,3
 370 | 
 371 | 
 372 | 0,1,2,3,4
 373 | 
 374 | 
 375 | 0,1,2,3
 376 | 4
 377 | 4
 378 | 0,1,2
 379 | 3
 380 | 
 381 | 0,1,2,4
 382 | 3
 383 | 
 384 | 0,1,2
 385 | 3,4
 386 | 3,4
 387 | 0,1
 388 | 2
 389 | 3
 390 | 0,1,4
 391 | 2
 392 | 3
 393 | 0,1
 394 | 2,4
 395 | 4
 396 | 0,1,3
 397 | 2
 398 | 
 399 | 0,1,3,4
 400 | 2
 401 | 
 402 | 0,1,3
 403 | 2,4
 404 | 4
 405 | 0,1
 406 | 2,3
 407 | 
 408 | 0,1,4
 409 | 2,3
 410 | 
 411 | 0,1
 412 | 2,3,4
 413 | 2,3,4
 414 | 0
 415 | 1
 416 | 2,3
 417 | 0,4
 418 | 1
 419 | 2,3
 420 | 0
 421 | 1,4
 422 | 2,4
 423 | 0,3
 424 | 1
 425 | 2
 426 | 0,3,4
 427 | 1
 428 | 2
 429 | 0,3
 430 | 1,4
 431 | 2,4
 432 | 0
 433 | 1,3
 434 | 2
 435 | 0,4
 436 | 1,3
 437 | 2
 438 | 0
 439 | 1,3,4
 440 | 3,4
 441 | 0,2
 442 | 1
 443 | 3
 444 | 0,2,4
 445 | 1
 446 | 3
 447 | 0,2
 448 | 1,4
 449 | 4
 450 | 0,2,3
 451 | 1
 452 | 
 453 | 0,2,3,4
 454 | 1
 455 | 
 456 | 0,2,3
 457 | 1,4
 458 | 4
 459 | 0,2
 460 | 1,3
 461 | 
 462 | 0,2,4
 463 | 1,3
 464 | 
 465 | 0,2
 466 | 1,3,4
 467 | 3,4
 468 | 0
 469 | 1,2
 470 | 3
 471 | 0,4
 472 | 1,2
 473 | 3
 474 | 0
 475 | 1,2,4
 476 | 4
 477 | 0,3
 478 | 1,2
 479 | 
 480 | 0,3,4
 481 | 1,2
 482 | 
 483 | 0,3
 484 | 1,2,4
 485 | 4
 486 | 0
 487 | 1,2,3
 488 | 
 489 | 0,4
 490 | 1,2,3
 491 | 
 492 | 0
 493 | 1,2,3,4
 494 | 1,2,3,4
 495 | 
 496 | 0
 497 | 1,2,3
 498 | 4
 499 | 0
 500 | 1,2,3
 501 | 
 502 | 0,4
 503 | 1,2,4
 504 | 3
 505 | 0
 506 | 1,2
 507 | 3,4
 508 | 0
 509 | 1,2
 510 | 3
 511 | 0,4
 512 | 1,2,4
 513 | 
 514 | 0,3
 515 | 1,2
 516 | 4
 517 | 0,3
 518 | 1,2
 519 | 
 520 | 0,3,4
 521 | 1,3,4
 522 | 2
 523 | 0
 524 | 1,3
 525 | 2,4
 526 | 0
 527 | 1,3
 528 | 2
 529 | 0,4
 530 | 1,4
 531 | 2,3
 532 | 0
 533 | 1
 534 | 2,3,4
 535 | 0
 536 | 1
 537 | 2,3
 538 | 0,4
 539 | 1,4
 540 | 2
 541 | 0,3
 542 | 1
 543 | 2,4
 544 | 0,3
 545 | 1
 546 | 2
 547 | 0,3,4
 548 | 1,3,4
 549 | 
 550 | 0,2
 551 | 1,3
 552 | 4
 553 | 0,2
 554 | 1,3
 555 | 
 556 | 0,2,4
 557 | 1,4
 558 | 3
 559 | 0,2
 560 | 1
 561 | 3,4
 562 | 0,2
 563 | 1
 564 | 3
 565 | 0,2,4
 566 | 1,4
 567 | 
 568 | 0,2,3
 569 | 1
 570 | 4
 571 | 0,2,3
 572 | 1
 573 | 
 574 | 0,2,3,4
 575 | 2,3,4
 576 | 1
 577 | 0
 578 | 2,3
 579 | 1,4
 580 | 0
 581 | 2,3
 582 | 1
 583 | 0,4
 584 | 2,4
 585 | 1,3
 586 | 0
 587 | 2
 588 | 1,3,4
 589 | 0
 590 | 2
 591 | 1,3
 592 | 0,4
 593 | 2,4
 594 | 1
 595 | 0,3
 596 | 2
 597 | 1,4
 598 | 0,3
 599 | 2
 600 | 1
 601 | 0,3,4
 602 | 3,4
 603 | 1,2
 604 | 0
 605 | 3
 606 | 1,2,4
 607 | 0
 608 | 3
 609 | 1,2
 610 | 0,4
 611 | 4
 612 | 1,2,3
 613 | 0
 614 | 
 615 | 1,2,3,4
 616 | 0
 617 | 
 618 | 1,2,3
 619 | 0,4
 620 | 4
 621 | 1,2
 622 | 0,3
 623 | 
 624 | 1,2,4
 625 | 0,3
 626 | 
 627 | 1,2
 628 | 0,3,4
 629 | 3,4
 630 | 1
 631 | 0,2
 632 | 3
 633 | 1,4
 634 | 0,2
 635 | 3
 636 | 1
 637 | 0,2,4
 638 | 4
 639 | 1,3
 640 | 0,2
 641 | 
 642 | 1,3,4
 643 | 0,2
 644 | 
 645 | 1,3
 646 | 0,2,4
 647 | 4
 648 | 1
 649 | 0,2,3
 650 | 
 651 | 1,4
 652 | 0,2,3
 653 | 
 654 | 1
 655 | 0,2,3,4
 656 | 2,3,4
 657 | 
 658 | 0,1
 659 | 2,3
 660 | 4
 661 | 0,1
 662 | 2,3
 663 | 
 664 | 0,1,4
 665 | 2,4
 666 | 3
 667 | 0,1
 668 | 2
 669 | 3,4
 670 | 0,1
 671 | 2
 672 | 3
 673 | 0,1,4
 674 | 2,4
 675 | 
 676 | 0,1,3
 677 | 2
 678 | 4
 679 | 0,1,3
 680 | 2
 681 | 
 682 | 0,1,3,4
 683 | 3,4
 684 | 2
 685 | 0,1
 686 | 3
 687 | 2,4
 688 | 0,1
 689 | 3
 690 | 2
 691 | 0,1,4
 692 | 4
 693 | 2,3
 694 | 0,1
 695 | 
 696 | 2,3,4
 697 | 0,1
 698 | 
 699 | 2,3
 700 | 0,1,4
 701 | 4
 702 | 2
 703 | 0,1,3
 704 | 
 705 | 2,4
 706 | 0,1,3
 707 | 
 708 | 2
 709 | 0,1,3,4
 710 | 3,4
 711 | 
 712 | 0,1,2
 713 | 3
 714 | 4
 715 | 0,1,2
 716 | 3
 717 | 
 718 | 0,1,2,4
 719 | 4
 720 | 3
 721 | 0,1,2
 722 | 
 723 | 3,4
 724 | 0,1,2
 725 | 
 726 | 3
 727 | 0,1,2,4
 728 | 4
 729 | 
 730 | 0,1,2,3
 731 | 
 732 | 4
 733 | 0,1,2,3
 734 | 
 735 | 
 736 | 0,1,2,3,4
 737 | 0,0,0,0,0
 738 | 0,0,0,0,1
 739 | 0,0,0,0,2
 740 | 0,0,0,1,0
 741 | 0,0,0,1,1
 742 | 0,0,0,1,2
 743 | 0,0,0,2,0
 744 | 0,0,0,2,1
 745 | 0,0,0,2,2
 746 | 0,0,1,0,0
 747 | 0,0,1,0,1
 748 | 0,0,1,0,2
 749 | 0,0,1,1,0
 750 | 0,0,1,1,1
 751 | 0,0,1,1,2
 752 | 0,0,1,2,0
 753 | 0,0,1,2,1
 754 | 0,0,1,2,2
 755 | 0,0,2,0,0
 756 | 0,0,2,0,1
 757 | 0,0,2,0,2
 758 | 0,0,2,1,0
 759 | 0,0,2,1,1
 760 | 0,0,2,1,2
 761 | 0,0,2,2,0
 762 | 0,0,2,2,1
 763 | 0,0,2,2,2
 764 | 0,1,0,0,0
 765 | 0,1,0,0,1
 766 | 0,1,0,0,2
 767 | 0,1,0,1,0
 768 | 0,1,0,1,1
 769 | 0,1,0,1,2
 770 | 0,1,0,2,0
 771 | 0,1,0,2,1
 772 | 0,1,0,2,2
 773 | 0,1,1,0,0
 774 | 0,1,1,0,1
 775 | 0,1,1,0,2
 776 | 0,1,1,1,0
 777 | 0,1,1,1,1
 778 | 0,1,1,1,2
 779 | 0,1,1,2,0
 780 | 0,1,1,2,1
 781 | 0,1,1,2,2
 782 | 0,1,2,0,0
 783 | 0,1,2,0,1
 784 | 0,1,2,0,2
 785 | 0,1,2,1,0
 786 | 0,1,2,1,1
 787 | 0,1,2,1,2
 788 | 0,1,2,2,0
 789 | 0,1,2,2,1
 790 | 0,1,2,2,2
 791 | 0,2,0,0,0
 792 | 0,2,0,0,1
 793 | 0,2,0,0,2
 794 | 0,2,0,1,0
 795 | 0,2,0,1,1
 796 | 0,2,0,1,2
 797 | 0,2,0,2,0
 798 | 0,2,0,2,1
 799 | 0,2,0,2,2
 800 | 0,2,1,0,0
 801 | 0,2,1,0,1
 802 | 0,2,1,0,2
 803 | 0,2,1,1,0
 804 | 0,2,1,1,1
 805 | 0,2,1,1,2
 806 | 0,2,1,2,0
 807 | 0,2,1,2,1
 808 | 0,2,1,2,2
 809 | 0,2,2,0,0
 810 | 0,2,2,0,1
 811 | 0,2,2,0,2
 812 | 0,2,2,1,0
 813 | 0,2,2,1,1
 814 | 0,2,2,1,2
 815 | 0,2,2,2,0
 816 | 0,2,2,2,1
 817 | 0,2,2,2,2
 818 | 1,0,0,0,0
 819 | 1,0,0,0,1
 820 | 1,0,0,0,2
 821 | 1,0,0,1,0
 822 | 1,0,0,1,1
 823 | 1,0,0,1,2
 824 | 1,0,0,2,0
 825 | 1,0,0,2,1
 826 | 1,0,0,2,2
 827 | 1,0,1,0,0
 828 | 1,0,1,0,1
 829 | 1,0,1,0,2
 830 | 1,0,1,1,0
 831 | 1,0,1,1,1
 832 | 1,0,1,1,2
 833 | 1,0,1,2,0
 834 | 1,0,1,2,1
 835 | 1,0,1,2,2
 836 | 1,0,2,0,0
 837 | 1,0,2,0,1
 838 | 1,0,2,0,2
 839 | 1,0,2,1,0
 840 | 1,0,2,1,1
 841 | 1,0,2,1,2
 842 | 1,0,2,2,0
 843 | 1,0,2,2,1
 844 | 1,0,2,2,2
 845 | 1,1,0,0,0
 846 | 1,1,0,0,1
 847 | 1,1,0,0,2
 848 | 1,1,0,1,0
 849 | 1,1,0,1,1
 850 | 1,1,0,1,2
 851 | 1,1,0,2,0
 852 | 1,1,0,2,1
 853 | 1,1,0,2,2
 854 | 1,1,1,0,0
 855 | 1,1,1,0,1
 856 | 1,1,1,0,2
 857 | 1,1,1,1,0
 858 | 1,1,1,1,1
 859 | 1,1,1,1,2
 860 | 1,1,1,2,0
 861 | 1,1,1,2,1
 862 | 1,1,1,2,2
 863 | 1,1,2,0,0
 864 | 1,1,2,0,1
 865 | 1,1,2,0,2
 866 | 1,1,2,1,0
 867 | 1,1,2,1,1
 868 | 1,1,2,1,2
 869 | 1,1,2,2,0
 870 | 1,1,2,2,1
 871 | 1,1,2,2,2
 872 | 1,2,0,0,0
 873 | 1,2,0,0,1
 874 | 1,2,0,0,2
 875 | 1,2,0,1,0
 876 | 1,2,0,1,1
 877 | 1,2,0,1,2
 878 | 1,2,0,2,0
 879 | 1,2,0,2,1
 880 | 1,2,0,2,2
 881 | 1,2,1,0,0
 882 | 1,2,1,0,1
 883 | 1,2,1,0,2
 884 | 1,2,1,1,0
 885 | 1,2,1,1,1
 886 | 1,2,1,1,2
 887 | 1,2,1,2,0
 888 | 1,2,1,2,1
 889 | 1,2,1,2,2
 890 | 1,2,2,0,0
 891 | 1,2,2,0,1
 892 | 1,2,2,0,2
 893 | 1,2,2,1,0
 894 | 1,2,2,1,1
 895 | 1,2,2,1,2
 896 | 1,2,2,2,0
 897 | 1,2,2,2,1
 898 | 1,2,2,2,2
 899 | 2,0,0,0,0
 900 | 2,0,0,0,1
 901 | 2,0,0,0,2
 902 | 2,0,0,1,0
 903 | 2,0,0,1,1
 904 | 2,0,0,1,2
 905 | 2,0,0,2,0
 906 | 2,0,0,2,1
 907 | 2,0,0,2,2
 908 | 2,0,1,0,0
 909 | 2,0,1,0,1
 910 | 2,0,1,0,2
 911 | 2,0,1,1,0
 912 | 2,0,1,1,1
 913 | 2,0,1,1,2
 914 | 2,0,1,2,0
 915 | 2,0,1,2,1
 916 | 2,0,1,2,2
 917 | 2,0,2,0,0
 918 | 2,0,2,0,1
 919 | 2,0,2,0,2
 920 | 2,0,2,1,0
 921 | 2,0,2,1,1
 922 | 2,0,2,1,2
 923 | 2,0,2,2,0
 924 | 2,0,2,2,1
 925 | 2,0,2,2,2
 926 | 2,1,0,0,0
 927 | 2,1,0,0,1
 928 | 2,1,0,0,2
 929 | 2,1,0,1,0
 930 | 2,1,0,1,1
 931 | 2,1,0,1,2
 932 | 2,1,0,2,0
 933 | 2,1,0,2,1
 934 | 2,1,0,2,2
 935 | 2,1,1,0,0
 936 | 2,1,1,0,1
 937 | 2,1,1,0,2
 938 | 2,1,1,1,0
 939 | 2,1,1,1,1
 940 | 2,1,1,1,2
 941 | 2,1,1,2,0
 942 | 2,1,1,2,1
 943 | 2,1,1,2,2
 944 | 2,1,2,0,0
 945 | 2,1,2,0,1
 946 | 2,1,2,0,2
 947 | 2,1,2,1,0
 948 | 2,1,2,1,1
 949 | 2,1,2,1,2
 950 | 2,1,2,2,0
 951 | 2,1,2,2,1
 952 | 2,1,2,2,2
 953 | 2,2,0,0,0
 954 | 2,2,0,0,1
 955 | 2,2,0,0,2
 956 | 2,2,0,1,0
 957 | 2,2,0,1,1
 958 | 2,2,0,1,2
 959 | 2,2,0,2,0
 960 | 2,2,0,2,1
 961 | 2,2,0,2,2
 962 | 2,2,1,0,0
 963 | 2,2,1,0,1
 964 | 2,2,1,0,2
 965 | 2,2,1,1,0
 966 | 2,2,1,1,1
 967 | 2,2,1,1,2
 968 | 2,2,1,2,0
 969 | 2,2,1,2,1
 970 | 2,2,1,2,2
 971 | 2,2,2,0,0
 972 | 2,2,2,0,1
 973 | 2,2,2,0,2
 974 | 2,2,2,1,0
 975 | 2,2,2,1,1
 976 | 2,2,2,1,2
 977 | 2,2,2,2,0
 978 | 2,2,2,2,1
 979 | 2,2,2,2,2
 980 | 210
 981 | 0,1,2
 982 | 3,4
 983 | 
 984 | 0,1,2
 985 | 3
 986 | 4
 987 | 0,1,2
 988 | 4
 989 | 3
 990 | 0,1,2
 991 | 
 992 | 3,4
 993 | 0,1,3
 994 | 2,4
 995 | 
 996 | 0,1,3
 997 | 2
 998 | 4
 999 | 0,1,4
1000 | 2,3
1001 | 
1002 | 0,1
1003 | 2,3,4
1004 | 
1005 | 0,1
1006 | 2,3
1007 | 4
1008 | 0,1,4
1009 | 2
1010 | 3
1011 | 0,1
1012 | 2,4
1013 | 3
1014 | 0,1
1015 | 2
1016 | 3,4
1017 | 0,1,3
1018 | 4
1019 | 2
1020 | 0,1,3
1021 | 
1022 | 2,4
1023 | 0,1,4
1024 | 3
1025 | 2
1026 | 0,1
1027 | 3,4
1028 | 2
1029 | 0,1
1030 | 3
1031 | 2,4
1032 | 0,1,4
1033 | 
1034 | 2,3
1035 | 0,1
1036 | 4
1037 | 2,3
1038 | 0,1
1039 | 
1040 | 2,3,4
1041 | 0,2,3
1042 | 1,4
1043 | 
1044 | 0,2,3
1045 | 1
1046 | 4
1047 | 0,2,4
1048 | 1,3
1049 | 
1050 | 0,2
1051 | 1,3,4
1052 | 
1053 | 0,2
1054 | 1,3
1055 | 4
1056 | 0,2,4
1057 | 1
1058 | 3
1059 | 0,2
1060 | 1,4
1061 | 3
1062 | 0,2
1063 | 1
1064 | 3,4
1065 | 0,3,4
1066 | 1,2
1067 | 
1068 | 0,3
1069 | 1,2,4
1070 | 
1071 | 0,3
1072 | 1,2
1073 | 4
1074 | 0,4
1075 | 1,2,3
1076 | 
1077 | 0
1078 | 1,2,3
1079 | 4
1080 | 0,4
1081 | 1,2
1082 | 3
1083 | 0
1084 | 1,2,4
1085 | 3
1086 | 0
1087 | 1,2
1088 | 3,4
1089 | 0,3,4
1090 | 1
1091 | 2
1092 | 0,3
1093 | 1,4
1094 | 2
1095 | 0,3
1096 | 1
1097 | 2,4
1098 | 0,4
1099 | 1,3
1100 | 2
1101 | 0
1102 | 1,3,4
1103 | 2
1104 | 0
1105 | 1,3
1106 | 2,4
1107 | 0,4
1108 | 1
1109 | 2,3
1110 | 0
1111 | 1,4
1112 | 2,3
1113 | 0
1114 | 1
1115 | 2,3,4
1116 | 0,2,3
1117 | 4
1118 | 1
1119 | 0,2,3
1120 | 
1121 | 1,4
1122 | 0,2,4
1123 | 3
1124 | 1
1125 | 0,2
1126 | 3,4
1127 | 1
1128 | 0,2
1129 | 3
1130 | 1,4
1131 | 0,2,4
1132 | 
1133 | 1,3
1134 | 0,2
1135 | 4
1136 | 1,3
1137 | 0,2
1138 | 
1139 | 1,3,4
1140 | 0,3,4
1141 | 2
1142 | 1
1143 | 0,3
1144 | 2,4
1145 | 1
1146 | 0,3
1147 | 2
1148 | 1,4
1149 | 0,4
1150 | 2,3
1151 | 1
1152 | 0
1153 | 2,3,4
1154 | 1
1155 | 0
1156 | 2,3
1157 | 1,4
1158 | 0,4
1159 | 2
1160 | 1,3
1161 | 0
1162 | 2,4
1163 | 1,3
1164 | 0
1165 | 2
1166 | 1,3,4
1167 | 0,3,4
1168 | 
1169 | 1,2
1170 | 0,3
1171 | 4
1172 | 1,2
1173 | 0,3
1174 | 
1175 | 1,2,4
1176 | 0,4
1177 | 3
1178 | 1,2
1179 | 0
1180 | 3,4
1181 | 1,2
1182 | 0
1183 | 3
1184 | 1,2,4
1185 | 0,4
1186 | 
1187 | 1,2,3
1188 | 0
1189 | 4
1190 | 1,2,3
1191 | 1,2,3
1192 | 0,4
1193 | 
1194 | 1,2,3
1195 | 0
1196 | 4
1197 | 1,2,4
1198 | 0,3
1199 | 
1200 | 1,2
1201 | 0,3,4
1202 | 
1203 | 1,2
1204 | 0,3
1205 | 4
1206 | 1,2,4
1207 | 0
1208 | 3
1209 | 1,2
1210 | 0,4
1211 | 3
1212 | 1,2
1213 | 0
1214 | 3,4
1215 | 1,3,4
1216 | 0,2
1217 | 
1218 | 1,3
1219 | 0,2,4
1220 | 
1221 | 1,3
1222 | 0,2
1223 | 4
1224 | 1,4
1225 | 0,2,3
1226 | 
1227 | 1
1228 | 0,2,3
1229 | 4
1230 | 1,4
1231 | 0,2
1232 | 3
1233 | 1
1234 | 0,2,4
1235 | 3
1236 | 1
1237 | 0,2
1238 | 3,4
1239 | 1,3,4
1240 | 0
1241 | 2
1242 | 1,3
1243 | 0,4
1244 | 2
1245 | 1,3
1246 | 0
1247 | 2,4
1248 | 1,4
1249 | 0,3
1250 | 2
1251 | 1
1252 | 0,3,4
1253 | 2
1254 | 1
1255 | 0,3
1256 | 2,4
1257 | 1,4
1258 | 0
1259 | 2,3
1260 | 1
1261 | 0,4
1262 | 2,3
1263 | 1
1264 | 0
1265 | 2,3,4
1266 | 2,3,4
1267 | 0,1
1268 | 
1269 | 2,3
1270 | 0,1,4
1271 | 
1272 | 2,3
1273 | 0,1
1274 | 4
1275 | 2,4
1276 | 0,1,3
1277 | 
1278 | 2
1279 | 0,1,3
1280 | 4
1281 | 2,4
1282 | 0,1
1283 | 3
1284 | 2
1285 | 0,1,4
1286 | 3
1287 | 2
1288 | 0,1
1289 | 3,4
1290 | 3,4
1291 | 0,1,2
1292 | 
1293 | 3
1294 | 0,1,2
1295 | 4
1296 | 4
1297 | 0,1,2
1298 | 3
1299 | 
1300 | 0,1,2
1301 | 3,4
1302 | 3,4
1303 | 0,1
1304 | 2
1305 | 3
1306 | 0,1,4
1307 | 2
1308 | 3
1309 | 0,1
1310 | 2,4
1311 | 4
1312 | 0,1,3
1313 | 2
1314 | 
1315 | 0,1,3
1316 | 2,4
1317 | 4
1318 | 0,1
1319 | 2,3
1320 | 
1321 | 0,1,4
1322 | 2,3
1323 | 
1324 | 0,1
1325 | 2,3,4
1326 | 2,3,4
1327 | 0
1328 | 1
1329 | 2,3
1330 | 0,4
1331 | 1
1332 | 2,3
1333 | 0
1334 | 1,4
1335 | 2,4
1336 | 0,3
1337 | 1
1338 | 2
1339 | 0,3,4
1340 | 1
1341 | 2
1342 | 0,3
1343 | 1,4
1344 | 2,4
1345 | 0
1346 | 1,3
1347 | 2
1348 | 0,4
1349 | 1,3
1350 | 2
1351 | 0
1352 | 1,3,4
1353 | 3,4
1354 | 0,2
1355 | 1
1356 | 3
1357 | 0,2,4
1358 | 1
1359 | 3
1360 | 0,2
1361 | 1,4
1362 | 4
1363 | 0,2,3
1364 | 1
1365 | 
1366 | 0,2,3
1367 | 1,4
1368 | 4
1369 | 0,2
1370 | 1,3
1371 | 
1372 | 0,2,4
1373 | 1,3
1374 | 
1375 | 0,2
1376 | 1,3,4
1377 | 3,4
1378 | 0
1379 | 1,2
1380 | 3
1381 | 0,4
1382 | 1,2
1383 | 3
1384 | 0
1385 | 1,2,4
1386 | 4
1387 | 0,3
1388 | 1,2
1389 | 
1390 | 0,3,4
1391 | 1,2
1392 | 
1393 | 0,3
1394 | 1,2,4
1395 | 4
1396 | 0
1397 | 1,2,3
1398 | 
1399 | 0,4
1400 | 1,2,3
1401 | 1,2,3
1402 | 4
1403 | 0
1404 | 1,2,3
1405 | 
1406 | 0,4
1407 | 1,2,4
1408 | 3
1409 | 0
1410 | 1,2
1411 | 3,4
1412 | 0
1413 | 1,2
1414 | 3
1415 | 0,4
1416 | 1,2,4
1417 | 
1418 | 0,3
1419 | 1,2
1420 | 4
1421 | 0,3
1422 | 1,2
1423 | 
1424 | 0,3,4
1425 | 1,3,4
1426 | 2
1427 | 0
1428 | 1,3
1429 | 2,4
1430 | 0
1431 | 1,3
1432 | 2
1433 | 0,4
1434 | 1,4
1435 | 2,3
1436 | 0
1437 | 1
1438 | 2,3,4
1439 | 0
1440 | 1
1441 | 2,3
1442 | 0,4
1443 | 1,4
1444 | 2
1445 | 0,3
1446 | 1
1447 | 2,4
1448 | 0,3
1449 | 1
1450 | 2
1451 | 0,3,4
1452 | 1,3,4
1453 | 
1454 | 0,2
1455 | 1,3
1456 | 4
1457 | 0,2
1458 | 1,3
1459 | 
1460 | 0,2,4
1461 | 1,4
1462 | 3
1463 | 0,2
1464 | 1
1465 | 3,4
1466 | 0,2
1467 | 1
1468 | 3
1469 | 0,2,4
1470 | 1,4
1471 | 
1472 | 0,2,3
1473 | 1
1474 | 4
1475 | 0,2,3
1476 | 2,3,4
1477 | 1
1478 | 0
1479 | 2,3
1480 | 1,4
1481 | 0
1482 | 2,3
1483 | 1
1484 | 0,4
1485 | 2,4
1486 | 1,3
1487 | 0
1488 | 2
1489 | 1,3,4
1490 | 0
1491 | 2
1492 | 1,3
1493 | 0,4
1494 | 2,4
1495 | 1
1496 | 0,3
1497 | 2
1498 | 1,4
1499 | 0,3
1500 | 2
1501 | 1
1502 | 0,3,4
1503 | 3,4
1504 | 1,2
1505 | 0
1506 | 3
1507 | 1,2,4
1508 | 0
1509 | 3
1510 | 1,2
1511 | 0,4
1512 | 4
1513 | 1,2,3
1514 | 0
1515 | 
1516 | 1,2,3
1517 | 0,4
1518 | 4
1519 | 1,2
1520 | 0,3
1521 | 
1522 | 1,2,4
1523 | 0,3
1524 | 
1525 | 1,2
1526 | 0,3,4
1527 | 3,4
1528 | 1
1529 | 0,2
1530 | 3
1531 | 1,4
1532 | 0,2
1533 | 3
1534 | 1
1535 | 0,2,4
1536 | 4
1537 | 1,3
1538 | 0,2
1539 | 
1540 | 1,3,4
1541 | 0,2
1542 | 
1543 | 1,3
1544 | 0,2,4
1545 | 4
1546 | 1
1547 | 0,2,3
1548 | 
1549 | 1,4
1550 | 0,2,3
1551 | 2,3,4
1552 | 
1553 | 0,1
1554 | 2,3
1555 | 4
1556 | 0,1
1557 | 2,3
1558 | 
1559 | 0,1,4
1560 | 2,4
1561 | 3
1562 | 0,1
1563 | 2
1564 | 3,4
1565 | 0,1
1566 | 2
1567 | 3
1568 | 0,1,4
1569 | 2,4
1570 | 
1571 | 0,1,3
1572 | 2
1573 | 4
1574 | 0,1,3
1575 | 3,4
1576 | 2
1577 | 0,1
1578 | 3
1579 | 2,4
1580 | 0,1
1581 | 3
1582 | 2
1583 | 0,1,4
1584 | 4
1585 | 2,3
1586 | 0,1
1587 | 
1588 | 2,3,4
1589 | 0,1
1590 | 
1591 | 2,3
1592 | 0,1,4
1593 | 4
1594 | 2
1595 | 0,1,3
1596 | 
1597 | 2,4
1598 | 0,1,3
1599 | 3,4
1600 | 
1601 | 0,1,2
1602 | 3
1603 | 4
1604 | 0,1,2
1605 | 4
1606 | 3
1607 | 0,1,2
1608 | 
1609 | 3,4
1610 | 0,1,2
1611 | 0,0,0,1,1
1612 | 0,0,0,1,2
1613 | 0,0,0,2,1
1614 | 0,0,0,2,2
1615 | 0,0,1,0,1
1616 | 0,0,1,0,2
1617 | 0,0,1,1,0
1618 | 0,0,1,1,1
1619 | 0,0,1,1,2
1620 | 0,0,1,2,0
1621 | 0,0,1,2,1
1622 | 0,0,1,2,2
1623 | 0,0,2,0,1
1624 | 0,0,2,0,2
1625 | 0,0,2,1,0
1626 | 0,0,2,1,1
1627 | 0,0,2,1,2
1628 | 0,0,2,2,0
1629 | 0,0,2,2,1
1630 | 0,0,2,2,2
1631 | 0,1,0,0,1
1632 | 0,1,0,0,2
1633 | 0,1,0,1,0
1634 | 0,1,0,1,1
1635 | 0,1,0,1,2
1636 | 0,1,0,2,0
1637 | 0,1,0,2,1
1638 | 0,1,0,2,2
1639 | 0,1,1,0,0
1640 | 0,1,1,0,1
1641 | 0,1,1,0,2
1642 | 0,1,1,1,0
1643 | 0,1,1,1,2
1644 | 0,1,1,2,0
1645 | 0,1,1,2,1
1646 | 0,1,1,2,2
1647 | 0,1,2,0,0
1648 | 0,1,2,0,1
1649 | 0,1,2,0,2
1650 | 0,1,2,1,0
1651 | 0,1,2,1,1
1652 | 0,1,2,1,2
1653 | 0,1,2,2,0
1654 | 0,1,2,2,1
1655 | 0,1,2,2,2
1656 | 0,2,0,0,1
1657 | 0,2,0,0,2
1658 | 0,2,0,1,0
1659 | 0,2,0,1,1
1660 | 0,2,0,1,2
1661 | 0,2,0,2,0
1662 | 0,2,0,2,1
1663 | 0,2,0,2,2
1664 | 0,2,1,0,0
1665 | 0,2,1,0,1
1666 | 0,2,1,0,2
1667 | 0,2,1,1,0
1668 | 0,2,1,1,1
1669 | 0,2,1,1,2
1670 | 0,2,1,2,0
1671 | 0,2,1,2,1
1672 | 0,2,1,2,2
1673 | 0,2,2,0,0
1674 | 0,2,2,0,1
1675 | 0,2,2,0,2
1676 | 0,2,2,1,0
1677 | 0,2,2,1,1
1678 | 0,2,2,1,2
1679 | 0,2,2,2,0
1680 | 0,2,2,2,1
1681 | 1,0,0,0,1
1682 | 1,0,0,0,2
1683 | 1,0,0,1,0
1684 | 1,0,0,1,1
1685 | 1,0,0,1,2
1686 | 1,0,0,2,0
1687 | 1,0,0,2,1
1688 | 1,0,0,2,2
1689 | 1,0,1,0,0
1690 | 1,0,1,0,1
1691 | 1,0,1,0,2
1692 | 1,0,1,1,0
1693 | 1,0,1,1,2
1694 | 1,0,1,2,0
1695 | 1,0,1,2,1
1696 | 1,0,1,2,2
1697 | 1,0,2,0,0
1698 | 1,0,2,0,1
1699 | 1,0,2,0,2
1700 | 1,0,2,1,0
1701 | 1,0,2,1,1
1702 | 1,0,2,1,2
1703 | 1,0,2,2,0
1704 | 1,0,2,2,1
1705 | 1,0,2,2,2
1706 | 1,1,0,0,0
1707 | 1,1,0,0,1
1708 | 1,1,0,0,2
1709 | 1,1,0,1,0
1710 | 1,1,0,1,2
1711 | 1,1,0,2,0
1712 | 1,1,0,2,1
1713 | 1,1,0,2,2
1714 | 1,1,1,0,0
1715 | 1,1,1,0,2
1716 | 1,1,1,2,0
1717 | 1,1,1,2,2
1718 | 1,1,2,0,0
1719 | 1,1,2,0,1
1720 | 1,1,2,0,2
1721 | 1,1,2,1,0
1722 | 1,1,2,1,2
1723 | 1,1,2,2,0
1724 | 1,1,2,2,1
1725 | 1,1,2,2,2
1726 | 1,2,0,0,0
1727 | 1,2,0,0,1
1728 | 1,2,0,0,2
1729 | 1,2,0,1,0
1730 | 1,2,0,1,1
1731 | 1,2,0,1,2
1732 | 1,2,0,2,0
1733 | 1,2,0,2,1
1734 | 1,2,0,2,2
1735 | 1,2,1,0,0
1736 | 1,2,1,0,1
1737 | 1,2,1,0,2
1738 | 1,2,1,1,0
1739 | 1,2,1,1,2
1740 | 1,2,1,2,0
1741 | 1,2,1,2,1
1742 | 1,2,1,2,2
1743 | 1,2,2,0,0
1744 | 1,2,2,0,1
1745 | 1,2,2,0,2
1746 | 1,2,2,1,0
1747 | 1,2,2,1,1
1748 | 1,2,2,1,2
1749 | 1,2,2,2,0
1750 | 1,2,2,2,1
1751 | 2,0,0,0,1
1752 | 2,0,0,0,2
1753 | 2,0,0,1,0
1754 | 2,0,0,1,1
1755 | 2,0,0,1,2
1756 | 2,0,0,2,0
1757 | 2,0,0,2,1
1758 | 2,0,0,2,2
1759 | 2,0,1,0,0
1760 | 2,0,1,0,1
1761 | 2,0,1,0,2
1762 | 2,0,1,1,0
1763 | 2,0,1,1,1
1764 | 2,0,1,1,2
1765 | 2,0,1,2,0
1766 | 2,0,1,2,1
1767 | 2,0,1,2,2
1768 | 2,0,2,0,0
1769 | 2,0,2,0,1
1770 | 2,0,2,0,2
1771 | 2,0,2,1,0
1772 | 2,0,2,1,1
1773 | 2,0,2,1,2
1774 | 2,0,2,2,0
1775 | 2,0,2,2,1
1776 | 2,1,0,0,0
1777 | 2,1,0,0,1
1778 | 2,1,0,0,2
1779 | 2,1,0,1,0
1780 | 2,1,0,1,1
1781 | 2,1,0,1,2
1782 | 2,1,0,2,0
1783 | 2,1,0,2,1
1784 | 2,1,0,2,2
1785 | 2,1,1,0,0
1786 | 2,1,1,0,1
1787 | 2,1,1,0,2
1788 | 2,1,1,1,0
1789 | 2,1,1,1,2
1790 | 2,1,1,2,0
1791 | 2,1,1,2,1
1792 | 2,1,1,2,2
1793 | 2,1,2,0,0
1794 | 2,1,2,0,1
1795 | 2,1,2,0,2
1796 | 2,1,2,1,0
1797 | 2,1,2,1,1
1798 | 2,1,2,1,2
1799 | 2,1,2,2,0
1800 | 2,1,2,2,1
1801 | 2,2,0,0,0
1802 | 2,2,0,0,1
1803 | 2,2,0,0,2
1804 | 2,2,0,1,0
1805 | 2,2,0,1,1
1806 | 2,2,0,1,2
1807 | 2,2,0,2,0
1808 | 2,2,0,2,1
1809 | 2,2,1,0,0
1810 | 2,2,1,0,1
1811 | 2,2,1,0,2
1812 | 2,2,1,1,0
1813 | 2,2,1,1,1
1814 | 2,2,1,1,2
1815 | 2,2,1,2,0
1816 | 2,2,1,2,1
1817 | 2,2,2,0,0
1818 | 2,2,2,0,1
1819 | 2,2,2,1,0
1820 | 2,2,2,1,1
1821 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning
2 | env.py can create the environment.
3 | DQN.py is the function code of DQN.
4 | train_DQN.py trains the DQN model.
5 | test_DQN.py tests the DQN model.
6 | pdqn.py is the function code of PDQN, and it can test the PDQN model.
7 | train_PDQN.py trains the PDQN model.
8 | 


--------------------------------------------------------------------------------
/__pycache__/DQN.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/__pycache__/DQN.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/agent.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/__pycache__/agent.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/__pycache__/env.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/pdqn.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/__pycache__/pdqn.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/tool.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/__pycache__/tool.cpython-37.pyc


--------------------------------------------------------------------------------
/agent.py:
--------------------------------------------------------------------------------
 1 | #!python3
 2 | class Agent(object):
 3 |     """
 4 |     Defines a basic reinforcement learning agent for OpenAI Gym environments
 5 |     """
 6 | 
 7 |     NAME = "Abstract Agent"
 8 | 
 9 |     def __init__(self, observation_space, action_space):
10 |         super().__init__()
11 |         self.observation_space = observation_space
12 |         self.action_space = action_space
13 | 
14 |     def act(self, state):
15 |         """
16 |         Determines the action to take in the given state.
17 | 
18 |         :param state:
19 |         :return:
20 |         """
21 |         raise NotImplementedError
22 | 
23 |     def step(self, state, action, reward, next_state, next_action, terminal, time_steps=1):
24 |         """
25 |         Performs a learning step given a (s,a,r,s',a') sample.
26 | 
27 |         :param state: previous observed state (s)
28 |         :param action: action taken in previous state (a)
29 |         :param reward: reward for the transition (r)
30 |         :param next_state: the resulting observed state (s')
31 |         :param next_action: action taken in next state (a')
32 |         :param terminal: whether the episode is over
33 |         :param time_steps: number of time steps the action took to execute (default=1)
34 |         :return:
35 |         """
36 |         raise NotImplementedError
37 | 
38 |     def start_episode(self):
39 |         """
40 |         Perform any initialisation for the start of an episode.
41 | 
42 |         :return:
43 |         """
44 |         raise NotImplementedError
45 | 
46 |     def end_episode(self):
47 |         """
48 |         Performs any cleanup before the next episode.
49 | 
50 |         :return:
51 |         """
52 |         raise NotImplementedError
53 | 
54 |     def __str__(self):
55 |         desc = self.NAME
56 |         return desc
57 | 


--------------------------------------------------------------------------------
/env.py:
--------------------------------------------------------------------------------
  1 | #!python3
  2 | #!/usr/bin/env python3
  3 | # -*- coding: utf-8 -*-
  4 | """
  5 | Created on Fri Jul 26 02:02:15 2019
  6 | 
  7 | @author: kuo
  8 | """
  9 | 
 10 | 
 11 | import numpy as np
 12 | #from itertools import combinations
 13 | import random
 14 | import matplotlib.pyplot as plt
 15 | import math
 16 | import csv
 17 | import scipy.stats as st
 18 | import copy
 19 | 
 20 | 
 21 | class env_PowerAllocation(object):
 22 |     def __init__(self,nMBS=1,lambda1=0,lambda2=0,lambda3=0,MAXepisode=1500,n_baseline=6):
 23 |         super(env_PowerAllocation, self).__init__()
 24 |         """setting---------------------------------------------------------------------"""
 25 |         self.nMBS = 1
 26 |         self.nSBS = 3 # 5  # or J
 27 |         self.nTP = self.nMBS+self.nSBS
 28 |         self.nUE = 5 #8 # or K
 29 |         self.rMBS = 500 # in m 
 30 |         self.dmin = 10
 31 |         self.P_Max_MBS = 10**(4.3-3) # in 19.95 W
 32 |         self.P_Max_SBS = 10**(2.4-3) # in 0.25 W
 33 |         self.Pc_MBS = 130 # in W
 34 |         self.Pc_SBS = 0.5 #6.8# in W
 35 |         self.NT = 100 
 36 |         self.Ng= 20  
 37 |         self.N = 3 #5#200 # number of subchannel
 38 |         #self.sigma_MBS = 10**(0.6) # in 6 dB
 39 |         #self.sigma_SBS = 10**(0.4) 
 40 |         self.subB = 15000 # in Hz
 41 |         #self.B = self.subB*self.N   
 42 |         #self.B_MBS2SBS = self.B/self.nSBS
 43 |         self.Noise = (10**(-17.4))*0.001 # W/Hz
 44 |         self.SINR_threshold = 1 # 0dB / 1dB / 2dB
 45 |         self.Throughput_UE_threshold = np.log2(1+self.SINR_threshold) # 1/1.1756/ 1.37
 46 |         self.lambda1=lambda1
 47 |         self.lambda2=lambda2
 48 |         self.lambda3=lambda3
 49 |         self.ori_sizeTable = self.nSBS**self.nUE      # all possible association    
 50 |         self.s_dim =  (self.nUE+1)*self.nSBS+self.nUE # state dimension
 51 |         #self.Throughput_SBS_threshold = [ 50 for i in range(self.nSBS)] 
 52 |         ######################################################################## limit of continuous parameters corresponding to each discrete action 
 53 |         self.parameter_min=[np.array([0 for i in range(self.nUE)]) ] #P_min_SBS
 54 |         self.parameter_max=[np.array([self.P_Max_SBS for i in range(self.nUE)]) ]
 55 |         ######################################################################## avoid inf or divide 0
 56 |         self.delta_min = 10**(-20)
 57 |         self.delta_max = 10**(20)
 58 |         self.SINR_min=10**(-5.5) #-20dB
 59 |         self.SINR_max=10**(5.5)  # 20dB
 60 |         ######################################################################## debug or analysis
 61 |         debug_I={str(i):{'UE'+str(j):[] for j in range(self.nUE)} for i in range(MAXepisode)} # I, intra-cluster, inter-cluster
 62 |         #self.debug_channel={str(i):[]for i in range(MAXepisode)}
 63 |         debug_UE_throughput={str(i):[]for i in range(MAXepisode)}  # each UE throughput
 64 |         debug_SBS_throughput={str(i):[]for i in range(MAXepisode)} 
 65 |         debug_SBS_threshold={str(i):[]for i in range(MAXepisode)}
 66 |         debug_c={str(i):[]for i in range(MAXepisode)}  # user association
 67 |         debug_p={str(i):[]for i in range(MAXepisode)}  # power allocation
 68 |         debug_backhaul={str(i):{}for i in range(MAXepisode)} # which episode and step violate backhaul constraint & SBS index
 69 |         debug_QoS={str(i):{}for i in range(MAXepisode)}      # which episode and step violate QoS constraint & UE index
 70 |         debug_system_throughput={str(i):[]for i in range(MAXepisode)}
 71 |         debug_system_energy={str(i):[]for i in range(MAXepisode)}
 72 |         #----------------------------------------------------------------------- for all n_baseline methods
 73 |         self.debug_I={str(i):copy.deepcopy(debug_I) for i in range(n_baseline+1)}
 74 |         self.debug_UE_throughput={str(i):copy.deepcopy(debug_UE_throughput) for i in range(n_baseline+1)}
 75 |         self.debug_SBS_throughput={str(i):copy.deepcopy(debug_SBS_throughput) for i in range(n_baseline+1)}
 76 |         self.debug_SBS_threshold={str(i):debug_SBS_threshold for i in range(n_baseline+1)}
 77 |         self.debug_c={str(i):copy.deepcopy(debug_c) for i in range(n_baseline+1)}
 78 |         self.debug_p={str(i):copy.deepcopy(debug_p) for i in range(n_baseline+1)}
 79 |         self.debug_backhaul={str(i):copy.deepcopy(debug_backhaul) for i in range(n_baseline+1)}
 80 |         self.debug_QoS={str(i):copy.deepcopy(debug_QoS) for i in range(n_baseline+1)}
 81 |         self.debug_system_throughput={str(i):copy.deepcopy(debug_system_throughput) for i in range(n_baseline+1)}
 82 |         self.debug_system_energy={str(i):copy.deepcopy(debug_system_energy) for i in range(n_baseline+1)}
 83 | 
 84 | 
 85 |         
 86 |     def new(self,name):
 87 |         # to create new "Network Geometry"
 88 |         # Uniform distribution of SBSs and UEs
 89 |         # SBS and UE at least far 10 meters from MBS --> if violate, print something and need to create a new SBS-UE distribution again 
 90 |         SBS_R,SBS_A = np.random.uniform(self.dmin,self.rMBS,self.nSBS), np.random.uniform(0,2*math.pi,self.nSBS)
 91 |         self.xSBS,self.ySBS = [ r*math.cos(a) for r,a in zip(SBS_R,SBS_A)],[ r*math.sin(a) for r,a in zip(SBS_R,SBS_A)]
 92 |         UE_R,UE_A = np.random.uniform(self.dmin,self.rMBS,self.nUE), np.random.uniform(0,2*math.pi,self.nUE)
 93 |         self.xUE,self.yUE = [ r*math.cos(a) for r,a in zip(UE_R,UE_A)],[ r*math.sin(a) for r,a in zip(UE_R,UE_A)]
 94 |         # for pathloss
 95 |         self.dSBS2UE=[ ((self.xUE-x)**2+(self.yUE-y)**2)**0.5 for x,y in zip(self.xSBS,self.ySBS)]
 96 |         self.dMBS2SBS=[((x)**2+(y)**2)**0.5 for x,y in zip(self.xSBS,self.ySBS)]
 97 |         print('dSBS2UE=',self.dSBS2UE,'\n')
 98 |         print('dMBS2SBS=',self.dMBS2SBS,'\n')
 99 |         # check distance     
100 |         for iSBS,D in enumerate(self.dSBS2UE):
101 |             for iUE,d in enumerate(list(D)):
102 |                 if d < 10:
103 |                     print('SBS '+str(iSBS)+' UE '+str(iUE)+' too close')
104 |                 if d>2000:
105 |                     print('SBS '+str(iSBS)+' UE '+str(iUE)+' too far')      
106 |         for i,d in enumerate(self.dMBS2SBS):
107 |             if d < 10:
108 |                 print('SBS '+str(i)+' MBS too close')
109 |             if d > 5000:
110 |                 print('SBS '+str(i)+' MBS too far')
111 |         
112 |         """2)Plot"""
113 |         self.plotNetwork(name)
114 |         """3)Build Table"""
115 |         self.build_table()
116 |         chosen_c=np.random.choice([i for i in range(self.sizeTable)])
117 |         self.chosen_TP2UE=self.TP2UE[chosen_c]
118 |         self.chosen_UE2TP=self.UE2TP[chosen_c]
119 |         """4)Initialize state"""
120 |         self.channel()
121 |         """5)Store location and channel gain """
122 |         self.writeCSV(name)
123 |  
124 |     
125 |     def load(self,name):
126 |         # to build the used env
127 |         # 1)load
128 |         self.readCSV(name)
129 |         # 2)plotNetwork
130 |         self.plotNetwork(name)
131 |         # 3)Build Table
132 |         # 4)for pathloss
133 |         self.dSBS2UE=[ ((np.array(self.xUE)-x)**2+(np.array(self.yUE)-y)**2)**0.5 for x,y in zip(np.array(self.xSBS),np.array(self.ySBS))]
134 |         self.dMBS2SBS=[((x)**2+(y)**2)**0.5 for x,y in zip(np.array(self.xSBS),np.array(self.ySBS))]
135 |         # 5)action_space 
136 |         self.action_space=(self.sizeTable,[(self.parameter_min[0],self.parameter_max[0]) for i in range(self.sizeTable)])
137 |         
138 |     def reset(self):
139 |         """1)Initialize channel/ state"""
140 |         #self.channel()
141 |         c = np.random.randint(low=0, high=self.sizeTable, size= 1)[0] #####################
142 |         #P = np.array([ self.P_Max_SBS for i in range(self.nUE) ])
143 |         P = np.random.uniform(0,self.P_Max_SBS*0.1,self.nUE).flatten()
144 |         _, _,s,_,_ ,_,_= self.step(c,P,False,True,'0',0,0)
145 |         #self.channel()
146 |         #inits =  list(st.norm(0, 1).rvs(self.nUE))
147 |         #initG = list(self.G.T.flatten())
148 |         return s#inits,initG     
149 |         
150 |     def plotNetwork(self,name):
151 |         # 1)plot TP & UE
152 |         plt.figure(figsize=(5,5))
153 |         plt.scatter([0],[0],s=80,c='red',marker='o',alpha=0.5,label='MBS')
154 |         plt.scatter(self.xSBS,self.ySBS,s=50,c='green',marker='D',alpha=0.5,label='SBS')
155 |         plt.scatter(self.xUE,self.yUE,s=50,c='blue',marker='*',alpha=0.5,label='UE')
156 |         # 2)Display index
157 |         plt.annotate("0", xy=(0,0), xytext=(0, 0))
158 |         cnt=1
159 |         for x,y in zip(self.xSBS,self.ySBS):
160 |             plt.annotate("%s" % cnt, xy=(x,y), xytext=(x, y))
161 |             cnt = cnt+1
162 |         cnt=1
163 |         for x,y in zip(self.xUE,self.yUE):
164 |             plt.annotate("%s" % cnt, xy=(x,y), xytext=(x, y))
165 |             cnt = cnt+1
166 |         margin=50    
167 |         plt.xlim((-self.rMBS-margin, self.rMBS+margin))
168 |         plt.ylim((-self.rMBS-margin, self.rMBS+margin))
169 |         plt.title('Network Geometry ')
170 |         plt.xlabel('Distance(m)')
171 |         plt.ylabel('Distance(m)')
172 |         plt.legend(loc='upper right')
173 |         plt.savefig(name+'.png')
174 |         plt.show()
175 |         print('SBS Location')
176 |         for i in range(self.nSBS):
177 |             print(i,' (',self.xSBS[i],',',self.ySBS[i],')')
178 |         print('UE Location')
179 |         for i in range(self.nUE):
180 |             print(i,' (',self.xUE[i],',',self.yUE[i],')')
181 |             
182 |     def writeCSV(self,name):
183 |         with open(name+'.csv','w',newline='') as csvfile:
184 |             writer = csv.writer(csvfile) 
185 |             # write SBS, UE location 
186 |             writer.writerow(self.xSBS)
187 |             writer.writerow(self.ySBS)
188 |             writer.writerow(self.xUE)
189 |             writer.writerow(self.yUE) 
190 |             # write channel gain
191 |             for i in list(self.G):                 
192 |                 writer.writerow(i)
193 |             # write ori_TP2UE
194 |             for key,lis in self.ori_TP2UE.items(): 
195 |                 for i in lis:
196 |                     writer.writerow(i)
197 |             # write ori_UE2TP        
198 |             for key,lis in self.ori_UE2TP.items(): 
199 |                     writer.writerow(lis)
200 |             # write sizeTable ??  ##############################################      
201 |             writer.writerow([self.sizeTable])
202 |             # write TP2UE
203 |             for key,lis in self.TP2UE.items():
204 |                 for i in lis:
205 |                     writer.writerow(i)
206 |             # write UE2TP
207 |             for key,lis in self.UE2TP.items():
208 |                 writer.writerow(lis)
209 |                 
210 |     def readCSV(self,FileName):
211 |         self.ori_TP2UE={i:[] for i in range(self.ori_sizeTable)}
212 |         self.ori_UE2TP={}
213 |         with open(FileName+'.csv', newline='') as csvfile:
214 |             rows = csv.reader(csvfile)
215 |             rows = list(rows)
216 |             # read SBS, UE location
217 |             self.xSBS,self.ySBS=[ float(i) for i in rows[0]],[ float(i) for i in rows[1]]
218 |             self.xUE,self.yUE=[ float(i) for i in rows[2]],[ float(i) for i in rows[3]] 
219 |             # read channel gain
220 |             self.G=np.array([float(i) for lis in rows[4:4+self.nSBS] for i in lis]).reshape(self.nSBS,self.nUE+1)
221 |             # read ori_TP2UE
222 |             cnt=4+self.nSBS
223 |             for i in range(self.ori_sizeTable):
224 |                 for j in range(self.nSBS):
225 |                     self.ori_TP2UE[i].append([int(v) for v in rows[cnt]])
226 |                     cnt=cnt+1
227 |             # read ori_UE2TP        
228 |             for i in range(self.ori_sizeTable):
229 |                 self.ori_UE2TP[i]=[int(v) for v in rows[cnt]]
230 |                 cnt=cnt+1
231 |             # read sizeTable ??    
232 |             self.sizeTable=int(rows[cnt][0])
233 |             self.TP2UE={i:[] for i in range(self.sizeTable)}
234 |             self.UE2TP={}
235 |             cnt=cnt+1
236 |             # read TP2UE
237 |             for i in range(self.sizeTable):
238 |                 for j in range(self.nSBS):
239 |                     self.TP2UE[i].append([int(v) for v in rows[cnt]])
240 |                     cnt=cnt+1
241 |             # read UE2TP
242 |             for i in range(self.sizeTable):
243 |                 self.UE2TP[i]=[int(v) for v in rows[cnt]]
244 |                 cnt=cnt+1 
245 |         # action dimension               
246 |         self.a_dim = self.nUE+ self.sizeTable
247 |     
248 |     def index_list(self,l):
249 |         # l=[1,1,2,3,5,5]
250 |         # L_list =[[], [0, 1], [2], [3], [], [4, 5]]
251 |         # invalid: True, need to delete this action
252 |         L_list=[]
253 |         invalid= False
254 |         for i in range(self.nSBS):
255 |             loc=[]
256 |             c=l.count(i)
257 |             if c>self.N:
258 |                 invalid = True
259 |             while c!=0:
260 |                 loc.append(l.index(i))
261 |                 l[l.index(i)]=self.nSBS+1
262 |                 c=l.count(i)
263 |             L_list.append(loc)
264 |         return L_list, invalid
265 |     
266 |     def build_table(self):
267 |         # build table for 1) all possible associations  --> ori_UE2TP / ori_TP2UE / ori_sizeTable
268 |         #                 2) those expect that violates cluster size constraint --> UE2TP / TP2UE / sizeTable
269 |         """ori_UE2TP"""
270 |         mask=[[i] for i in range(self.nSBS)]
271 |         l2=[mask[i]+mask[j] for i in range(self.nSBS)for j in range(self.nSBS)]
272 |         for cnt in range(self.nUE-2):
273 |            l2=[l2[i]+mask[j] for i in range(len(l2))for j in range(self.nSBS)] 
274 |         self.ori_UE2TP = {i:l2[i] for i in range(len(l2))}
275 |         """ori_TP2UE"""       
276 |         invalid_list=[]
277 |         self.ori_TP2UE={}
278 |         for key in self.ori_UE2TP:
279 |             self.ori_TP2UE[key], invalid = self.index_list(self.ori_UE2TP[key].copy())
280 |             if invalid:
281 |                 invalid_list.append(key)
282 |         self.ori_sizeTable=len(self.ori_UE2TP)
283 |         """Check if action is invalid"""
284 |         self.TP2UE=self.ori_TP2UE.copy()
285 |         self.UE2TP=self.ori_UE2TP.copy()
286 |         for i in invalid_list:
287 |             self.TP2UE.pop(i)
288 |             self.UE2TP.pop(i)
289 |         """Re-create """ 
290 |         temp={}    
291 |         for i,key in enumerate(self.TP2UE):    
292 |             temp[i]=self.TP2UE[key]
293 |         self.TP2UE=temp
294 |         temp={}    
295 |         for i,key in enumerate(self.UE2TP):    
296 |             temp[i]=self.UE2TP[key]
297 |         self.UE2TP=temp
298 |         self.sizeTable=len(self.UE2TP)
299 |             
300 |     def channel(self):
301 |         """ 1)Channel """    
302 |         # 1)Rayleigh  
303 |         mu=0
304 |         sigma=1 #var=sigma**2
305 |         #X = list(st.norm(mu, sigma/2).rvs(2*(self.nUE+1)*self.nSBS))
306 |         #R=np.array([(X[i]**2+X[i+1]**2)**0.5 for i in range((self.nUE+1)*self.nSBS)]).reshape(self.nSBS,(self.nUE+1))
307 |     
308 |         # 2)Path loss                
309 |         #Shadowing_UE=(st.norm(0, self.sigma_SBS).rvs(self.nSBS*self.nUE)).reshape(self.nSBS,self.nUE)
310 |         #Shadowing_SBS=(st.norm(0, self.sigma_MBS).rvs(self.nSBS)).reshape(self.nSBS,1)
311 |         Shadowing_UE=0
312 |         Shadowing_SBS=0
313 |         PL_UE=np.array([30.53+36.7*math.log10(d/1000) for dUE2SBS in self.dSBS2UE for d in dUE2SBS]).reshape(self.nSBS,self.nUE) + Shadowing_UE
314 |         PL_SBS=np.array([19.77+3.91*math.log10(d/1000) for d in self.dMBS2SBS]).reshape(self.nSBS,1) + Shadowing_SBS      
315 |         self.PL=np.concatenate((PL_UE,PL_SBS),axis=1) # in dB
316 |         self.PL = 10**(-self.PL/10)
317 |         
318 |         # 3)Combination   
319 |         #self.G=self.PL*(R**2)
320 |         self.G=self.PL
321 |         #print('G=',self.G,'\n')
322 |     
323 |     def SubchannelAllocation(self): 
324 |         # uniformly allocate subchannel
325 |         # if cluster size <= subchannel number --> no intra-cluster interference
326 |         # if cluster size >  subchannel number --> intra-cluster interference
327 |         self.B_TP2UE=[]
328 |         for k in range(self.nSBS):
329 |             if len(self.chosen_TP2UE[k])==0:
330 |                self.B_TP2UE.append([]) 
331 |             else:
332 |                # Method 1.uniform allocation --> for N>= #UE in a cluster
333 |                nUE_SBSk = len(self.chosen_TP2UE[k])
334 |                if  nUE_SBSk> self.N:
335 |                    temp=[]
336 |                    for i in range(int(nUE_SBSk/self.N)):
337 |                        temp.append(random.sample([i for i in range(self.N)],self.N))
338 |                    temp.append(random.sample([i for i in range(self.N)],nUE_SBSk%self.N))
339 |                    self.B_TP2UE.append([i for l in temp for i in l])
340 |                else:
341 |                    self.B_TP2UE.append(random.sample([i for i in range(self.N)],nUE_SBSk))  
342 |                # Method 2.order allocation --> 0,1,..,(N-1),0,1..
343 |                #self.B_TP2UE.append([i%self.N for i in range(len(self.chosen_TP2UE[k]))])
344 |         self.B_UE2B={iUE:B for liUE,lB in zip(self.chosen_TP2UE,self.B_TP2UE) for iUE,B in zip(liUE,lB)}
345 |         
346 |     def mean_std(self,n,cflage,name):
347 |         #    1) calculate mean and standard deviation and save 
348 |         # or 2) load mean and standard deviation to use   
349 |         key = ['Energy Efficiency','Backhaul Cost','QoS Gurantee','QoS Bad','QoS Good','System Throughput','QoS Squared Difference']
350 |         self.dic_mean={i:0 for i in key}
351 |         self.dic_std={i:0 for i in key}
352 |         if cflage: # 1) calculate mean and standard deviation and save
353 |             dic_data={i:[] for i in key}
354 |             for k in range(n):
355 |                 print(k,' steps.......')
356 |                 c=np.random.choice([i for i in range(self.ori_sizeTable)])
357 |                 P=np.random.uniform(0,self.P_Max_SBS,self.nUE)             
358 |                 info_ori = self.step_mean_std(c,P)
359 |                 Energy_Efficiency_ori,Backhaul_cost_ori,QoS_good_ori,QoS_gurantee_ori,QoS_bad_ori,sum_c_Throughput_ori,QoS_squaredD_ori = info_ori
360 |                 dic_data['Energy Efficiency'].append(Energy_Efficiency_ori)
361 |                 dic_data['Backhaul Cost'].append(Backhaul_cost_ori)
362 |                 dic_data['QoS Gurantee'].append(QoS_gurantee_ori)
363 |                 dic_data['QoS Bad'].append(QoS_bad_ori)
364 |                 dic_data['QoS Good'].append(QoS_good_ori)
365 |                 dic_data['System Throughput'].append(sum_c_Throughput_ori)
366 |                 dic_data['QoS Squared Difference'].append(QoS_squaredD_ori)
367 |             for i in key:
368 |                 self.dic_mean[i]=np.mean(np.array(dic_data[i]))
369 |                 self.dic_std[i]=np.std(np.array(dic_data[i]))
370 |             with open(name,'w',newline='') as csvfile:
371 |                 writer = csv.writer(csvfile)  
372 |                 writer.writerow([self.dic_mean[i] for i in key])  
373 |                 writer.writerow([self.dic_std[i] for i in key])
374 |         else:  # 2) load mean and standard deviation to use
375 |             with open(name, newline='') as csvfile:
376 |                 rows = csv.reader(csvfile)
377 |                 rows = list(rows)
378 |                 for i,name in enumerate(key):
379 |                     self.dic_mean[name]=float(rows[0][i])
380 |                     self.dic_std[name]=float(rows[1][i])   
381 |     
382 |     
383 |     def step_mean_std(self,chosen_c,P):
384 |         # calculate mean and standard deviation
385 |         self.chosen_TP2UE=self.ori_TP2UE[chosen_c]
386 |         self.chosen_UE2TP=self.ori_UE2TP[chosen_c]        
387 |         #1) channel------------------------------------------------------------
388 |         self.channel()
389 |         #2) SubchannelAllocation-----------------------------------------------
390 |         self.SubchannelAllocation()
391 |         #3) R------------------------------------------------------------------
392 |         I = self._Interference(P)        
393 |         SINR = self._SINR(I,P) #array
394 |         SINR = np.clip(SINR,self.SINR_min,self.SINR_max)
395 |         Throughput = self._Throughput(P,SINR)
396 |         n=self.nSBS-sum([1 for i in self.chosen_TP2UE if len(i)==0])             
397 |         # 3-1) check backhaul constraint
398 |         Backhaul_difference = np.array(self.Throughput_BS)-np.array(self.Throughput_SBS_threshold) 
399 |         dic_backhaul={i:dif for i,dif in enumerate(Backhaul_difference) if dif>0}
400 |         Backhaul_cost_ori=0
401 |         # 3-2) correct throughput when violating backhaul constraint --> divide backhaul capacity based on the ratio of transmit power     
402 |         c_Throughput = Throughput.copy()
403 |         for i in dic_backhaul:
404 |             Backhaul_cost_ori=Backhaul_cost_ori+dic_backhaul[i]
405 |             if dic_backhaul[i]>0:
406 |                 i_UE = self.chosen_TP2UE[i]
407 |                 for k in i_UE:
408 |                     c_Throughput[k]=Throughput[k]*self.Throughput_SBS_threshold[i]/self.Throughput_BS[i]     
409 |         c_Throughput=np.array(c_Throughput)
410 |         Energy_Efficiency_ori = sum(c_Throughput)/(n*self.Pc_SBS+sum(P)) 
411 |         Energy_Efficiency_ori = np.clip(Energy_Efficiency_ori,self.delta_min,self.delta_max)
412 |         QoS_difference = c_Throughput-self.Throughput_UE_threshold       
413 |         QoS_good_ori = sum([i for i in QoS_difference if i>0])
414 |         QoS_bad_ori = sum([-i for i in QoS_difference if i<0])
415 |         QoS_gurantee_ori=QoS_good_ori-QoS_bad_ori
416 |         sum_c_Throughput = sum(c_Throughput)
417 |         QoS_squaredD_ori = sum([i*i for i in QoS_difference])
418 |         # 4)------------------------------------------------------------------- 
419 |         info_ori=(Energy_Efficiency_ori,Backhaul_cost_ori,QoS_good_ori,QoS_gurantee_ori,QoS_bad_ori,sum_c_Throughput,QoS_squaredD_ori)     
420 |         return info_ori 
421 |         
422 |     def step_train(self,chosen_c,P,f_ori_c,f_subc,f_debug,episode,timestep):
423 |         # step for train
424 |         done=False # True if violate backhaul constraint
425 |         QoS_R=0    # 1 if satisfy all UEs' QoS requirement
426 |         #0) Determine cluster--------------------------------------------------
427 |         if f_ori_c ==False:
428 |             self.chosen_TP2UE=self.TP2UE[chosen_c]
429 |             self.chosen_UE2TP=self.UE2TP[chosen_c]  
430 |         else:
431 |             self.chosen_TP2UE=self.ori_TP2UE[chosen_c]
432 |             self.chosen_UE2TP=self.ori_UE2TP[chosen_c]        
433 |         #1) channel for same channel in 1 episode------------------------------
434 |         #self.channel()
435 |         #2) SubchannelAllocation for different cluster-------------------------
436 |         if f_subc == True:
437 |             self.SubchannelAllocation()
438 |         #3) R------------------------------------------------------------------
439 |         if f_debug:
440 |             I = self.debug_Interference(P,episode,'1')  
441 |         else:
442 |             I = self._Interference(P)
443 |         SINR = self._SINR(I,P) #array
444 |         SINRdb = 10*np.log10(np.clip(SINR,self.delta_min,SINR)) 
445 |         Throughput_ori = self._Throughput(P,SINR)
446 |         n=self.nSBS-sum([1 for i in self.chosen_TP2UE if len(i)==0]) 
447 |         # 3-1) check backhaul constraint
448 |         Backhaul_difference = np.array(self.Throughput_BS)-np.array(self.Throughput_SBS_threshold) 
449 |         dic_backhaul={i:dif for i,dif in enumerate(Backhaul_difference) if dif>0}
450 |         Backhaul_cost_ori=0
451 |         # 3-2) correct throughput when violating backhaul constraint --> divide backhaul capacity based on the ratio of transmit power
452 |         c_Throughput_ori = copy.deepcopy(Throughput_ori)
453 |         for i in dic_backhaul:
454 |             Backhaul_cost_ori=Backhaul_cost_ori+dic_backhaul[i]   
455 |             if dic_backhaul[i]>0:
456 |                 i_UE = self.chosen_TP2UE[i]
457 |                 for k in i_UE:
458 |                     c_Throughput_ori[k]=Throughput_ori[k]*self.Throughput_SBS_threshold[i]/self.Throughput_BS[i]      
459 |         c_Throughput_ori=np.array(c_Throughput_ori)
460 |         sum_Throughput_ori =sum(c_Throughput_ori)
461 |         Energy_Efficiency_ori = sum(c_Throughput_ori)/(n*self.Pc_SBS+sum(P)) 
462 |         Energy_Efficiency_ori = np.clip(Energy_Efficiency_ori,-self.delta_max,self.delta_max)
463 |         QoS_difference = c_Throughput_ori-self.Throughput_UE_threshold 
464 |         QoS_good_ori = sum([i for i in QoS_difference if i>0])
465 |         QoS_bad_ori = sum([-i for i in QoS_difference if i<0])
466 |         QoS_gurantee_ori=QoS_good_ori-QoS_bad_ori 
467 |         QoS_squaredD_ori = sum([i*i for i in QoS_difference])
468 |         # 3-3) standardize
469 |         Energy_Efficiency = (Energy_Efficiency_ori-self.dic_mean['Energy Efficiency'])/self.dic_std['Energy Efficiency']
470 |         Backhaul_cost = Backhaul_cost_ori#(Backhaul_cost_ori-self.dic_mean['Backhaul Cost'])/self.dic_std['Backhaul Cost']
471 |         QoS_gurantee= (QoS_gurantee_ori-self.dic_mean['QoS Gurantee'])/self.dic_std['QoS Gurantee']
472 |         QoS_bad = (QoS_bad_ori-self.dic_mean['QoS Bad'])/self.dic_std['QoS Bad']  
473 |         QoS_good =(QoS_good_ori-self.dic_mean['QoS Good'])/self.dic_std['QoS Good']
474 |         sum_Throughput =(sum_Throughput_ori-self.dic_mean['System Throughput'])/self.dic_std['System Throughput']
475 |         QoS_squaredD =(QoS_squaredD_ori-self.dic_mean['QoS Squared Difference'])/self.dic_std['QoS Squared Difference']
476 |         # check QoS
477 |         if QoS_bad_ori==0:
478 |             QoS_R=1
479 |         else:
480 |             self.debug_QoS['1'][str(episode)][str(timestep)]=[i_UE for i_UE,i in enumerate(QoS_difference) if i>0]
481 |         # check Backhaul
482 |         if Backhaul_cost_ori>0:
483 |             done = True
484 |             self.debug_backhaul['1'][str(episode)][str(timestep)]=[i for i in dic_backhaul if dic_backhaul[i]>0]        
485 |         # 3-4) reward  
486 |         if Backhaul_cost_ori>0:
487 |             done = True 
488 |             R = self.lambda1*Energy_Efficiency-self.lambda2*QoS_squaredD  - 0.1
489 |         else:
490 |             R=self.lambda1*Energy_Efficiency-self.lambda2*QoS_squaredD          
491 |         # 4) next state--------------------------------------------------------
492 |         Ths_=copy.deepcopy(c_Throughput_ori)
493 |         #Ths_=np.clip((Ths_-np.mean(Ths_))/(Ths_.var()**0.5),self.delta_min,self.delta_max)
494 |         #Gs_ = self.G.T.flatten()
495 |         #s_ = np.concatenate((Ths_,  Gs_),axis=0)
496 |         s_ = Ths_
497 |         # 5) info--------------------------------------------------------------        
498 |         info = (R,Energy_Efficiency,Backhaul_cost,QoS_good,QoS_gurantee,QoS_bad,sum_Throughput,QoS_squaredD)
499 |         info_lis=(list(Backhaul_difference),list(SINRdb),list(QoS_difference),list(c_Throughput_ori))
500 |         info_ori=(Energy_Efficiency_ori,Backhaul_cost_ori,QoS_good_ori,QoS_gurantee_ori,QoS_bad_ori,sum_Throughput_ori,QoS_squaredD_ori)
501 |         debug_info=(self.Throughput_SBS_threshold,self.Throughput_BS)
502 |         # 6) debug--------------------------------------------------------------
503 |         if f_debug:
504 |             self.debug_UE_throughput['1'][str(episode)].append(c_Throughput_ori)
505 |             self.debug_SBS_throughput['1'][str(episode)].append([ sum(c_Throughput_ori[BS]) for BS in self.chosen_TP2UE  ] )
506 |             self.debug_SBS_threshold['1'][str(episode)].append(self.Throughput_SBS_threshold )
507 |             self.debug_c['1'][str(episode)].append(self.chosen_UE2TP )
508 |             self.debug_p['1'][str(episode)].append(10*np.log10(P*1000))
509 |         return info,info_lis,s_,info_ori,done,debug_info,QoS_R
510 |     
511 |     def step(self,chosen_c,P,f_ori_c,f_subc,baseline,episode,timestep):
512 |         # step for test
513 |         done=False # True if violate backhaul constraint
514 |         QoS_R=0    # 1 if satisfy all UEs' QoS requirement
515 |         #0) Determine cluster--------------------------------------------------
516 |         if f_ori_c ==False:
517 |             self.chosen_TP2UE=self.TP2UE[chosen_c]
518 |             self.chosen_UE2TP=self.UE2TP[chosen_c]  
519 |         else:
520 |             self.chosen_TP2UE=self.ori_TP2UE[chosen_c]
521 |             self.chosen_UE2TP=self.ori_UE2TP[chosen_c]        
522 |         #1) channel for same channel in 1 episode------------------------------
523 |         #self.channel()
524 |         #2) SubchannelAllocation for different cluster-------------------------
525 |         if f_subc == True:
526 |             self.SubchannelAllocation()
527 |         #3) R------------------------------------------------------------------
528 |         I = self.debug_Interference(P,episode,baseline)  
529 |         SINR = self._SINR(I,P) #array
530 |         SINRdb = 10*np.log10(np.clip(SINR,self.delta_min,SINR)) # cannot np.clip(SINR,self.delta_min,self.delta_max)       
531 |         Throughput_ori = self._Throughput(P,SINR)
532 |         n=self.nSBS-sum([1 for i in self.chosen_TP2UE if len(i)==0]) 
533 |         # 3-1) backhaul
534 |         Backhaul_difference = np.array(self.Throughput_BS)-np.array(self.Throughput_SBS_threshold) 
535 |         dic_backhaul={i:dif for i,dif in enumerate(Backhaul_difference) if dif>0}
536 |         Backhaul_cost_ori=0
537 |         # 3-2) correct throughput
538 |         c_Throughput_ori = copy.deepcopy(Throughput_ori)
539 |         for i in dic_backhaul:
540 |             Backhaul_cost_ori=Backhaul_cost_ori+dic_backhaul[i]   
541 |             if dic_backhaul[i]>0:
542 |                 i_UE = self.chosen_TP2UE[i]
543 |                 for k in i_UE:
544 |                     c_Throughput_ori[k]=Throughput_ori[k]*self.Throughput_SBS_threshold[i]/self.Throughput_BS[i]      
545 |         c_Throughput_ori=np.array(c_Throughput_ori)
546 |         sum_Throughput_ori =sum(c_Throughput_ori)
547 |         Energy_Efficiency_ori = sum(c_Throughput_ori)/(n*self.Pc_SBS+sum(P)) 
548 |         self.debug_system_throughput[baseline][str(episode)].append(sum(c_Throughput_ori))
549 |         self.debug_system_energy[baseline][str(episode)].append([n*self.Pc_SBS+sum(P),n*self.Pc_SBS,sum(P)]) # overall,operation,transmit
550 |         Energy_Efficiency_ori = np.clip(Energy_Efficiency_ori,-self.delta_max,self.delta_max)
551 |         QoS_difference = c_Throughput_ori-self.Throughput_UE_threshold 
552 |         QoS_good_ori = sum([i for i in QoS_difference if i>0])
553 |         QoS_bad_ori = sum([-i for i in QoS_difference if i<0 ])
554 |         
555 |         QoS_gurantee_ori=QoS_good_ori-QoS_bad_ori 
556 |         QoS_squaredD_ori = sum([i*i for i in QoS_difference])
557 |         # 3-3) standardize
558 |         Energy_Efficiency = (Energy_Efficiency_ori-self.dic_mean['Energy Efficiency'])/self.dic_std['Energy Efficiency']
559 |         Backhaul_cost = Backhaul_cost_ori#(Backhaul_cost_ori-self.dic_mean['Backhaul Cost'])/self.dic_std['Backhaul Cost']
560 |         QoS_gurantee= (QoS_gurantee_ori-self.dic_mean['QoS Gurantee'])/self.dic_std['QoS Gurantee']
561 |         QoS_bad = (QoS_bad_ori-self.dic_mean['QoS Bad'])/self.dic_std['QoS Bad']  
562 |         QoS_good =(QoS_good_ori-self.dic_mean['QoS Good'])/self.dic_std['QoS Good']
563 |         sum_Throughput =(sum_Throughput_ori-self.dic_mean['System Throughput'])/self.dic_std['System Throughput']
564 |         QoS_squaredD =(QoS_squaredD_ori-self.dic_mean['QoS Squared Difference'])/self.dic_std['QoS Squared Difference']
565 |         # check QoS
566 |         if QoS_bad_ori==0:
567 |             QoS_R=1
568 |         else:
569 |             self.debug_QoS['1'][str(episode)][str(timestep)]=[i_UE for i_UE,i in enumerate(QoS_difference) if i>0]
570 |         # check Backhaul
571 |         if Backhaul_cost_ori>0:
572 |             done = True
573 |             self.debug_backhaul['1'][str(episode)][str(timestep)]=[i for i in dic_backhaul if dic_backhaul[i]>0]        
574 |         # 3-4) reward  
575 |         if Backhaul_cost_ori>0:
576 |             done = True 
577 |             R = self.lambda1*Energy_Efficiency-self.lambda2*QoS_squaredD  - 0.1
578 |         else:
579 |             R=self.lambda1*Energy_Efficiency-self.lambda2*QoS_squaredD          
580 |         # 4) next state--------------------------------------------------------
581 |         Ths_=copy.deepcopy(c_Throughput_ori)
582 |         #Ths_=np.clip((Ths_-np.mean(Ths_))/(Ths_.var()**0.5),self.delta_min,self.delta_max)
583 |         #Gs_ = self.G.T.flatten()
584 |         #s_ = np.concatenate((Ths_,  Gs_),axis=0)
585 |         s_ = Ths_
586 |         # 5) info--------------------------------------------------------------        
587 |         info = (R,Energy_Efficiency,Backhaul_cost,QoS_good,QoS_gurantee,QoS_bad,sum_Throughput,QoS_squaredD)
588 |         info_lis=(list(Backhaul_difference),list(SINRdb),list(QoS_difference),list(c_Throughput_ori))
589 |         info_ori=(Energy_Efficiency_ori,Backhaul_cost_ori,QoS_good_ori,QoS_gurantee_ori,QoS_bad_ori,sum_Throughput_ori,QoS_squaredD_ori)
590 |         debug_info=(self.Throughput_SBS_threshold,self.Throughput_BS)
591 |         # 6) debug--------------------------------------------------------------
592 |         self.debug_UE_throughput[baseline][str(episode)].append(c_Throughput_ori)
593 |         self.debug_SBS_throughput[baseline][str(episode)].append([ sum(c_Throughput_ori[BS]) for BS in self.chosen_TP2UE  ] )
594 |         self.debug_SBS_threshold[baseline][str(episode)].append(self.Throughput_SBS_threshold )
595 |         self.debug_c[baseline][str(episode)].append(self.chosen_UE2TP )
596 |         self.debug_p[baseline][str(episode)].append(10*np.log10(P*1000))
597 |         return info,info_lis,s_,info_ori,done,debug_info,QoS_R 
598 |     
599 |     def _pInterference(self,P,iUE,k):
600 |         #UEs use the same subchannel
601 |         iUE=[key for key in iUE if self.B_UE2B[key]==self.B_UE2B[k] ] 
602 |         #3)iG
603 |         iG=[self.G[self.chosen_UE2TP[j],k] for j in iUE]
604 |         #4)iP
605 |         iP=[P[i] for i in iUE]
606 |         #5)I
607 |         interference = np.sum( np.array(iG)*np.array(iP) )
608 |         return interference
609 |     
610 |     def _Interference(self,P):    
611 |         """Interference for ALL UEs"""
612 |         I=[]
613 |         for k in range(self.nUE):
614 |             """1)inter-cell interference"""
615 |             #1)iTP
616 |             iTP=self.chosen_UE2TP[k]
617 |             #2)iUE 
618 |             #UEs in different clusters            
619 |             inter_iUE=[i for i in range(self.nUE)]
620 |             for i in self.chosen_TP2UE[iTP]:
621 |                 inter_iUE.remove(i)
622 |             inter_interference=self._pInterference(P,inter_iUE,k)
623 |             """2)intra-cell interference"""
624 |             #UEs in same clusters   
625 |             intra_iUE = self.chosen_TP2UE[iTP].copy()
626 |             intra_iUE.remove(k)
627 |             intra_interference=self._pInterference(P,intra_iUE,k)
628 |             """3)interference"""
629 |             interference = inter_interference+intra_interference
630 |             I.append(interference)
631 |         return I
632 |     
633 |     def debug_Interference(self,P,episode,baseline):    
634 |         """Interference for ALL UEs"""
635 |         I=[]
636 |         for k in range(self.nUE):
637 |             """1)inter-cell interference"""
638 |             #1)iTP
639 |             iTP=self.chosen_UE2TP[k]
640 |             #2)iUE 
641 |             #UEs in different clusters            
642 |             inter_iUE=[i for i in range(self.nUE)]
643 |             for i in self.chosen_TP2UE[iTP]:
644 |                 inter_iUE.remove(i)
645 |             inter_interference=self._pInterference(P,inter_iUE,k)
646 |             """2)intra-cell interference"""
647 |             #UEs in same clusters   
648 |             intra_iUE = self.chosen_TP2UE[iTP].copy()
649 |             intra_iUE.remove(k)
650 |             intra_interference=self._pInterference(P,intra_iUE,k)
651 |             """3)interference"""
652 |             interference = inter_interference+intra_interference
653 |             I.append(interference)
654 |             ##########################
655 |             self.debug_I[baseline][str(episode)]['UE'+str(k)].append([interference,intra_interference,inter_interference])
656 |         return I
657 |     
658 |     def _SINR(self,I,P):
659 |         G_UE=[self.G[self.chosen_UE2TP[i],i] for i in range(self.nUE)]
660 |         SINR=np.array(G_UE)*np.array(P)/(self.Noise*self.subB+np.array(I))#np.clip(np.array(G_UE)*np.array(P)/(self.Noise*self.subB+np.array(I)),-self.delta_max,self.delta_max)
661 |         #signal_part=np.array(G_UE)*np.array(P)
662 |         return SINR
663 | 
664 |     def _Throughput(self,P,SINR):
665 |         # Method 2. ratio
666 |         Throughput=np.log2(1+SINR)      
667 |         self.Throughput_BS=[ sum(Throughput[BS]) for BS in self.chosen_TP2UE  ]        
668 |         """ 1) Equal constraint
669 |         #Throughput_SBS_threshold=np.log2(1+(self.G[:,-1]*self.P_Max_MBS)/(self.Noise*self.B_MBS2SBS))/self.nSBS####################
670 |         #Throughput_SBS_threshold=self.B_MBS2SBS*np.log2(1+(self.G[:,-1]*self.P_Max_MBS)/(self.Noise*self.B_MBS2SBS))       
671 |         """
672 |         # 2) MIMO constraint
673 |         self.Throughput_SBS_threshold = np.log2(1+((self.NT-self.Ng+1)/self.Ng)*( (self.G[:,-1]*self.P_Max_MBS)/(self.Noise*self.subB) ) )      
674 |         return Throughput   
675 | 
676 |     def baseline1(self):
677 |         # 1)UE choose the nearst SBS
678 |         #the nearest SBS index
679 |         dUE2SBS=[ ((np.array(self.xSBS)-x)**2+(np.array(self.ySBS)-y)**2)**0.5 for x,y in zip(self.xUE,self.yUE)]  
680 |         chosen_UE2TP=[np.argmin(i) for i in dUE2SBS]  
681 |         #the chosen_c
682 |         for key,value in self.ori_UE2TP.items():
683 |             if value==chosen_UE2TP:
684 |                 b1_chosen_c=key
685 |         return b1_chosen_c
686 |     
687 |     def baseline2(self):
688 |         # 2)UE choose the SBS with the best channel state  
689 |         chosen_UE2TP=[np.argmax(self.G[:,i]) for i in range(self.nUE)]
690 |         #the chosen_c
691 |         for key,value in self.ori_UE2TP.items():
692 |             if value==chosen_UE2TP:
693 |                 b2_chosen_c=key
694 |         return b2_chosen_c
695 |  
696 |     def checkBackhaul(self,P):       
697 |         I = self._Interference(P)        
698 |         SINR = self._SINR(I,P) #array
699 |         _ = self._Throughput(P,SINR)
700 |         # calculate sum rate for all SBSs
701 |         Backhaul_difference = np.array(self.Throughput_BS)-np.array(self.Throughput_SBS_threshold) 
702 |         dic_backhaul={i:dif for i,dif in enumerate(Backhaul_difference) if dif>0}
703 |         Backhaul_cost_ori=0
704 |         # check backhaul constraint   
705 |         for i in dic_backhaul:
706 |             Backhaul_cost_ori=Backhaul_cost_ori+dic_backhaul[i]
707 |         if Backhaul_cost_ori>0:
708 |             violate = True
709 |         else:
710 |             violate = False
711 |         return  violate
712 |     
713 |     def checkQoS(self,Throughput):
714 |         violate = False
715 |         QoS_difference = Throughput - self.Throughput_UE_threshold
716 |         for i in QoS_difference:
717 |             if i<0:
718 |                violate=True
719 |                break
720 |         return violate
721 |     
722 |     def randomP(self,chosen_c,f_ori_c):
723 |         # determine random power that satisfies backhaul constraint
724 |         #0) Determine cluster--------------------------------------------------
725 |         if f_ori_c ==False:
726 |             self.chosen_TP2UE=self.TP2UE[chosen_c]
727 |             self.chosen_UE2TP=self.UE2TP[chosen_c]  
728 |         else:
729 |             self.chosen_TP2UE=self.ori_TP2UE[chosen_c]
730 |             self.chosen_UE2TP=self.ori_UE2TP[chosen_c]
731 |         # 1) SubchannelAllocation----------------------------------------------
732 |         self.SubchannelAllocation()
733 |         # 2) checkBackhaul-----------------------------------------------------
734 |         violate=True
735 |         p_limit=1
736 |         while violate:
737 |             n_step=0   
738 |             if p_limit<0:
739 |                 break
740 |             while violate:
741 |                 n_step=n_step+1
742 |                 P=np.random.uniform(0,self.P_Max_SBS*p_limit,self.nUE)
743 |                 violate = self.checkBackhaul(P)
744 |                 if n_step >100:
745 |                     break 
746 |             p_limit=p_limit-0.1
747 |         return P
748 |     
749 |     def randomC(self,P):
750 |         # determine random association that satisfies backhaul constraint
751 |         # 1) SubchannelAllocation----------------------------------------------
752 |         self.SubchannelAllocation()
753 |         # 2) checkBackhaul-----------------------------------------------------
754 |         violate=True
755 |         lis=[i for i in range(self.ori_sizeTable)]
756 |         while violate:
757 |             if len(lis)==0:
758 |                 return np.random.choice([i for i in range(self.ori_sizeTable)])
759 |             chosen_c=np.random.choice(lis)
760 |             lis.remove(chosen_c)
761 |             self.chosen_TP2UE=self.ori_TP2UE[chosen_c]
762 |             self.chosen_UE2TP=self.ori_UE2TP[chosen_c]
763 |             violate = self.checkBackhaul(P)
764 |         return chosen_c
765 | #%%        
if __name__ == '__main__':
    # Stand-alone entry point: builds one environment instance for a named
    # scenario and runs its load / channel / writeCSV / mean_std steps in order.

    #%% 1)  (a) create a new SBS-UE distribution and (b) calculate the mean and standard deviation
    # lambda1..lambda3 are passed to the env_PowerAllocation constructor;
    # the values after '#' are earlier settings kept for reference.
    lambda1=0.43#0.53#1
    lambda2=0.16#0.05#0.42#0.8
    lambda3=0#0.1#0.3#0 
    # CSV file name handed to env.mean_std (an older file name is kept after '#')
    mean_name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv'#'mean_std_cc_ct_0dB_s3_nv21_oldChannel_nobackhaul.csv'
    scenario_name = 'EnvInfo_3'
    # second argument of env.mean_std; presumably True = calculate, False = load -- TODO confirm against env.mean_std
    mean_flage=True 
    env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3)
    env.load(name=scenario_name)
    # alternative to env.load: env.new(name=scenario_name)
    #env.new(name=scenario_name)
    env.channel()
    env.writeCSV(scenario_name)
    env.mean_std(10**6,mean_flage,mean_name)
    #%% 2) load (a) the SBS-UE distribution and (b) the mean and standard deviation
    # (disabled alternative configuration, kept for reference)
    #lambda1=1#0.53#1
    #lambda2=0#0.05#0.42#0.8
    #lambda3=0#0.1#0.3#0 
    #mean_name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5.csv'#'mean_std_cc_ct_0dB_s3_nv21_oldChannel_nobackhaul.csv'
    #scenario_name = 'EnvInfo_11'
    #mean_flage=False
    #env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3)
    #env.load(name=scenario_name)
    #env.mean_std(10**6,mean_flage,mean_name)
793 |     


--------------------------------------------------------------------------------
/mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv:
--------------------------------------------------------------------------------
1 | 14.27290812882412,0.0,21.789321971591978,1.4636380189483575,23.252959990540347,26.78932197159197,351.89412811801566
2 | 5.142182013770176,0.0,8.94766154843589,0.7804016158448511,8.679482600019545,8.947661548435889,152.70494406459784
3 | 


--------------------------------------------------------------------------------
/memory/__init__.py:
--------------------------------------------------------------------------------
1 | from memory.memory import Memory
2 | 
3 | __all__ = ["Memory"]
4 | 


--------------------------------------------------------------------------------
/memory/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/memory/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/memory/__pycache__/memory.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/memory/__pycache__/memory.cpython-37.pyc


--------------------------------------------------------------------------------
/memory/memory.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Source: https://github.com/openai/baselines/blob/master/baselines/ddpg/ddpg.py
  3 | """
  4 | import numpy as np
  5 | 
  6 | 
  7 | class RingBuffer(object):
  8 |     def __init__(self, maxlen, shape, dtype='float32'):
  9 |         self.maxlen = maxlen
 10 |         self.start = 0
 11 |         self.length = 0
 12 |         self.data = np.zeros((maxlen,) + shape).astype(dtype)
 13 | 
 14 |     def __len__(self):
 15 |         return self.length
 16 | 
 17 |     def __getitem__(self, idx):
 18 |         if idx < 0 or idx >= self.length:
 19 |             raise KeyError()
 20 |         return self.data[(self.start + idx) % self.maxlen]
 21 | 
 22 |     def get_batch(self, idxs):
 23 |         return self.data[(self.start + idxs) % self.maxlen]
 24 | 
 25 |     def append(self, v):
 26 |         if self.length < self.maxlen:
 27 |             # We have space, simply increase the length.
 28 |             self.length += 1
 29 |         elif self.length == self.maxlen:
 30 |             # No space, "remove" the first item.
 31 |             self.start = (self.start + 1) % self.maxlen
 32 |         else:
 33 |             # This should never happen.
 34 |             raise RuntimeError()
 35 |         self.data[(self.start + self.length - 1) % self.maxlen] = v
 36 | 
 37 |     def clear(self):
 38 |         self.start = 0
 39 |         self.length = 0
 40 |         self.data[:] = 0  # unnecessary, not freeing any memory, could be slow
 41 | 
 42 | 
 43 | def array_min2d(x):
 44 |     x = np.array(x)
 45 |     if x.ndim >= 2:
 46 |         return x
 47 |     return x.reshape(-1, 1)
 48 | 
 49 | 
 50 | class Memory(object):
 51 |     def __init__(self, limit, observation_shape, action_shape, next_actions=False):
 52 |         self.limit = limit #replay_memory_size
 53 | 
 54 |         self.states = RingBuffer(limit, shape=observation_shape)
 55 |         self.actions = RingBuffer(limit, shape=action_shape)
 56 |         self.rewards = RingBuffer(limit, shape=(1,))
 57 |         self.next_states = RingBuffer(limit, shape=observation_shape)
 58 |         self.next_actions = RingBuffer(limit, shape=action_shape) if next_actions else None
 59 |         self.terminals = RingBuffer(limit, shape=(1,))
 60 | 
 61 |     def sample(self, batch_size, random_machine=np.random):
 62 |         # Draw such that we always have a proceeding element.
 63 |         # batch_idxs = random_machine.random_integers(self.nb_entries - 2, size=batch_size)
 64 |         batch_idxs = random_machine.random_integers(low=0, high=self.nb_entries-1, size=batch_size)
 65 | 
 66 |         '''states_batch = array_min2d(self.states.get_batch(batch_idxs))
 67 |         actions_batch = array_min2d(self.actions.get_batch(batch_idxs))
 68 |         rewards_batch = array_min2d(self.rewards.get_batch(batch_idxs))
 69 |         next_states_batch = array_min2d(self.next_states.get_batch(batch_idxs))
 70 |         terminals_batch = array_min2d(self.terminals.get_batch(batch_idxs))'''
 71 |         states_batch = self.states.get_batch(batch_idxs)
 72 |         actions_batch = self.actions.get_batch(batch_idxs)
 73 |         rewards_batch = self.rewards.get_batch(batch_idxs)
 74 |         next_states_batch = self.next_states.get_batch(batch_idxs)
 75 |         next_actions = self.next_actions.get_batch(batch_idxs) if self.next_actions is not None else None
 76 |         terminals_batch = self.terminals.get_batch(batch_idxs)
 77 | 
 78 |         if next_actions is not None:
 79 |             return states_batch, actions_batch, rewards_batch, next_states_batch, next_actions, terminals_batch
 80 |         else:
 81 |             return states_batch, actions_batch, rewards_batch, next_states_batch, terminals_batch
 82 | 
 83 |     def append(self, state, action, reward, next_state, next_action=None, terminal=False, training=True):
 84 |         if not training:
 85 |             return
 86 | 
 87 |         self.states.append(state)
 88 |         self.actions.append(action)
 89 |         self.rewards.append(reward)
 90 |         self.next_states.append(next_state)
 91 |         if self.next_actions:
 92 |             self.next_actions.append(next_action)
 93 |         self.terminals.append(terminal)
 94 | 
 95 |     def clear(self):
 96 |         self.states.clear()
 97 |         self.actions.clear()
 98 |         self.rewards.clear()
 99 |         self.next_states.clear()
100 |         self.next_actions.clear()
101 |         self.terminals.clear()
102 | 
103 |     @property
104 |     def nb_entries(self):
105 |         return len(self.states)
106 | 
107 | 
class MemoryV2(object):
    """Replay memory with optional next-action and time-step buffers.

    Fields that were not requested at construction are held as ``None`` and
    are left out of the sampled tuple.
    """

    def __init__(self, limit, observation_shape, action_shape, next_actions=False, time_steps=False):
        self.limit = limit

        def scalar_buffer():
            return RingBuffer(limit, shape=(1,))

        self.states = RingBuffer(limit, shape=observation_shape)
        self.actions = RingBuffer(limit, shape=action_shape)
        self.rewards = scalar_buffer()
        self.next_states = RingBuffer(limit, shape=observation_shape)
        self.next_actions = None
        if next_actions:
            self.next_actions = RingBuffer(limit, shape=action_shape)
        self.time_steps = None
        if time_steps:
            self.time_steps = scalar_buffer()
        self.terminals = scalar_buffer()

    def sample(self, batch_size, random_machine=np.random):
        """Draw ``batch_size`` transitions uniformly with replacement.

        Returns a tuple ordered as: states, actions, rewards, next_states,
        [next_actions], terminals, [time_steps]; bracketed entries appear
        only when the corresponding buffer exists.
        """
        picks = random_machine.choice(self.nb_entries, size=batch_size)

        mandatory = (self.states, self.actions, self.rewards, self.next_states)
        batch = [buf.get_batch(picks) for buf in mandatory]
        if self.next_actions is not None:
            batch.append(self.next_actions.get_batch(picks))
        batch.append(self.terminals.get_batch(picks))
        if self.time_steps is not None:
            batch.append(self.time_steps.get_batch(picks))
        return tuple(batch)

    def append(self, state, action, reward, next_state, next_action=None, terminal=False, time_steps=None):
        """Store one transition in every buffer that exists."""
        for buf, value in (
            (self.states, state),
            (self.actions, action),
            (self.rewards, reward),
            (self.next_states, next_state),
            (self.next_actions, next_action),
            (self.terminals, terminal),
            (self.time_steps, time_steps),
        ):
            # optional buffers are None when they were not requested
            if buf is not None:
                buf.append(value)

    @property
    def nb_entries(self):
        """Number of transitions currently stored."""
        return len(self.states)
161 | 
162 | 
class MemoryNStepReturns(object):
    """Replay memory with optional next-action, time-step and n-step-return buffers.

    Fields that were not requested at construction are held as ``None`` and
    are left out of the sampled tuple.
    """

    def __init__(self, limit, observation_shape, action_shape, next_actions=False, time_steps=False, n_step_returns=False):
        self.limit = limit

        def scalar_buffer():
            return RingBuffer(limit, shape=(1,))

        self.states = RingBuffer(limit, shape=observation_shape)
        self.actions = RingBuffer(limit, shape=action_shape)
        self.rewards = scalar_buffer()
        self.next_states = RingBuffer(limit, shape=observation_shape)
        self.next_actions = None
        if next_actions:
            self.next_actions = RingBuffer(limit, shape=action_shape)
        self.time_steps = None
        if time_steps:
            self.time_steps = scalar_buffer()
        self.terminals = scalar_buffer()
        self.n_step_returns = None
        if n_step_returns:
            self.n_step_returns = scalar_buffer()

    def sample(self, batch_size, random_machine=np.random):
        """Draw ``batch_size`` transitions uniformly with replacement.

        Returns a tuple ordered as: states, actions, rewards, next_states,
        [next_actions], terminals, [time_steps], [n_step_returns]; bracketed
        entries appear only when the corresponding buffer exists.
        """
        picks = random_machine.choice(self.nb_entries, size=batch_size)

        mandatory = (self.states, self.actions, self.rewards, self.next_states)
        batch = [buf.get_batch(picks) for buf in mandatory]
        if self.next_actions is not None:
            batch.append(self.next_actions.get_batch(picks))
        batch.append(self.terminals.get_batch(picks))
        for optional in (self.time_steps, self.n_step_returns):
            if optional is not None:
                batch.append(optional.get_batch(picks))
        return tuple(batch)

    def append(self, state, action, reward, next_state, next_action=None, terminal=False, time_steps=None,
               n_step_return=None):
        """Store one transition.

        When the time-step or n-step-return buffer exists, the matching
        argument must not be ``None`` (checked with ``assert``).
        """
        for buf, value in (
            (self.states, state),
            (self.actions, action),
            (self.rewards, reward),
            (self.next_states, next_state),
        ):
            buf.append(value)
        if self.next_actions is not None:
            self.next_actions.append(next_action)
        self.terminals.append(terminal)
        if self.time_steps is not None:
            assert time_steps is not None
            self.time_steps.append(time_steps)
        if self.n_step_returns is not None:
            assert n_step_return is not None
            self.n_step_returns.append(n_step_return)

    @property
    def nb_entries(self):
        """Number of transitions currently stored."""
        return len(self.states)
225 | 


--------------------------------------------------------------------------------
/test_DQN.py:
--------------------------------------------------------------------------------
  1 | #!python3
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | import torch.optim as optim
  6 | import numpy as np
  7 | import random
  8 | from collections import Counter
  9 | from torch.autograd import Variable
 10 | import time
 11 | import scipy.stats as st
 12 | import copy
 13 | import matplotlib.pyplot as plt
 14 | import os 
 15 | os.chdir('/home/chan/PDQN/') 
 16 | os.environ['CUDA_VISIBLE_DEVICES']='1'
 17 | from agent import Agent
 18 | from memory.memory import Memory
 19 | #from memory import Memory
 20 | from utils import soft_update_target_network, hard_update_target_network
 21 | from utils.noise import OrnsteinUhlenbeckActionNoise
 22 | from env import env_PowerAllocation
 23 | import tool as t
 24 | from pdqn import PDQNAgent
 25 | from DQN import DQNAgent
 26 | 
 27 | 
 28 | 
 29 | 
 30 | 
 31 | 
 32 | #%%
 33 | 
 34 | if __name__ == '__main__':
 35 |     # PDQN=====================================================================
 36 |     batch_size=128#32
 37 |     initial_memory_threshold=128#1000 # Number of transitions required to start learning.
 38 |     replay_memory_size=20000 # Replay memory transition capacity 
 39 |     epsilon_initial=1
 40 |     epsilon_steps=1000 # Number of episodes over which to linearly anneal epsilon
 41 |     epsilon_final=0.01 # Final epsilon value
 42 |     gamma=0.95
 43 |     clip_grad=1 # Parameter gradient clipping limit 
 44 |     use_ornstein_noise= False # False: Uniformly sample parameters & add noise to taken parameters / True: greedy parameters 
 45 |     inverting_gradients= True # Use inverting gradients scheme instead of squashing function
 46 |     seed=0 #Random seed
 47 |     save_freq = 100#0 # How often to save models (0 = never)
 48 |     # 1) ParamActor------------------------------------------------------------   
 49 |     learning_rate_actor_param=0.00001
 50 |     tau_actor_param=0.001
 51 |     """loss func for actor_parameter """
 52 |     average=False # Average weighted loss function  
 53 |     weighted=False # Naive weighted loss function
 54 |     random_weighted=False # Randomly weighted loss function
 55 |     indexed=False # Indexed loss function
 56 |     zero_index_gradients=False # Whether to zero all gradients for action-parameters not corresponding to the chosen action
 57 |     # 2) Actor-----------------------------------------------------------------
 58 |     tau_actor=0.1  
 59 |     learning_rate_actor=0.00001#0.0001#0.001 # reduce lr can avoid nan output
 60 |     action_input_layer=0# Which layer to input action parameters-- useless?  
 61 |     #--------------------------------------------------------------------------
 62 |     # Performance 
 63 |     dic_info_key = ['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference','Backhaul Difference','SINRdb','QoS Difference','Throughput']
 64 |     dic_info={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in ['1','2','3','4','5','6']  }
 65 |     dic_info_no_back={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in ['1','2','3','4','5','6']  }
 66 |     dic_info_ori_key = ['Energy Efficiency','Backhaul Cost','QoS Good', 'QoS Gurantee', 'QoS Bad','System Throughput','QoS Squared Difference']
 67 |     dic_info_ori={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in ['1','2','3','4','5','6'] }
 68 |     dic_info_ori_no_back={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in ['1','2','3','4','5','6'] }
 69 |     a_info={'c':[],'P':[]}
 70 |     dic_store={'a':[],'ddpg_s':[],'r':[],'dqn_s':[],'dqn_Q':[]}
 71 |     dic_NN_output={'actor':[],'critic':[],'dqn_q_eval':[],'dqn_q_target':[]}
 72 |     num_back=0
 73 |     debug_QoSr={i:[] for i in ['1','2','3','4','5','6']}
 74 |     #--------------------------------------------------------------------------
 75 |     # debug
 76 |     debug_PNN=[]  
 77 |     debug_backhaul=[]
 78 |     debug_BSbackhaul=[]
 79 |     debug_channel_episode=[]
 80 |     debug_episode_back=[]
 81 |     debug_s=[]
 82 |     
 83 |     #%% Need to modify
 84 |     ###########################################################################
 85 |     scale_actions = True # True
 86 |     initialise_params = False#True#False # True:add pass-through layer to ActorParam and initilize them / False: not add pass-through layer to ActorParam
 87 |     MAXepisode = 100#1000
 88 |     MAXepisode_train = 1000
 89 |     MAXstep = 100#10#150
 90 |     realization=100#20
 91 |     title="PDQN1"#"PDQN_backhaul" # Prefix of output files
 92 |     #save_dir ="results" #Output directory 
 93 |     n_baseline=6
 94 |     load_dir ="results_PDQN_5v3/PDQN_cc_s11_r11_0dB_N3_10"#PDQN_cc_s3_r9_1dB_new4_rebuild40" #Output directory 
 95 |     load_num="_done"#"400"#
 96 |     load_dirDQN ="results_DQN_5v3/PDQN_cc_s11_r11_0dB_N3_10"#PDQN_cc_s3_r9_1dB_new4_rebuild40" #Output directory 
 97 |     load_numDQN="_done"#"400"#
 98 |     layers_actor=[512,128,16] # 1055-- --5  # # Hidden layers
 99 |     actor_kwargs={'hidden_layers': layers_actor, 'output_layer_init_std': 1e-5,'action_input_layer': action_input_layer,'activation': "relu"}
100 |     layers_actor_param =[256]#[64,256] # 5-- --1050
101 |     actor_param_kwargs={'hidden_layers': layers_actor_param, 'output_layer_init_std': 1e-5,'squashing_function': False,'activation': "relu"}
102 |     name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv'#'mean_std_cc_nct.csv'
103 |     scenario_name='EnvInfo_3'
104 |     lambda1=0.43#0.53#1
105 |     lambda2=0.16#0.05#0.42#0.8
106 |     lambda3=0#0.1#0.3#0
107 |     result_save=load_dirDQN+'/test_testChannel_block_fading'#'/test_all_'#'/test_testChannel'#'/test_last2000_'
108 |     ###########################################################################
109 |     #%% ENV
110 |     env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3,MAXepisode=MAXepisode,n_baseline=n_baseline)
111 |     #-------------------------------------------------------------------------- Choose Network Geometry
112 |     #env.reset() # create a new one
113 |     env.load(name=scenario_name) # use the previous one
114 |     #-------------------------------------------------------------------------- mean_std   
115 |     env.mean_std(10**5,False,name)#calculate(True) or load(False)
116 |     num_actions = env.action_space[0]
117 |     s_dim = env.nUE
118 |     # use the same channel gain to test
119 |     read_train_channel_episode = t.readCSI('CSI',env.nSBS,env.nUE,MAXepisode)
120 | 
121 |     #%% PDQN
122 |     agent_class = PDQNAgent
123 |     agent = agent_class(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space,
124 |                         batch_size=batch_size,learning_rate_actor=learning_rate_actor,learning_rate_actor_param=learning_rate_actor_param,  # 0.001
125 |                         epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma,
126 |                         clip_grad=clip_grad,indexed=indexed,average=average,
127 |                         random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted,
128 |                         tau_actor_param=tau_actor_param,initial_memory_threshold=initial_memory_threshold,
129 |                         use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients,
130 |                         actor_kwargs=actor_kwargs,actor_param_kwargs=actor_param_kwargs,
131 |                         zero_index_gradients=zero_index_gradients,seed=seed)
132 | 
133 |     power_level=5
134 |     agent_classDQN = DQNAgent
135 |     agentDQN = agent_classDQN(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space,
136 |                         power_level=power_level,batch_size=batch_size,learning_rate_actor=learning_rate_actor,  # 0.001
137 |                         epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma,
138 |                         clip_grad=clip_grad,indexed=indexed,average=average,
139 |                         random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted,
140 |                         initial_memory_threshold=initial_memory_threshold,
141 |                         use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients,
142 |                         actor_kwargs=actor_kwargs,
143 |                         zero_index_gradients=zero_index_gradients,seed=seed)
144 |     # load the model 
145 |     agent.load_models(prefix = os.path.join(load_dir, load_num))
146 |     agentDQN.load_models(prefix = os.path.join(load_dirDQN, load_numDQN))
147 |     start_time = time.time()
148 |     total_step=0
149 |     done1 = True
150 |     s = env.reset()
151 |     s = np.array(list(s), dtype=np.float32, copy=False)
152 | 
153 | 
154 |     for episode in range(MAXepisode):
155 |         print(episode, 'episode-----------')
156 | 
157 |         #env.G=read_train_channel_episode[episode]
158 |     
159 |         for timestep in range(MAXstep):
160 |             total_step = total_step + 1
161 |             print('Iteration '+str(total_step)+'=======================================')
162 |             #==================================================================
163 |             # 1
164 |             """ 1) take an action--------------------------------------------"""
165 |             c1, PNN1, all_action_parameters = agent._act(s) # array
166 |             P1 = t.p_normalize(env.P_Max_SBS,PNN1)
167 |             """ 2) step -- next state, reward, done--------------------------"""
168 |             info1, lis_info1, s_, info_ori1, done1,debug_info1,QoS_R1 = env.step(c1,P1,False,True,'1',episode,timestep)
169 |             debug_QoSr['1'].append(QoS_R1)
170 |             s_ = np.array(list(s_), dtype=np.float32, copy=False)
171 |             R1,Energy_Efficiency1,Backhaul_cost1,QoS_good1,QoS_gurantee1,QoS_bad1,sum_c_Throughput1,QoS_squaredD1, =info1  
172 |             Energy_Efficiency_ori1,Backhaul_cost_ori1,QoS_good_ori1,QoS_gurantee_ori1,QoS_bad_ori1,sum_c_Throughput_ori1,QoS_squaredD1_ori1 =info_ori1
173 |             Backhaul_difference1,SINRdb1,QoS_difference1,c_Throughput_ori1 = lis_info1
174 |             Throughput_SBS_threshold,Throughput_BS = debug_info1
175 |             debug_backhaul.append(Throughput_SBS_threshold)
176 |             debug_BSbackhaul.append(Throughput_BS)
177 |             # 2 the nearst SBS + random power allocation-------------------------------------------
178 |             c2 = int(env.baseline1())
179 |             a2 = env.randomP(c2,True)
180 |             info2, lis_info2,_,info_ori2,_ ,_,QoS_R2 = env.step(c2,a2,True,False,'2',episode,timestep)
181 |             debug_QoSr['2'].append(QoS_R2)
182 |             # 3 the best channel + random power allocation ----------------------------------------
183 |             c3 = int(env.baseline2())
184 |             a3 = env.randomP(c3,True)
185 |             info3, lis_info3,_,info_ori3,_,_,QoS_R3= env.step(c3,a3,True,False,'3',episode,timestep)
186 |             debug_QoSr['3'].append(QoS_R3)
187 |             # 4 RL clustering + random power allocatin--------------------------------------------
188 |             c4 = copy.deepcopy(c1)
189 |             a4 = env.randomP(c4,False)
190 |             info4, lis_info4,_,info_ori4,_,_,QoS_R4 = env.step(c4,a4,False,False,'4',episode,timestep) 
191 |             debug_QoSr['4'].append(QoS_R4)
192 |             # 5 random clustering +  RL power---------------------------------------------------
193 |             a5 = copy.deepcopy(P1)
194 |             c5 = env.randomC(a5)
195 |             info5, lis_info5,_,info_ori5,_,_,QoS_R5= env.step(c5,a5,True,False,'5',episode,timestep) 
196 |             debug_QoSr['5'].append(QoS_R5)
197 |             # 6 DQN
198 |             a6 = agentDQN.act(s)   
199 |             c6,P6=agentDQN.action_decoder(a6, env.P_Max_SBS)
200 |             info6, lis_info6, _, info_ori6,_,_,QoS_R6 = env.step(c6,P6,False,True,'6',episode,timestep)
201 |             debug_QoSr['6'].append(QoS_R6)
202 |             #==================================================================
203 |             
204 |             """ 3) Print and store info--------------------------------------"""
205 |             # info=(R,Energy_Efficiency,Backhaul_cost,QoS_good,QoS_gurantee,QoS_bad,SINRdb)
206 |             # lis_info1=(list(Backhaul_difference),list(SINRdb),list(QoS_difference))          
207 |             key_info=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference']           
208 |             key_info_lis=['Backhaul Difference','SINRdb','QoS Difference','Throughput']
209 |             
210 |             dic_info = t.test_inst_info(dic_info,(key_info,key_info_lis),((info1,info2,info3,info4,info5,info6),(lis_info1,lis_info2,lis_info3,lis_info4,lis_info5,lis_info6)),1)
211 |             dic_info_ori = t.test_inst_info(dic_info_ori,dic_info_ori_key,(info_ori1,info_ori2,info_ori3,info_ori4,info_ori5,info_ori6),0)
212 |           
213 |             t.test_print_info(((env.UE2TP[c1],P1),(env.ori_UE2TP[c2],a2),(env.ori_UE2TP[c3],a3),(env.UE2TP[c4],a4),(env.ori_UE2TP[c5],a5),(env.ori_UE2TP[c6],P6)),s) # print p in dBm 
214 |             a_info['c'].append(env.UE2TP[c1])
215 |             a_info['P'].append(10*np.log10(P1*1000))
216 |             """ 4) update state ---------------------------------------------"""
217 |             s = s_
218 |             
219 |             # not end the episode at the test phase
220 |             if done1:
221 |                 num_back=num_back+1
222 |                 debug_episode_back.append(episode)
223 |                 print('violate backhaul')               
224 |             else:
225 |                 dic_info_no_back = t.test_inst_info(dic_info_no_back,(key_info,key_info_lis),((info1,info2,info3,info4,info5,info6),(lis_info1,lis_info2,lis_info3,lis_info4,lis_info5,lis_info6)),1)
226 |                 dic_info_ori_no_back = t.test_inst_info(dic_info_ori_no_back,dic_info_ori_key,(info_ori1,info_ori2,info_ori3,info_ori4,info_ori5,info_ori6),0)
227 | 
228 |             
229 |     end_time = time.time()
230 |     print('num_back=',num_back,'/',total_step,' ',num_back/total_step*100,'%')
231 |     print("Training took %.2f seconds" % (end_time - start_time))
232 |     for i in debug_QoSr:
233 |         num_QoS=sum([1 for k in debug_QoSr[i] if k==1 ])
234 |         print('[',i,']satify Qos',num_QoS,'/',total_step,' ',num_QoS/total_step*100,'%')
235 |     
236 |     
237 |     #%%  debug for constraints about backhaul   
238 |     #t.plot_constraint(MAXepisode,debug_episode_back,'test',result_save,0)
239 |     #t.writeConstraintHistory(result_save+'test_',MAXepisode,debug_episode_back,0)
240 |     #t.plot_constraint(MAXepisode,debug_QoSr['1'],'test',result_save,1)
241 |     #t.writeConstraintHistory_v2(result_save+'test_',MAXepisode,debug_QoSr,1)
242 | 
243 |     #%%    
244 |     # 7) Average per realization steps and Save --------------------------------
245 |     key_avg=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference']
246 |     dic_avg_info = t.test_avg_info(dic_info,key_avg,realization)
247 |     t.test_plot_avg(dic_avg_info,key_avg,realization,'normalize',result_save)
248 |     #---------------------------------------------------------------------------  
249 |     dic_avg_info_ori = t.test_avg_info(dic_info_ori,dic_info_ori_key,realization)
250 |     t.test_plot_avg(dic_avg_info_ori,dic_info_ori_key,realization,'original',result_save)
251 |     #---------------------------------------------------------------------------
252 |     dic_avg_info_ori_no_back = t.test_avg_info(dic_info_ori_no_back,dic_info_ori_key,realization)
253 |     t.test_plot_avg(dic_avg_info_ori_no_back,dic_info_ori_key,realization,'no_back_original',result_save)
254 |     
255 |     #%%  
256 |     def test_plot_individual(env,dic_info,method_index,key_plot,n,save_dir):    
257 |         #key_plot=['Backhaul Difference','SINR','QoS Difference','Throughput']
258 |         nTerm = [env.nSBS, env.nUE, env.nUE, env.nUE]
259 |         title=['['+method_index+']'+i+' with '+str(n)+' Realizations' for i in key_plot]   
260 |         ylabel=key_plot
261 |         xlabel='Training Steps (x'+str(n)+')'   
262 |         label=['SBS','UE','UE','UE']
263 |         color=['r','b','g','c','m','y','k','b']
264 |         linestyle=['-','--',':',':','-']
265 |     
266 |         save={name:[[] for i in range(num)] for name,num in zip(key_plot,nTerm)}
267 |         save_ori={name:[[] for i in range(num)] for name,num in zip(key_plot,nTerm)}
268 |         for i,key in enumerate(key_plot):#i:0-2 same term(backhaul,SINR,QoS) different methods
269 |             difference = dic_info[key]
270 |             temp_list=[]
271 |             for y in range(nTerm[i]):
272 |                     temp_list.append([difference[x][y] for x in range(len(difference))])
273 |             for length in range(len(temp_list)):
274 |                 Bl=[sum(temp_list[length][j*n:(j+1)*n])/n for j in range(int(len(temp_list[length])/n))]
275 |                 save[key][length]=Bl
276 |                 Bl_ori=[temp_list[length][j*n:(j+1)*n] for j in range(int(len(temp_list[length])/n))]
277 |                 for ori in Bl_ori:
278 |                     save_ori[key][length]=save_ori[key][length]+ori
279 |                 plt.plot(np.arange(len(Bl)), Bl,label=label[i]+str(length),color=color[length],linestyle=linestyle[0])
280 |     
281 |             plt.legend(loc='upper right')
282 |             plt.title(title[i])
283 |             plt.ylabel(ylabel[i])
284 |             plt.xlabel(xlabel)
285 |             plt.savefig(save_dir+title[i]+'.png')
286 |             plt.show()
287 |         return save,save_ori
288 | 
289 |     #realization=20
290 |     # 8) plot results of each SBS or UE, e.g.Backhaul_difference-SBS, SINR-UE --
291 |     key_individual=['Backhaul Difference','SINRdb','QoS Difference', 'Throughput']
292 |     save=[]
293 |     ori_save=[]
294 |     for i in ['1','2','3','4','5','6']:
295 |         save_avg,save_ori=test_plot_individual(env,dic_info[i],i,key_individual,realization,result_save)
296 |         save.append(save_avg)
297 |         ori_save.append(save_ori)
298 |     # avg info
299 |     t.writeCSV(dic_avg_info,dic_avg_info_ori,save,dic_info_ori_key,key_individual,key_avg,result_save,1)
300 |     # original
301 |     t.writeCSV(dic_info,dic_info_ori,ori_save,dic_info_ori_key,key_individual,key_avg,result_save+'_original',1)
302 |     #%%
303 |     # no_backhaul + original -- don't use individual info
304 |     t.writeCSV_nobackhaul(dic_info_no_back,dic_avg_info_ori_no_back,dic_info_ori_key,key_avg,result_save+'_original_nobackhaul')
305 |     #%%   write CSI
306 |     #t.writeCSI('Rayleigh_CSIforTest_100episode_100timestep_s10',debug_channel_episode)
307 |     #read = readBackhaulHistory('test_HistoryforBackhaulViolation')
308 |     
309 |     #%% debug
310 |     test_debug_I = env.debug_I
311 |     test_debug_UE_throughput = env.debug_UE_throughput # each UE throughput
312 |     test_debug_SBS_throughput = env.debug_SBS_throughput
313 |     test_debug_SBS_threshold = env.debug_SBS_threshold
314 |     test_debug_c = env.debug_c
315 |     test_debug_p = env.debug_p
316 |     test_debug_backhaul = env.debug_backhaul
317 |     test_debug_QoS = env.debug_QoS # which episode and step violate QoS & UE index
318 |     test_debug_system_throughput = env.debug_system_throughput
319 |     test_debug_system_energy = env.debug_system_energy
320 |     # 1) EE    
321 |     debug_dic_info_EE_key = ['System Power','Operational Power','Transmit Power','System Throughput']
322 |     debug_dic_info_EE = {key_dic_info:{name_EE:[] for name_EE in debug_dic_info_EE_key} for key_dic_info in ['1','2','3','4','5','6'] }
323 | 
324 |     for iMETHOD in ['1','2','3','4','5','6']:
325 |         for index,nameEE in enumerate(debug_dic_info_EE_key[:3]):
326 |             debug_dic_info_EE[iMETHOD][nameEE]=[episode_EE[index] for episode in range(MAXepisode) for episode_EE in test_debug_system_energy[iMETHOD][str(episode)] ]          
327 |         debug_dic_info_EE[iMETHOD]['System Throughput']=[episode_EE for episode in range(MAXepisode) for episode_EE in test_debug_system_throughput[iMETHOD][str(episode)] ]
328 | 
329 |     t.writeEE(debug_dic_info_EE,debug_dic_info_EE_key,result_save)
330 |     
331 |     # 2) Interference
332 |     debug_dic_info_I_key = ['Interference','Intra-cluster Interference','Inter-cluster Interference']
333 |     debug_I={i:{'UE'+str(j):[] for j in range(env.nUE)} for i in debug_dic_info_I_key} # I, intra-cluster, inter-cluster
334 |     debug_dic_info_I = {key_dic_info:copy.deepcopy(debug_I) for key_dic_info in ['1','2','3','4','5','6'] }
335 |     
336 |     for index,name_I in enumerate(debug_dic_info_I_key):
337 |         for iUE in ['UE'+str(i) for i in range(env.nUE) ]:
338 |             for iMETHOD in [str(k+1) for k in range(6)]:
339 |                 debug_dic_info_I[iMETHOD][name_I][iUE]=[episode_I[index] for episode in range(MAXepisode) for episode_I in copy.deepcopy(test_debug_I[iMETHOD][str(episode)][iUE])]
340 | 
341 |     t.writeI(debug_dic_info_I,debug_dic_info_I_key,env.nUE,result_save)
342 |     
343 |     # 3) action
344 |     debug_dic_info_action_key = ['Association','Power Allocation']
345 |     debug_dic_info_action = {key_dic_info:{name_action:[] for name_action in debug_dic_info_action_key} for key_dic_info in [str(i+1) for i in range(n_baseline)]  }
346 | 
347 |     for iMETHOD in [str(i+1) for i in range(n_baseline)] :
348 |         debug_dic_info_action[iMETHOD]['Association'] = [ test_debug_c[iMETHOD][str(episode)] for episode in range(MAXepisode) ]
349 |         debug_dic_info_action[iMETHOD]['Power Allocation'] = [ test_debug_p[iMETHOD][str(episode)] for episode in range(MAXepisode) ]
350 |                           
351 |     t.writeAction(debug_dic_info_action,debug_dic_info_action_key,result_save)
352 | 


--------------------------------------------------------------------------------
/tool.py:
--------------------------------------------------------------------------------
  1 | #!python3
  2 | #!/usr/bin/env python3
  3 | # -*- coding: utf-8 -*-
  4 | """
  5 | Created on Mon Jun  3 21:27:58 2019
  6 | 
  7 | @author: kuokuo
  8 | """
  9 | import numpy as np
 10 | import matplotlib.pyplot as plt
 11 | import math
 12 | import csv
 13 | 
 14 | 
 15 | 
 16 | 
 17 | 
 18 | def inst_info(dic_info,key,data,op):  
 19 |     # key and data should be in order !!!!!!
 20 |     if op==1: #normalized
 21 |         key_info,key_info_lis=key
 22 |         info,lis_info1 = data
 23 |         n=[str(i+1) for i in range(len(info))] #['1','2','3','4','5','6','7']
 24 |         # store key_info
 25 |         for i,content in zip(n,info):
 26 |             cnt=0
 27 |             for value in content:
 28 |                 dic_info[i][key_info[cnt]].append(value)
 29 |                 cnt=cnt+1         
 30 |         # store key_info_lis
 31 |         for i,value in zip(key_info_lis,lis_info1):
 32 |             dic_info['1'][i].append(value)
 33 |     else: #original
 34 |         n=[str(i+1) for i in range(len(data))]
 35 |         for i,content in zip(n,data):
 36 |             cnt=0
 37 |             for value in content:
 38 |                 dic_info[i][key[cnt]].append(value)
 39 |                 cnt=cnt+1
 40 |     return dic_info
 41 | 
 42 | def test_inst_info(dic_info,key,data,op):  
 43 |     # key and data should be in order !!!!!!
 44 |     
 45 |     if op==1: #normalized
 46 |         key_info,key_info_lis=key
 47 |         info,lis_info = data
 48 |         n=[str(i+1) for i in range(len(info))]
 49 |         #lis_info1,lis_info2,lis_info3,lis_info4,lis_info5 = lis_info
 50 |         # store key_info
 51 |         for i,content in zip(n,info):
 52 |             cnt=0
 53 |             for value in content:
 54 |                 dic_info[i][key_info[cnt]].append(value)
 55 |                 cnt=cnt+1         
 56 |         # store key_info_lis
 57 |         for i,content in zip(n,lis_info):
 58 |             #cnt=0
 59 |             for j,value in zip(key_info_lis,content):
 60 |                 dic_info[i][j].append(value)
 61 |     else: #original
 62 |         n=[str(i+1) for i in range(len(data))]
 63 |         for i,content in zip(n,data):
 64 |             cnt=0
 65 |             for value in content:
 66 |                 dic_info[i][key[cnt]].append(value)
 67 |                 cnt=cnt+1
 68 |     return dic_info
 69 | 
 70 | 
 71 | def train_avg_info(dic_info,key_avg,n):
 72 |     lis=[str(i+1) for i in range(1)] #['1','2','3','4','5','6','7']
 73 |     dic_avg_info={key_dic_info:{term: [] for term in key_avg} for key_dic_info in lis  }
 74 |     for key_dic_info in lis:
 75 |         dic = dic_info[key_dic_info]
 76 |         for key in key_avg:
 77 |             dic_avg_info[key_dic_info][key]=[sum(dic[key][i*n:(i+1)*n])/n for i in range(int(len(dic[key])/n))]
 78 |     return dic_avg_info
 79 | 
 80 | def test_avg_info(dic_info,key_avg,n):
 81 |     lis=[str(i+1) for i in range(len(dic_info))] #['1','2','3','4','5','6','7']
 82 |     dic_avg_info={key_dic_info:{term: [] for term in key_avg} for key_dic_info in lis  }
 83 |     for key_dic_info, dic in dic_info.items(): #key_dic_info=['1','2','3','4','5']
 84 |         for key in key_avg:
 85 |             dic_avg_info[key_dic_info][key]=[sum(dic[key][i*n:(i+1)*n])/n for i in range(int(len(dic[key])/n))]
 86 |     return dic_avg_info
 87 | 
 88 | def train_plot_avg(dic_avg_info,key_avg,realization,name,save_dir):
 89 |     title=['('+name+')Average '+i+' with '+str(realization)+' Realizations' for i in key_avg]   
 90 |     ylabel=['Average '+i for i in key_avg]
 91 |     xlabel='Training Steps (x'+str(realization)+')' 
 92 |     n=1
 93 |     label=[str(i+1) for i in range(n)]
 94 | 
 95 |     for i,key in enumerate(key_avg): 
 96 |         for j in range(n):
 97 |             plt.plot(np.arange(len(dic_avg_info[label[j]][key])), dic_avg_info[label[j]][key],label=label[j])
 98 |         plt.legend(loc='upper right')
 99 |         plt.title(title[i])
100 |         plt.ylabel(ylabel[i])
101 |         plt.xlabel(xlabel)
102 |         plt.savefig(save_dir+title[i]+'.png') 
103 |         plt.show() 
104 | 
def test_plot_avg(dic_avg_info, key_avg, realization, name, save_dir):
    """Plot, save and show the averaged curves of every method.

    One figure is produced per metric in ``key_avg``; it overlays the curves of
    methods ``'1'`` .. ``str(len(dic_avg_info))`` and is written to
    ``save_dir + title + '.png'``.

    Args:
        dic_avg_info: Mapping ``method_id -> metric -> list of averages``.
        key_avg: Metric names to plot.
        realization: Block length used for averaging (only used in labels).
        name: Tag placed in parentheses at the start of each title.
        save_dir: Prefix prepended to every figure file name.
    """
    headings = ['(' + name + ')Average ' + metric + ' with ' + str(realization) + ' Realizations'
                for metric in key_avg]
    y_captions = ['Average ' + metric for metric in key_avg]
    x_caption = 'Training Steps (x' + str(realization) + ')'
    method_ids = [str(k) for k in range(1, len(dic_avg_info) + 1)]

    for metric, heading, y_caption in zip(key_avg, headings, y_captions):
        for method_id in method_ids:
            plt.plot(np.arange(len(dic_avg_info[method_id][metric])),
                     dic_avg_info[method_id][metric], label=method_id)
        plt.legend(loc='upper right')
        plt.title(heading)
        plt.ylabel(y_caption)
        plt.xlabel(x_caption)
        plt.savefig(save_dir + heading + '.png')
        plt.show()
121 | 
122 | 
def plot(start, lisRL, n, title, ylabel, xlabel):
    """Plot the block-averaged tail of a sequence.

    The first ``start`` elements of ``lisRL`` are discarded; the remainder is
    averaged over consecutive blocks of ``n`` samples (a trailing incomplete
    block is dropped) and shown as a line plot.

    Args:
        start: Index of the first sample to include.
        lisRL: Sequence of numbers.
        n: Block length used for averaging.
        title: Figure title.
        ylabel: Label of the y axis.
        xlabel: Label of the x axis.
    """
    values = lisRL[start:]
    # The upper slice bound must be (i+1)*n; the previous "(i+1*n)" evaluated
    # to i+n, so every block after the first covered the wrong samples.
    avg = [sum(values[i * n:(i + 1) * n]) / n for i in range(int(len(values) / n))]
    plt.plot(np.arange(len(avg)), avg)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    #plt.savefig(title+'.png')
    plt.show()
132 | 
def plot_individual(env, dic_info, key_plot, n):
    """Plot block-averaged per-entity curves of method ``'1'`` and return them.

    For every metric in ``key_plot`` the samples in ``dic_info['1'][metric]``
    are split per entity (``env.nSBS`` curves for the first metric,
    ``env.nUE`` for the next two), averaged over consecutive blocks of ``n``
    samples, and shown.  Figures are not saved.

    Args:
        env: Object exposing ``nSBS`` and ``nUE``.
        dic_info: Mapping ``method_id -> metric -> list of per-step sequences``.
        key_plot: Metric names, in the same order as the entity counts.
        n: Block length used for averaging.

    Returns:
        ``save``: block averages indexed as ``[metric][entity]``.
    """
    entity_counts = [env.nSBS, env.nUE, env.nUE]
    titles = [metric + ' with ' + str(n) + ' Realizations' for metric in key_plot]
    x_caption = 'Training Steps (x' + str(n) + ')'
    # Only the first method is plotted, so only its legend prefixes are needed.
    legend_prefix = ['(DDPG1)SBS', '(DDPG1)UE', '(DDPG1)UE']
    palette = ['r', 'b', 'g', 'c', 'm']
    save = {name: [[] for _ in range(count)] for name, count in zip(key_plot, entity_counts)}

    for pos, metric in enumerate(key_plot):
        samples = dic_info['1'][metric]
        # Transpose: one time series per SBS/UE index.
        per_entity = [[sample[ent] for sample in samples] for ent in range(entity_counts[pos])]
        for ent, series in enumerate(per_entity):
            means = [sum(series[b * n:(b + 1) * n]) / n for b in range(int(len(series) / n))]
            save[metric][ent] = means
            plt.plot(np.arange(len(means)), means, label=legend_prefix[pos] + str(ent),
                     color=palette[ent], linestyle='-')

        plt.legend(loc='upper right')
        plt.title(titles[pos])
        plt.ylabel(metric)
        plt.xlabel(x_caption)
        #plt.savefig(title[i]+'.png')
        plt.show()
    return save
186 | 
def test_plot_individual(env, dic_info, method_index, key_plot, n, save_dir):
    """Plot block-averaged per-entity curves for one method and collect them.

    For every metric in ``key_plot`` the samples in ``dic_info[metric]`` are
    split per entity (``env.nSBS`` curves for the first metric, ``env.nUE``
    for the next three), averaged over consecutive blocks of ``n`` samples,
    plotted, saved as ``save_dir + title + '.png'`` and shown.

    Args:
        env: Object exposing ``nSBS`` and ``nUE``.
        dic_info: Mapping ``metric -> list of per-step sequences``.
        method_index: String label of the method, used in the titles.
        key_plot: Metric names, in the same order as the entity counts.
        n: Block length used for averaging.
        save_dir: Prefix prepended to every figure file name.

    Returns:
        ``(save, save_ori)``: block averages and the raw samples that fall
        into complete blocks, both indexed as ``[metric][entity]``.
    """
    entity_counts = [env.nSBS, env.nUE, env.nUE, env.nUE]
    titles = ['[' + method_index + ']' + metric + ' with ' + str(n) + ' Realizations'
              for metric in key_plot]
    x_caption = 'Training Steps (x' + str(n) + ')'
    entity_tags = ['SBS', 'UE', 'UE', 'UE']
    palette = ['r', 'b', 'g', 'c', 'm', 'y', 'k', 'b']

    averaged = {name: [[] for _ in range(count)] for name, count in zip(key_plot, entity_counts)}
    raw = {name: [[] for _ in range(count)] for name, count in zip(key_plot, entity_counts)}

    for pos, metric in enumerate(key_plot):
        samples = dic_info[metric]
        # Transpose: one time series per SBS/UE index.
        per_entity = [[sample[ent] for sample in samples] for ent in range(entity_counts[pos])]
        for ent, series in enumerate(per_entity):
            chunks = [series[b * n:(b + 1) * n] for b in range(int(len(series) / n))]
            means = [sum(chunk) / n for chunk in chunks]
            averaged[metric][ent] = means
            # Only samples belonging to complete blocks are kept.
            for chunk in chunks:
                raw[metric][ent] = raw[metric][ent] + chunk
            plt.plot(np.arange(len(means)), means, label=entity_tags[pos] + str(ent),
                     color=palette[ent], linestyle='-')

        plt.legend(loc='upper right')
        plt.title(titles[pos])
        plt.ylabel(metric)
        plt.xlabel(x_caption)
        plt.savefig(save_dir + titles[pos] + '.png')
        plt.show()
    return averaged, raw
219 | 
220 |         
def print_info(info, s):
    """Print the state and one ``(c, P)`` action pair.

    ``P`` is printed as ``10*log10(P*1000)``, which the output labels as dBm.

    Args:
        info: Pair ``(c, P)``.
        s: State, printed as-is.
    """
    association, power = info
    print('s ',s)
    power_dbm = 10*np.log10(power*1000)
    print('c = ',association,' P in dBm=',power_dbm)
225 |         
226 | def test_print_info(info,s):     
227 |     print('s ',s)    
228 |     for i,data in enumerate(info):
229 |         c, P = data
230 |         print('[',i+1,'] c = ',c,' P in dBm=',10*np.log10(P*1000))
231 | 
232 | 
def writeCSV(dic_avg_info,dic_avg_info_ori,save,dic_info_ori_key,key_individual,key_avg,title,op):
    """Write normalized, original and per-entity histories to ``title + '_history.csv'``.

    Rows are written in three sections: normalized metrics, original metrics,
    then the individual (per-entity) series.

    Args:
        dic_avg_info: ``method_id -> metric -> list`` for the normalized section.
        dic_avg_info_ori: Same layout for the original section.
        save: Individual series.  With ``op == 1`` a sequence with one
            ``metric -> list of lists`` mapping per method; otherwise a single
            such mapping.
        dic_info_ori_key: Metric names of the original section.
        key_individual: Metric names of the individual section.
        key_avg: Metric names of the normalized section.
        title: Output path prefix.
        op: ``1`` for test mode (all baselines), anything else for a single model.
    """
    method_ids = [str(k) for k in range(1, len(dic_avg_info) + 1)]
    with open(title+'_history.csv','w',newline='') as handle:
        out = csv.writer(handle)
        # 1) normalized
        for metric in key_avg:
            for method_id in method_ids:
                out.writerow([method_id, '(normlaized)Average '+metric] + dic_avg_info[method_id][metric])
        # 2) original
        for metric in dic_info_ori_key:
            for method_id in method_ids:
                out.writerow([method_id, '(original)Average '+metric] + dic_avg_info_ori[method_id][metric])
        # 3) individual
        if op != 1:
            for metric in key_individual:
                for ent, series in enumerate(save[metric]):
                    out.writerow([ent, metric] + series)
        else:  # test mode, all baselines
            for model_no, method_id in enumerate(method_ids):
                per_model = save[model_no]
                for metric in key_individual:
                    for ent, series in enumerate(per_model[metric]):
                        out.writerow(['['+method_id+']'+str(ent), metric] + series)
256 | 
def writeCSV_nobackhaul(dic_avg_info,dic_avg_info_ori,dic_info_ori_key,key_avg,title):
    """Write normalized and original histories to ``title + '_history.csv'``.

    Args:
        dic_avg_info: ``method_id -> metric -> list`` for the normalized section.
        dic_avg_info_ori: Same layout for the original section.
        dic_info_ori_key: Metric names of the original section.
        key_avg: Metric names of the normalized section.
        title: Output path prefix.
    """
    method_ids = [str(k) for k in range(1, len(dic_avg_info) + 1)]
    # Each section: (metric names, row-label prefix, data source).
    sections = (
        (key_avg, '(normlaized)Average ', dic_avg_info),
        (dic_info_ori_key, '(original)Average ', dic_avg_info_ori),
    )
    with open(title+'_history.csv','w',newline='') as handle:
        out = csv.writer(handle)
        for metrics, prefix, source in sections:
            for metric in metrics:
                for method_id in method_ids:
                    out.writerow([method_id, prefix+metric] + source[method_id][metric])
269 |                 
def writeEE(debug_dic_info_EE,debug_dic_info_EE_key,title):
    """Write the energy-efficiency debug series to ``title + '_EE.csv'``.

    One row per (metric, method): ``[method_id, metric, *values]``.

    Args:
        debug_dic_info_EE: ``method_id -> metric -> list`` with method ids
            ``'1'`` .. ``str(len(debug_dic_info_EE))``.
        debug_dic_info_EE_key: Metric names, in output order.
        title: Output path prefix.
    """
    method_ids = [str(k) for k in range(1, len(debug_dic_info_EE) + 1)]
    with open(title+'_EE.csv','w',newline='') as handle:
        out = csv.writer(handle)
        for metric in debug_dic_info_EE_key:
            for method_id in method_ids:
                out.writerow([method_id, metric] + debug_dic_info_EE[method_id][metric])
278 |                 
def writeI(debug_dic_info_I,debug_dic_info_I_key,nUE,title):
    """Write the interference debug series to ``title + '_I.csv'``.

    One row per (metric, method, UE): ``[metric, method_id, 'UE<k>', *values]``.

    Args:
        debug_dic_info_I: ``method_id -> metric -> 'UE<k>' -> list`` with
            method ids ``'1'`` .. ``str(len(debug_dic_info_I))``.
        debug_dic_info_I_key: Metric names, in output order.
        nUE: Number of UEs; rows are written for ``UE0`` .. ``UE<nUE-1>``.
        title: Output path prefix.
    """
    method_ids = [str(k) for k in range(1, len(debug_dic_info_I) + 1)]
    with open(title+'_I.csv','w',newline='') as handle:
        out = csv.writer(handle)
        for metric in debug_dic_info_I_key:
            for method_id in method_ids:
                for ue_no in range(nUE):
                    ue_id = 'UE'+str(ue_no)
                    out.writerow([metric, method_id, ue_id] + debug_dic_info_I[method_id][metric][ue_id])
288 | 
def writeAction(debug_dic_info_action,debug_dic_info_action_key,title):
    """Write the recorded actions to ``title + '_Action.csv'``.

    For every method, episode and time step two rows are written: the
    association followed by the power allocation of that same step.

    Args:
        debug_dic_info_action: ``method_id -> {'Association', 'Power Allocation'}
            -> [episode][step]`` with method ids ``'1'`` ..
            ``str(len(debug_dic_info_action))``.  Association entries must be
            lists; power entries may be any iterable.
        debug_dic_info_action_key: Not used by this function.
        title: Output path prefix.
    """
    method_ids = [str(k) for k in range(1, len(debug_dic_info_action) + 1)]
    with open(title+'_Action.csv','w',newline='') as handle:
        out = csv.writer(handle)
        for method_id in method_ids:
            episodes = debug_dic_info_action[method_id]['Association']
            for episode_no, steps in enumerate(episodes):
                for step_no, association in enumerate(steps):
                    out.writerow([method_id, 'Association', episode_no, step_no] + association)
                    power = debug_dic_info_action[method_id]['Power Allocation'][episode_no][step_no]
                    out.writerow([method_id, 'Power Allocation', episode_no, step_no] + list(power))
298 |     
299 | 
def findinf_list(lis):
    """Separate infinite entries from the rest of a sequence.

    Each value is converted with ``float(str(value))`` before the infinity
    test, so strings such as ``'inf'`` count as infinite.

    Args:
        lis: Iterable of values convertible to float via their string form.

    Returns:
        ``{'inf_index_list': indices of infinite entries,
        'non_inf_list': the remaining values in their original form}``.
    """
    inf_positions = []
    finite_values = []
    for position, value in enumerate(lis):
        as_number = float(str(value))
        if not math.isinf(as_number):
            finite_values.append(value)
            continue
        inf_positions.append(position)
    return {'inf_index_list': inf_positions, 'non_inf_list': finite_values}
308 | 
def writeCSI(name,train_channel_episode):
    """Write every row of every channel matrix to ``name + '.csv'``.

    The matrices are written back to back without separators.

    Args:
        name: Output path without the ``.csv`` extension.
        train_channel_episode: Iterable of matrices; each matrix is an
            iterable of rows.
    """
    with open(name+'.csv','w',newline='') as handle:
        out = csv.writer(handle)
        for channel in train_channel_episode:
            out.writerows(list(channel))
315 |                 
def readCSI(name,nSBS,nUE,episode):
    """Read channel matrices from ``name + '.csv'``.

    The file is consumed in consecutive groups of ``nSBS`` rows, one group per
    episode.  Each group is flattened, reshaped to ``nSBS`` rows and truncated
    to its first ``nUE + 1`` columns.

    Args:
        name: Input path without the ``.csv`` extension.
        nSBS: Number of rows per matrix.
        nUE: Matrices keep columns ``0`` .. ``nUE``.
        episode: Number of matrices to read.

    Returns:
        List of ``episode`` numpy arrays.
    """
    with open(name+'.csv', newline='') as handle:
        table = list(csv.reader(handle))

    matrices = []
    for episode_no in range(episode):
        first_row = episode_no * nSBS
        flat = [float(cell) for row in table[first_row:first_row+nSBS] for cell in row]
        matrix = np.array(flat).reshape(nSBS,-1)
        matrices.append(matrix[:,0:nUE+1])
    return matrices
328 | 
329 | 
330 | 
331 | 
332 | '''
333 | def writeBackhaulHistory(name,MAXepisode,debug_episode_back):
334 |     back=np.zeros((MAXepisode,))
335 |     back[debug_episode_back]=1
336 |     with open(name+'.csv','w',newline='') as csvfile:
337 |         writer = csv.writer(csvfile)  
338 |         writer.writerow(back)
339 | 
340 |                 
341 |             
342 | def readBackhaulHistory(name):
343 |     with open('test_HistoryforBackhaulViolation.csv', newline='') as csvfile:
344 |         rows = csv.reader(csvfile)
345 |         rows = list(rows)[0]
346 |         rows = [float(i) for i in rows]
347 |         return rows
348 | 
349 | def plot_violateBackhaul(MAXepisode,debug_episode_back,name,save_dir):
350 |     title='('+name+')History of Backhaul Constraint Violation'
351 |     xlabel='Steps'
352 |     back=np.zeros((MAXepisode,))
353 |     back[debug_episode_back]=1
354 |     plt.plot(np.arange(MAXepisode),back)
355 |     plt.title(title)
356 |     #plt.ylabel(ylabel[i])
357 |     plt.xlabel(xlabel)
358 |     plt.savefig(save_dir+title+'.png') 
359 |     plt.show() 
360 | '''    
def writeConstraintHistory(name,episode,debug_episode_back,mode):
    """Write a constraint history as a single CSV row.

    Args:
        name: Output path prefix.
        episode: Length of the indicator vector (used when ``mode == 0``).
        debug_episode_back: With ``mode == 0`` the indices to flag with 1 in a
            zero vector of length ``episode``; otherwise the row to write as-is.
        mode: ``0`` writes ``name + '_HistoryforBackhaulViolation.csv'``;
            anything else writes ``name + '_HistoryforQoSsatisfication.csv'``.
    """
    if mode == 0: # backhaul
        row = np.zeros((episode,))
        row[debug_episode_back] = 1
        target = name+'_HistoryforBackhaulViolation.csv'
    else:  #QoS
        row = debug_episode_back
        target = name+'_HistoryforQoSsatisfication.csv'
    with open(target,'w',newline='') as handle:
        csv.writer(handle).writerow(row)
372 |             
def writeConstraintHistory_v2(name,episode,debug_episode_back,mode):
    """Write a constraint history to CSV, one row per method in QoS mode.

    Args:
        name: Output path prefix.
        episode: Length of the indicator vector (used when ``mode == 0``).
        debug_episode_back: With ``mode == 0`` the indices to flag with 1 in a
            zero vector of length ``episode``; otherwise a mapping whose values
            are written as rows in iteration order.
        mode: ``0`` writes ``name + '_HistoryforBackhaulViolation.csv'``;
            anything else writes ``name + '_HistoryforQoSsatisfication.csv'``.
    """
    if mode != 0:  #QoS
        with open(name+'_HistoryforQoSsatisfication.csv','w',newline='') as handle:
            out = csv.writer(handle)
            for method_id in debug_episode_back:
                out.writerow(debug_episode_back[method_id])
        return

    # backhaul
    flags = np.zeros((episode,))
    flags[debug_episode_back] = 1
    with open(name+'_HistoryforBackhaulViolation.csv','w',newline='') as handle:
        csv.writer(handle).writerow(flags)
385 |         
def readConstraintHistory(name,mode):
    """Read the first row of a constraint-history CSV as floats.

    Args:
        name: Path prefix of the file.
        mode: ``0`` reads ``name + '_HistoryforBackhaulViolation.csv'``;
            anything else reads ``name + '_HistoryforQoSsatisfication.csv'``.

    Returns:
        List of floats parsed from the first row.
    """
    suffix = '_HistoryforBackhaulViolation' if mode == 0 else '_HistoryforQoSsatisfication'
    with open(name+suffix+'.csv', newline='') as handle:
        first_row = list(csv.reader(handle))[0]
        return [float(cell) for cell in first_row]
396 |         
def plot_constraint(episode,debug_episode_back,name,save_dir,mode):
    """Plot, save and show a constraint history.

    The figure is written to ``save_dir + title + '.png'``.

    Args:
        episode: Length of the indicator vector (used when ``mode == 0``).
        debug_episode_back: With ``mode == 0`` the indices to flag with 1 in a
            zero vector of length ``episode``; otherwise the series to plot
            as-is.
        name: Tag placed in parentheses at the start of the title.
        save_dir: Prefix prepended to the figure file name.
        mode: ``0`` for the backhaul history, anything else for the QoS history.
    """
    if mode == 0: # backhaul
        title = '('+name+')History of Backhaul Constraint Violation'
        y_values = np.zeros((episode,))
        y_values[debug_episode_back] = 1
        x_values = np.arange(episode)
    else: #QoS
        title = '('+name+')History of QoS Satisfication'
        x_values = np.arange(len(debug_episode_back))
        y_values = debug_episode_back
    plt.plot(x_values, y_values)
    plt.title(title)
    plt.xlabel('Steps')
    plt.savefig(save_dir+title+'.png')
    plt.show()
413 |     
414 |     
def p_normalize(clip,P_NN):
    """Scale each entry of ``P_NN`` by ``clip`` and clamp it to ``[0, clip]``.

    Args:
        clip: Upper bound and scale factor.
        P_NN: Iterable of values to scale (per the original comment, sigmoid
            outputs with noise already added).

    Returns:
        numpy array with one clamped value per input entry.
    """
    scaled = []
    for raw_power in P_NN:
        scaled.append(np.clip(raw_power*clip, 0, clip))
    return np.array(scaled)
419 |     
420 | 


--------------------------------------------------------------------------------
/train_DQN.py:
--------------------------------------------------------------------------------
  1 | #!python3
  2 | """
  3 | Created on Sat Jun  1 16:54:41 2019
  4 | 
  5 | @author: kuo
  6 | """
  7 | 
  8 | import time
  9 | import numpy as np
 10 | import os
 11 | import scipy.stats as st
 12 | import copy 
 13 | os.chdir('/home/chan/PDQN/') 
 14 | from pdqn import PDQNAgent
 15 | from DQN import DQNAgent
 16 | from env import env_PowerAllocation
 17 | import tool as t
 18 | 
 19 | #import tool
 20 | os.environ['CUDA_VISIBLE_DEVICES']='0'
 21 | 
 22 | 
 23 | 
 24 | 
 25 | #%%
 26 | if __name__ == '__main__':
 27 |  # PDQN=====================================================================
 28 |     batch_size=128
 29 |     initial_memory_threshold=128 #1000 # Number of transitions required to start learning.
 30 |     replay_memory_size=20000     # Replay memory transition capacity 
 31 |     epsilon_initial=1
 32 |     epsilon_steps=1000 # Number of episodes over which to linearly anneal epsilon
 33 |     epsilon_final=0.01 # Final epsilon value
 34 |     gamma=0.95
 35 |     clip_grad=1 # Parameter gradient clipping limit 
 36 |     inverting_gradients=True # Use inverting gradients scheme instead of squashing function
 37 |     seed=0 #0 #Random seed
 38 |     # 1) ParamActor------------------------------------------------------------
 39 |     layers_actor_param =[256]#[64,256]#(256,) # 5-- --1050
 40 |     actor_param_kwargs={'hidden_layers': layers_actor_param, 'output_layer_init_std': 1e-5,'squashing_function': False,'activation': "relu"}
 41 |     learning_rate_actor_param=0.00001
 42 |     tau_actor_param=0.001
 43 |     """loss func for actor_parameter """
 44 |     average=False # Average weighted loss function  
 45 |     weighted=False # Naive weighted loss function
 46 |     random_weighted=False # Randomly weighted loss function
 47 |     indexed=False # Indexed loss function
 48 |     zero_index_gradients=False # Whether to zero all gradients for action-parameters not corresponding to the chosen action
 49 |     # 2) Actor-----------------------------------------------------------------
 50 |     tau_actor=0.1  
 51 |     learning_rate_actor=0.00001#0.0001#0.001 # reduce lr can avoid nan output
 52 |     action_input_layer=0# Which layer to input action parameters-- useless?
 53 |     layers_actor=[512,128,16]#(256,)# # 1055-- --5  # # Hidden layers
 54 |     actor_kwargs={'hidden_layers': layers_actor, 'output_layer_init_std': 1e-5,'action_input_layer': action_input_layer,'activation': "relu"}
 55 |     #--------------------------------------------------------------------------
 56 |     # Performance
 57 |     dic_info_key = ['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference','Backhaul Difference','SINRdb','QoS Difference','Throughput']
 58 |     dic_info={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in ['1','2','3','4','5','6','7','8']  }
 59 |     dic_info_ori_key = ['Energy Efficiency','Backhaul Cost','QoS Good', 'QoS Gurantee', 'QoS Bad','System Throughput','QoS Squared Difference']
 60 |     dic_info_ori={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in ['1','2','3','4','5','6','7','8'] }
 61 |     
 62 |     a_info={'c':[],'P':[]}
 63 |     dic_store={'a':[],'ddpg_s':[],'r':[],'dqn_s':[],'dqn_Q':[]}
 64 |     dic_NN_output={'actor':[],'critic':[],'dqn_q_eval':[],'dqn_q_target':[]}
 65 |     num_back=0
 66 |     QoS_R=[]
 67 |     #--------------------------------------------------------------------------
 68 |     # debug
 69 |     debug_PNN=[]  
 70 |     debug_backhaul=[]
 71 |     debug_BSbackhaul=[]
 72 |     debug_episode_back=[]
 73 |     #train_channel_episode=[]
 74 |     ############################################################################ change this    
 75 |     scale_actions = True
 76 |     initialise_params = False # True:add pass-through layer to ActorParam and initilize them / False: not add pass-through layer to ActorParam
 77 |     use_ornstein_noise=True#True # False: Uniformly sample parameters & add noise to taken parameters / True: greedy parameters 
 78 |     save_freq = 100#0 # How often to save models (0 = never)
 79 |     title="PDQN_cc_s11_r11_0dB_N3_1"#"PDQN2"#"PDQN_backhaul" # Prefix of output files
 80 |     save_dir ="results_DQN_5v3" #Output directory
 81 |     load = False 
 82 |     load_dir ="results/"+title+"0"
 83 |     load_num="999"
 84 |     threshold = 0.005#1e-3
 85 |     start_episode=0
 86 |     MAXepisode = 80000#600#20000
 87 |     MAXstep = 100#150
 88 |     # evaluation_episodes=1000 # Episodes over which to evaluate after training
 89 |     realization=500#100
 90 |     lambda1=0.43#0.53#1
 91 |     lambda2=0.16#0.05#0.42#0.8
 92 |     lambda3=0#0.1#0.3#0
 93 |     mean_name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv'#'mean_std_cc_ct_0dB_s3_nv21_oldChannel_nobackhaul.csv'
 94 |     scenario_name = 'EnvInfo_3'
 95 |     mean_flage=False
 96 |     ###########################################################################
 97 |     #%% ENV
 98 |     env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3,MAXepisode=MAXepisode,n_baseline=1)
 99 |     #-------------------------------------------------------------------------- Choose Network Geometry
100 |     env.load(name=scenario_name) # use the previous one 
101 |     #-------------------------------------------------------------------------- mean_std
102 |     env.mean_std(10**6,mean_flage,mean_name)#calculate(True) or load(False)
103 |     num_actions = env.action_space[0]
104 |     s_dim = env.nUE
105 |     #%% PDQN
106 |     # save model --------------------------------------------------------------
107 |     if save_freq > 0 and save_dir:
108 |         save_dir = os.path.join(save_dir, title + "{}".format(str(seed)))
109 |         os.makedirs(save_dir, exist_ok=True)
110 |         
111 |     agent_class = PDQNAgent
112 |     agent = agent_class(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space,
113 |                         batch_size=batch_size,learning_rate_actor=learning_rate_actor,learning_rate_actor_param=learning_rate_actor_param,  # 0.001
114 |                         epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma,
115 |                         clip_grad=clip_grad,indexed=indexed,average=average,
116 |                         random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted,
117 |                         tau_actor_param=tau_actor_param,initial_memory_threshold=initial_memory_threshold,
118 |                         use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients,
119 |                         actor_kwargs=actor_kwargs,actor_param_kwargs=actor_param_kwargs,
120 |                         zero_index_gradients=zero_index_gradients,seed=seed)
121 |     
122 |     power_level=5
123 |     agent_classDQN = DQNAgent
124 |     agentDQN = agent_classDQN(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space,
125 |                         power_level=power_level,batch_size=batch_size,learning_rate_actor=learning_rate_actor,  # 0.001
126 |                         epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma,
127 |                         clip_grad=clip_grad,indexed=indexed,average=average,
128 |                         random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted,
129 |                         initial_memory_threshold=initial_memory_threshold,
130 |                         use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients,
131 |                         actor_kwargs=actor_kwargs,
132 |                         zero_index_gradients=zero_index_gradients,seed=seed)
133 |     #agentDQN.action_decoder(5, env.P_Max_SBS)
134 | 
135 |     # 0) add bias to ActorParm by initialize bias of paaathrough --------------
136 |     # desired bias 
137 |     initial_params_ = list(np.random.uniform(0,env.P_Max_SBS,num_actions*5)) 
138 |     # change the original parameter range to [-1,1]
139 |     if scale_actions:
140 |         for a in range (num_actions*5):
141 |             initial_params_[a] = 2. * (initial_params_[a] - 0) / (env.P_Max_SBS - 0) - 1.
142 |     # initilize bias
143 |     if initialise_params:
144 |         initial_weights = np.zeros((num_actions*5,s_dim))#np.zeros((env.action_space.spaces[0].n, env.observation_space.spaces[0].shape[0]))
145 |         initial_bias = np.zeros(num_actions*5)#np.zeros(env.action_space.spaces[0].n)
146 |         for a in range (num_actions*5):#(env.action_space.spaces[0].n):
147 |             initial_bias[a] = initial_params_[a]
148 |         agent.set_action_parameter_passthrough_weights(initial_weights, initial_bias)
149 |     
150 | 
151 |     start_time = time.time()
152 |     total_step=start_episode*MAXstep
153 |     cont = True  
154 |     episode=0
155 |     episode_r_list=[]
156 |     #=========================================================================== load existing model to train
157 |     #load_dir='results_53/PDQN_cc_s11_r9_0dB_N3_20'
158 |     #load_num='1_done'
159 |     #agent.load_models(prefix = os.path.join(load_dir, load_num))
160 |     #===========================================================================
161 |     while cont: # episode
162 |         episode=episode+1
163 |         print(episode, 'episode--------------------------')
164 |         # save model
165 |         if save_freq > 0 and save_dir and episode % save_freq == 0:
166 |             agentDQN.save_models(os.path.join(save_dir, str(episode)))
167 |         # reset  
168 |         s = env.reset()
169 |         s = np.array(list(s), dtype=np.float32, copy=False)
170 |         # 1) take an action----------------------------------------------------
171 |         a1 = agentDQN.act(s)   
172 |         c1,P1=agentDQN.action_decoder(a1, env.P_Max_SBS)
173 | 
174 |         #train_channel_episode.append(env.G) 
175 |         episode_r=[]
176 |         tstep=0
177 |         if total_step>100000:
178 |               break
179 |         while True:  # step 
180 |             tstep = tstep + 1
181 |             total_step = total_step + 1
182 |             print('Iteration '+str(total_step)+'=======================================')
183 |             # 2) step -- next state, reward, done------------------------------   
184 |             #==================================================================
185 |             info1, lis_info1, s_, info_ori1, done1,debug_info1,QoS_R1 = env.step_train(c1,P1,False,True,True,episode-1,tstep-1)
186 |             s_ = np.array(list(s_), dtype=np.float32, copy=False)
187 |             R1,Energy_Efficiency1,Backhaul_cost1,QoS_good1,QoS_gurantee1,QoS_bad1,sum_c_Throughput1,QoS_squaredD1 =info1  
188 |             Energy_Efficiency_ori1,Backhaul_cost_ori1,QoS_good_ori1,QoS_gurantee_ori1,QoS_bad_ori1,sum_c_Throughput_ori1,QoS_squaredD_ori1=info_ori1
189 |             Backhaul_difference1,SINRdb1,QoS_difference1,c_Throughput_ori1 = lis_info1
190 |             Throughput_SBS_threshold,Throughput_BS = debug_info1
191 |             debug_backhaul.append(Throughput_SBS_threshold)
192 |             debug_BSbackhaul.append(Throughput_BS)
193 |             QoS_R.append(QoS_R1)
194 |             #==================================================================
195 |             # 3) take an action------------------------------------------------
196 |             a1_ = agentDQN.act(s_)
197 |             c1_,P1_=agentDQN.action_decoder(a1_, env.P_Max_SBS)
198 |             # 4) learn---------------------------------------------------------
199 |             agentDQN.step(s, (a1), R1, s_,  (a1_), done1 )
200 |             dic_store['a'].append([c1]+P1)
201 |             dic_store['r'].append(R1)
202 |             dic_store['ddpg_s'].append(s)
203 | 
204 |             # 5) Print and store info ------------------------------------------
205 |             key_info=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference']           
206 |             key_info_lis=['Backhaul Difference','SINRdb','QoS Difference','Throughput']
207 |             
208 |             dic_info = t.inst_info(dic_info,(key_info,key_info_lis),((info1,info1),lis_info1),1)
209 |             dic_info_ori = t.inst_info(dic_info_ori,dic_info_ori_key,(info_ori1,info_ori1),0)
210 |           
211 |             key_inst=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference']
212 |             t.print_info((env.UE2TP[c1],P1),s) # print p in dB 
213 |             a_info['c'].append(env.UE2TP[c1])
214 |             a_info['P'].append(10*np.log10(P1*1000))
215 |             
216 |             episode_r.append(R1)
217 |             
218 |             # 6) update --------------------------------------------------------
219 |             c1, P1 = c1_, P1_
220 |             s = s_
221 |             # number of backhaul constraint violation
222 |             if done1:
223 |                 num_back=num_back+1
224 |                 debug_episode_back.append(episode)
225 |             # check if end the episode
226 |             if (tstep>=MAXstep) or done1:
227 |                 break
228 |             
229 |         agentDQN.end_episode()
230 |         episode_r_list.append(np.mean([episode_r])) 
231 |         # check if end the training   
232 |         if (episode>=MAXepisode) :
233 |             print('MAXepisode')
234 |             cont=False
235 |         if (episode>100):
236 |             m = np.mean([episode_r_list[episode-100:episode-1]])
237 |             not_convergence = [1 for i in episode_r_list[episode-100:episode-1] if abs(i-m)> threshold]
238 |             if sum(not_convergence)==0:
239 |                 print('Convergence')
240 |                 cont=False
241 |     #%% end training        
242 |     end_time = time.time()
243 |     if episode>=MAXepisode:
244 |         print('MAXepisode')
245 |     else:
246 |         print('episode=',episode)
247 |        
248 |     print("Training took %.2f seconds" % (end_time - start_time))
249 |     print('(violate)num_back=',num_back,'/',total_step,' ',num_back/total_step*100,'%')
250 |     num_QoS=sum([1 for k in QoS_R if k==1 ])
251 |     print('(follow) Qos',num_QoS,'/',total_step,' ',num_QoS/total_step*100,'%')
252 |     
253 |     # debug for constraints about backhaul 
254 |     #debug_episode_back = [i-1 for i in debug_episode_back]
255 |     #t.plot_constraint(MAXepisode,debug_episode_back,'train',save_dir+'/',0)
256 |     #t.writeConstraintHistory(save_dir+'/train_',MAXepisode,debug_episode_back,0)
257 |     #t.plot_constraint(MAXepisode,QoS_R,'train',save_dir+'/',1)
258 |     #t.writeConstraintHistory(save_dir+'/train_',MAXepisode,QoS_R,1)
259 |     
260 |     #%%
261 |     # save model
262 |     if save_freq > 0 and save_dir:
263 |         agentDQN.save_models(os.path.join(save_dir, '_done'))   
264 |    
265 |     #%%
266 |     # 7) Average per realization steps and Save --------------------------------
267 |     key_avg=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference']
268 |     dic_avg_info = t.train_avg_info(dic_info,key_avg,realization)
269 |     t.train_plot_avg(dic_avg_info,key_avg,realization,'normalize',save_dir+'/train_')
270 |     #-------------------------------------------------------------------------  
271 |     dic_avg_info_ori = t.train_avg_info(dic_info_ori,dic_info_ori_key,realization)
272 |     t.train_plot_avg(dic_avg_info_ori,dic_info_ori_key,realization,'original',save_dir+'/train_')
273 |     
274 |     
275 | 
276 |     #%%  
277 |     import matplotlib.pyplot as plt
278 |     def test_plot_individual(env,dic_info,method_index,key_plot,n,save_dir):    
279 |         #key_plot=['Backhaul Difference','SINR','QoS Difference','Throughput']
280 |         nTerm = [env.nSBS, env.nUE, env.nUE, env.nUE]
281 |         title=['['+method_index+']'+i+' with '+str(n)+' Realizations' for i in key_plot]   
282 |         ylabel=key_plot
283 |         xlabel='Training Steps (x'+str(n)+')'   
284 |         label=['SBS','UE','UE','UE']
285 |         color=['r','b','g','c','m','y','k','b']
286 |         linestyle=['-','--',':',':','-']
287 |     
288 |         save={name:[[] for i in range(num)] for name,num in zip(key_plot,nTerm)}
289 |         save_ori={name:[[] for i in range(num)] for name,num in zip(key_plot,nTerm)}
290 |         for i,key in enumerate(key_plot):#i:0-2 same term(backhaul,SINR,QoS) different methods
291 |             difference = dic_info[key]
292 |             temp_list=[]
293 |             for y in range(nTerm[i]):
294 |                     temp_list.append([difference[x][y] for x in range(len(difference))])
295 |             for length in range(len(temp_list)):
296 |                 Bl=[sum(temp_list[length][j*n:(j+1)*n])/n for j in range(int(len(temp_list[length])/n))]
297 |                 save[key][length]=Bl
298 |                 Bl_ori=[temp_list[length][j*n:(j+1)*n] for j in range(int(len(temp_list[length])/n))]
299 |                 for ori in Bl_ori:
300 |                     save_ori[key][length]=save_ori[key][length]+ori
301 |                 plt.plot(np.arange(len(Bl)), Bl,label=label[i]+str(length),color=color[length],linestyle=linestyle[0])
302 |     
303 |             plt.legend(loc='upper right')
304 |             plt.title(title[i])
305 |             plt.ylabel(ylabel[i])
306 |             plt.xlabel(xlabel)
307 |             plt.savefig(save_dir+title[i]+'.png')
308 |             plt.show()
309 |         return save,save_ori
310 |     
311 |     # 8) plot results of each SBS or UE, e.g.Backhaul_difference-SBS, SINR-UE--------------
312 |     key_individual=['Backhaul Difference','SINRdb','QoS Difference', 'Throughput']
313 |     save,save_ori= test_plot_individual(env,dic_info['1'],'1',key_individual,realization,save_dir+'/train_')
314 |     # 9) write info ------------------------------------------------------------
315 |     # average info
316 |     t.writeCSV(dic_avg_info,dic_avg_info_ori,save,dic_info_ori_key,key_individual,key_avg,save_dir+'/train',0)
317 |     # original
318 |     #t.writeCSV(dic_info,dic_info_ori,save_ori,dic_info_ori_key,key_individual,key_avg,save_dir+'/train_original',0)
319 |       
320 |     #%% debug
321 |     #t.writeCSI(save_dir+'/CSI',train_channel_episode)
322 |     debug_I = env.debug_I
323 |     debug_UE_throughput = env.debug_UE_throughput # each UE throughput
324 |     debug_SBS_throughput = env.debug_SBS_throughput
325 |     debug_SBS_threshold = env.debug_SBS_threshold
326 |     debug_c = env.debug_c
327 |     debug_p = env.debug_p
328 |     debug_backhaul = env.debug_backhaul
329 |     debug_QoS = env.debug_QoS # which episode and step violate QoS & UE index
330 |     #%% test actual converage range
331 |     threshold=7.5
332 |     m = np.mean([episode_r_list[episode-100:episode-1]])
333 |     not_convergence = [1 for i in episode_r_list[episode-100:episode-1] if abs(i-m)> threshold]
334 |     if sum(not_convergence)==0:
335 |         print('Convergence')
336 |     else:
337 |         print('not')


--------------------------------------------------------------------------------
/train_PDQN.py:
--------------------------------------------------------------------------------
  1 | #!python3
  2 | """
  3 | Created on Sat Jun  1 16:54:41 2019
  4 | 
  5 | @author: kuo
  6 | """
  7 | 
  8 | import time
  9 | import numpy as np
 10 | import os
 11 | import scipy.stats as st
 12 | import copy 
 13 | os.chdir('/home/chan/PDQN/') 
 14 | from pdqn import PDQNAgent
 15 | from env import env_PowerAllocation
 16 | import tool as t
 17 | 
 18 | #import tool
 19 | os.environ['CUDA_VISIBLE_DEVICES']='0'
 20 | 
 21 | 
 22 | 
 23 | 
 24 | #%%
 25 | if __name__ == '__main__':
 26 |  # PDQN=====================================================================
 27 |     batch_size=128
 28 |     initial_memory_threshold=128 #1000 # Number of transitions required to start learning.
 29 |     replay_memory_size=20000     # Replay memory transition capacity 
 30 |     epsilon_initial=1
 31 |     epsilon_steps=1000 # Number of episodes over which to linearly anneal epsilon
 32 |     epsilon_final=0.01 # Final epsilon value
 33 |     gamma=0.95
 34 |     clip_grad=1 # Parameter gradient clipping limit 
 35 |     inverting_gradients=True # Use inverting gradients scheme instead of squashing function
 36 |     seed=0 #0 #Random seed
 37 |     # 1) ParamActor------------------------------------------------------------
 38 |     layers_actor_param =[256]#[64,256]#(256,) # 5-- --1050
 39 |     actor_param_kwargs={'hidden_layers': layers_actor_param, 'output_layer_init_std': 1e-5,'squashing_function': False,'activation': "relu"}
 40 |     learning_rate_actor_param=0.00001
 41 |     tau_actor_param=0.001
 42 |     """loss func for actor_parameter """
 43 |     average=False # Average weighted loss function  
 44 |     weighted=False # Naive weighted loss function
 45 |     random_weighted=False # Randomly weighted loss function
 46 |     indexed=False # Indexed loss function
 47 |     zero_index_gradients=False # Whether to zero all gradients for action-parameters not corresponding to the chosen action
 48 |     # 2) Actor-----------------------------------------------------------------
 49 |     tau_actor=0.1  
 50 |     learning_rate_actor=0.00001#0.0001#0.001 # reduce lr can avoid nan output
 51 |     action_input_layer=0# Which layer to input action parameters-- useless?
 52 |     layers_actor=[512,128,16]#(256,)# # 1055-- --5  # # Hidden layers
 53 |     actor_kwargs={'hidden_layers': layers_actor, 'output_layer_init_std': 1e-5,'action_input_layer': action_input_layer,'activation': "relu"}
 54 |     #--------------------------------------------------------------------------
 55 |     # Performance
 56 |     dic_info_key = ['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference','Backhaul Difference','SINRdb','QoS Difference','Throughput']
 57 |     dic_info={key_dic_info:{term: [] for term in dic_info_key} for key_dic_info in ['1','2','3','4','5','6','7','8']  }
 58 |     dic_info_ori_key = ['Energy Efficiency','Backhaul Cost','QoS Good', 'QoS Gurantee', 'QoS Bad','System Throughput','QoS Squared Difference']
 59 |     dic_info_ori={key_dic_info:{term: [] for term in dic_info_ori_key} for key_dic_info in ['1','2','3','4','5','6','7','8'] }
 60 |     
 61 |     a_info={'c':[],'P':[]}
 62 |     dic_store={'a':[],'ddpg_s':[],'r':[],'dqn_s':[],'dqn_Q':[]}
 63 |     dic_NN_output={'actor':[],'critic':[],'dqn_q_eval':[],'dqn_q_target':[]}
 64 |     num_back=0
 65 |     QoS_R=[]
 66 |     #--------------------------------------------------------------------------
 67 |     # debug
 68 |     debug_PNN=[]  
 69 |     debug_backhaul=[]
 70 |     debug_BSbackhaul=[]
 71 |     debug_episode_back=[]
 72 |     train_channel_episode=[]
 73 |     ############################################################################ change this    
 74 |     scale_actions = True
 75 |     initialise_params = False # True:add pass-through layer to ActorParam and initilize them / False: not add pass-through layer to ActorParam
 76 |     use_ornstein_noise=True#True # False: Uniformly sample parameters & add noise to taken parameters / True: greedy parameters 
 77 |     save_freq = 100#0 # How often to save models (0 = never)
 78 |     title="PDQN_cc_s11_r11_0dB_N3_1"#"PDQN2"#"PDQN_backhaul" # Prefix of output files
 79 |     save_dir ="results_PDQN_5v3" #Output directory
 80 |     load = False 
 81 |     load_dir ="results/"+title+"0"
 82 |     load_num="999"
 83 |     threshold = 0.005#1e-3
 84 |     start_episode=0
 85 |     MAXepisode = 100000#600#20000
 86 |     MAXstep = 100#150
 87 |     # evaluation_episodes=1000 # Episodes over which to evaluate after training
 88 |     realization=500#100
 89 |     lambda1=0.43#0.53#1
 90 |     lambda2=0.16#0.05#0.42#0.8
 91 |     lambda3=0#0.1#0.3#0
 92 |     mean_name='mean_std_cc_ct_0dB_s11_nv51_nobackhaul_new_N3_SBS3_UE5_3v3.csv'#'mean_std_cc_ct_0dB_s3_nv21_oldChannel_nobackhaul.csv'
 93 |     scenario_name = 'EnvInfo_3'
 94 |     mean_flage=False
 95 |     ###########################################################################
 96 |     #%% ENV
 97 |     env = env_PowerAllocation(lambda1=lambda1,lambda2=lambda2,lambda3=lambda3,MAXepisode=MAXepisode,n_baseline=1)
 98 |     #-------------------------------------------------------------------------- Choose Network Geometry
 99 |     env.load(name=scenario_name) # use the previous one 
100 |     #-------------------------------------------------------------------------- mean_std
101 |     env.mean_std(10**6,mean_flage,mean_name)#calculate(True) or load(False)
102 |     num_actions = env.action_space[0]
103 |     s_dim = env.nUE
104 |     #%% PDQN
105 |     # save model --------------------------------------------------------------
106 |     if save_freq > 0 and save_dir:
107 |         save_dir = os.path.join(save_dir, title + "{}".format(str(seed)))
108 |         os.makedirs(save_dir, exist_ok=True)
109 |         
110 |     agent_class = PDQNAgent
111 |     agent = agent_class(s_dim=s_dim, action_space=env.action_space,nUE=env.nUE,#observation_space=env.observation_space.spaces[0], action_space=env.action_space,
112 |                         batch_size=batch_size,learning_rate_actor=learning_rate_actor,learning_rate_actor_param=learning_rate_actor_param,  # 0.001
113 |                         epsilon_steps=epsilon_steps,epsilon_final=epsilon_final,gamma=gamma,
114 |                         clip_grad=clip_grad,indexed=indexed,average=average,
115 |                         random_weighted=random_weighted,tau_actor=tau_actor,weighted=weighted,
116 |                         tau_actor_param=tau_actor_param,initial_memory_threshold=initial_memory_threshold,
117 |                         use_ornstein_noise=use_ornstein_noise,replay_memory_size=replay_memory_size,inverting_gradients=inverting_gradients,
118 |                         actor_kwargs=actor_kwargs,actor_param_kwargs=actor_param_kwargs,
119 |                         zero_index_gradients=zero_index_gradients,seed=seed)
120 |     
121 | 
122 |     # 0) add bias to ActorParm by initialize bias of paaathrough --------------
123 |     # desired bias 
124 |     initial_params_ = list(np.random.uniform(0,env.P_Max_SBS,num_actions*5)) 
125 |     # change the original parameter range to [-1,1]
126 |     if scale_actions:
127 |         for a in range (num_actions*5):
128 |             initial_params_[a] = 2. * (initial_params_[a] - 0) / (env.P_Max_SBS - 0) - 1.
129 |     # initilize bias
130 |     if initialise_params:
131 |         initial_weights = np.zeros((num_actions*5,s_dim))#np.zeros((env.action_space.spaces[0].n, env.observation_space.spaces[0].shape[0]))
132 |         initial_bias = np.zeros(num_actions*5)#np.zeros(env.action_space.spaces[0].n)
133 |         for a in range (num_actions*5):#(env.action_space.spaces[0].n):
134 |             initial_bias[a] = initial_params_[a]
135 |         agent.set_action_parameter_passthrough_weights(initial_weights, initial_bias)
136 |     
137 | 
138 |     start_time = time.time()
139 |     total_step=start_episode*MAXstep
140 |     cont = True  
141 |     episode=0
142 |     episode_r_list=[]
143 |     #=========================================================================== load existing model to train
144 |     #load_dir='results_53/PDQN_cc_s11_r9_0dB_N3_20'
145 |     #load_num='1_done'
146 |     #agent.load_models(prefix = os.path.join(load_dir, load_num))
147 |     #===========================================================================
148 |     while cont: # episode
149 |         
150 |         episode=episode+1
151 |         print(episode, 'episode--------------------------')
152 |         # save model
153 |         if save_freq > 0 and save_dir and episode % save_freq == 0:
154 |             agent.save_models(os.path.join(save_dir, str(episode)))
155 |         # reset  
156 |         s = env.reset()
157 |         s = np.array(list(s), dtype=np.float32, copy=False)
158 |         # 1) take an action----------------------------------------------------
159 |         c1, PNN1, all_action_parameters = agent.act(s)   
160 |         P1 = t.p_normalize(env.P_Max_SBS,PNN1)
161 | 
162 |         train_channel_episode.append(env.G) 
163 |         episode_r=[]
164 |         tstep=0
165 |         if total_step>50000:
166 |               break
167 |         while True:  # step 
168 |             tstep = tstep + 1
169 |             total_step = total_step + 1
170 |             print('Iteration '+str(total_step)+'=======================================')
171 |             # 2) step -- next state, reward, done------------------------------   
172 |             #==================================================================
173 |             info1, lis_info1, s_, info_ori1, done1,debug_info1,QoS_R1 = env.step_train(c1,P1,False,True,True,episode-1,tstep-1)
174 |             s_ = np.array(list(s_), dtype=np.float32, copy=False)
175 |             R1,Energy_Efficiency1,Backhaul_cost1,QoS_good1,QoS_gurantee1,QoS_bad1,sum_c_Throughput1,QoS_squaredD1 =info1  
176 |             Energy_Efficiency_ori1,Backhaul_cost_ori1,QoS_good_ori1,QoS_gurantee_ori1,QoS_bad_ori1,sum_c_Throughput_ori1,QoS_squaredD_ori1=info_ori1
177 |             Backhaul_difference1,SINRdb1,QoS_difference1,c_Throughput_ori1 = lis_info1
178 |             Throughput_SBS_threshold,Throughput_BS = debug_info1
179 |             debug_backhaul.append(Throughput_SBS_threshold)
180 |             debug_BSbackhaul.append(Throughput_BS)
181 |             QoS_R.append(QoS_R1)
182 |             #==================================================================
183 |             # 3) take an action------------------------------------------------
184 |             c1_, PNN1_, all_action_parameters_ = agent.act(s_)
185 |             P1_ = t.p_normalize(env.P_Max_SBS,PNN1_)
186 |             # 4) learn---------------------------------------------------------
187 |             agent.step(s, (c1, all_action_parameters), R1, s_,  (c1_, all_action_parameters_), done1 )
188 |             dic_store['a'].append([c1]+P1)
189 |             dic_store['r'].append(R1)
190 |             dic_store['ddpg_s'].append(s)
191 |             debug_PNN.append(PNN1)
192 |             # 5) Print and store info ------------------------------------------
193 |             key_info=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference']           
194 |             key_info_lis=['Backhaul Difference','SINRdb','QoS Difference','Throughput']
195 |             
196 |             dic_info = t.inst_info(dic_info,(key_info,key_info_lis),((info1,info1),lis_info1),1)
197 |             dic_info_ori = t.inst_info(dic_info_ori,dic_info_ori_key,(info_ori1,info_ori1),0)
198 |           
199 |             key_inst=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference']
200 |             t.print_info((env.UE2TP[c1],P1),s) # print p in dB 
201 |             a_info['c'].append(env.UE2TP[c1])
202 |             a_info['P'].append(10*np.log10(P1*1000))
203 |             
204 |             episode_r.append(R1)
205 |             
206 |             # 6) update --------------------------------------------------------
207 |             c1, P1, all_action_parameters = c1_, P1_, all_action_parameters_
208 |             s = s_
209 |             # number of backhaul constraint violation
210 |             if done1:
211 |                 num_back=num_back+1
212 |                 debug_episode_back.append(episode)
213 |             # check if end the episode
214 |             if (tstep>=MAXstep) or done1:
215 |                 break
216 |             
217 |         agent.end_episode()
218 |         episode_r_list.append(np.mean([episode_r])) 
219 |         # check if end the training   
220 |         if (episode>=MAXepisode) :
221 |             print('MAXepisode')
222 |             cont=False
223 |         if (episode>100):
224 |             m = np.mean([episode_r_list[episode-100:episode-1]])
225 |             not_convergence = [1 for i in episode_r_list[episode-100:episode-1] if abs(i-m)> threshold]
226 |             if sum(not_convergence)==0:
227 |                 print('Convergence')
228 |                 cont=False
229 |     #%% end training        
230 |     end_time = time.time()
231 |     if episode>=MAXepisode:
232 |         print('MAXepisode')
233 |     else:
234 |         print('episode=',episode)
235 |        
236 |     print("Training took %.2f seconds" % (end_time - start_time))
237 |     print('(violate)num_back=',num_back,'/',total_step,' ',num_back/total_step*100,'%')
238 |     num_QoS=sum([1 for k in QoS_R if k==1 ])
239 |     print('(follow) Qos',num_QoS,'/',total_step,' ',num_QoS/total_step*100,'%')
240 |     
241 |     # debug for constraints about backhaul 
242 |     #debug_episode_back = [i-1 for i in debug_episode_back]
243 |     #t.plot_constraint(MAXepisode,debug_episode_back,'train',save_dir+'/',0)
244 |     #t.writeConstraintHistory(save_dir+'/train_',MAXepisode,debug_episode_back,0)
245 |     #t.plot_constraint(MAXepisode,QoS_R,'train',save_dir+'/',1)
246 |     #t.writeConstraintHistory(save_dir+'/train_',MAXepisode,QoS_R,1)
247 |     
248 |     #%%
249 |     # save model
250 |     if save_freq > 0 and save_dir:
251 |         agent.save_models(os.path.join(save_dir, '_done'))   
252 |    
253 |     #%%
254 |     # 7) Average per realization steps and Save --------------------------------
255 |     key_avg=['R','Energy Efficiency','Backhaul Cost','QoS Good','QoS Gurantee','QoS Bad','System Throughput','QoS Squared Difference']
256 |     dic_avg_info = t.train_avg_info(dic_info,key_avg,realization)
257 |     t.train_plot_avg(dic_avg_info,key_avg,realization,'normalize',save_dir+'/train_')
258 |     #-------------------------------------------------------------------------  
259 |     dic_avg_info_ori = t.train_avg_info(dic_info_ori,dic_info_ori_key,realization)
260 |     t.train_plot_avg(dic_avg_info_ori,dic_info_ori_key,realization,'original',save_dir+'/train_')
261 |     
262 |     
263 | 
264 |     #%%  
265 |     import matplotlib.pyplot as plt
266 |     def test_plot_individual(env,dic_info,method_index,key_plot,n,save_dir):    
267 |         #key_plot=['Backhaul Difference','SINR','QoS Difference','Throughput']
268 |         nTerm = [env.nSBS, env.nUE, env.nUE, env.nUE]
269 |         title=['['+method_index+']'+i+' with '+str(n)+' Realizations' for i in key_plot]   
270 |         ylabel=key_plot
271 |         xlabel='Training Steps (x'+str(n)+')'   
272 |         label=['SBS','UE','UE','UE']
273 |         color=['r','b','g','c','m','y','k','b']
274 |         linestyle=['-','--',':',':','-']
275 |     
276 |         save={name:[[] for i in range(num)] for name,num in zip(key_plot,nTerm)}
277 |         save_ori={name:[[] for i in range(num)] for name,num in zip(key_plot,nTerm)}
278 |         for i,key in enumerate(key_plot):#i:0-2 same term(backhaul,SINR,QoS) different methods
279 |             difference = dic_info[key]
280 |             temp_list=[]
281 |             for y in range(nTerm[i]):
282 |                     temp_list.append([difference[x][y] for x in range(len(difference))])
283 |             for length in range(len(temp_list)):
284 |                 Bl=[sum(temp_list[length][j*n:(j+1)*n])/n for j in range(int(len(temp_list[length])/n))]
285 |                 save[key][length]=Bl
286 |                 Bl_ori=[temp_list[length][j*n:(j+1)*n] for j in range(int(len(temp_list[length])/n))]
287 |                 for ori in Bl_ori:
288 |                     save_ori[key][length]=save_ori[key][length]+ori
289 |                 plt.plot(np.arange(len(Bl)), Bl,label=label[i]+str(length),color=color[length],linestyle=linestyle[0])
290 |     
291 |             plt.legend(loc='upper right')
292 |             plt.title(title[i])
293 |             plt.ylabel(ylabel[i])
294 |             plt.xlabel(xlabel)
295 |             plt.savefig(save_dir+title[i]+'.png')
296 |             plt.show()
297 |         return save,save_ori
298 |     
299 |     # 8) plot results of each SBS or UE, e.g.Backhaul_difference-SBS, SINR-UE--------------
300 |     key_individual=['Backhaul Difference','SINRdb','QoS Difference', 'Throughput']
301 |     save,save_ori= test_plot_individual(env,dic_info['1'],'1',key_individual,realization,save_dir+'/train_')
302 |     # 9) write info ------------------------------------------------------------
303 |     # average info
304 |     t.writeCSV(dic_avg_info,dic_avg_info_ori,save,dic_info_ori_key,key_individual,key_avg,save_dir+'/train',0)
305 |     # original
306 |     #t.writeCSV(dic_info,dic_info_ori,save_ori,dic_info_ori_key,key_individual,key_avg,save_dir+'/train_original',0)
307 |       
308 |     #%% debug
309 |     t.writeCSI(save_dir+'/CSI',train_channel_episode)
310 |     debug_I = env.debug_I
311 |     debug_UE_throughput = env.debug_UE_throughput # each UE throughput
312 |     debug_SBS_throughput = env.debug_SBS_throughput
313 |     debug_SBS_threshold = env.debug_SBS_threshold
314 |     debug_c = env.debug_c
315 |     debug_p = env.debug_p
316 |     debug_backhaul = env.debug_backhaul
317 |     debug_QoS = env.debug_QoS # which episode and step violate QoS & UE index
318 |     #%% test actual convergence range
319 |     threshold=7.5
320 |     m = np.mean([episode_r_list[episode-100:episode-1]])
321 |     not_convergence = [1 for i in episode_r_list[episode-100:episode-1] if abs(i-m)> threshold]
322 |     if sum(not_convergence)==0:
323 |         print('Convergence')
324 |     else:
325 |         print('not')


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
def soft_update_target_network(source_network, target_network, tau):
    """Blend the source parameters into the target network in place.

    Each target parameter becomes tau * source + (1 - tau) * target.
    Parameters are paired in the order returned by each network's
    parameters() method.
    """
    keep = 1.0 - tau
    pairs = zip(target_network.parameters(), source_network.parameters())
    for dst, src in pairs:
        blended = tau * src.data + keep * dst.data
        dst.data.copy_(blended)
4 | 
5 | 
def hard_update_target_network(source_network, target_network):
    """Copy every source parameter into the matching target parameter in place.

    Parameters are paired in the order returned by each network's
    parameters() method.
    """
    pairs = zip(target_network.parameters(), source_network.parameters())
    for dst, src in pairs:
        dst.data.copy_(src.data)
9 | 


--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/utils/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/noise.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chikaihsieh/Power-Allocation-and-User-Device-Association-with-Deep-Reinforcement-Learning/ce226cd530664d8d8d257f1231860653164027e3/utils/__pycache__/noise.cpython-37.pyc


--------------------------------------------------------------------------------
/utils/noise.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | class OrnsteinUhlenbeckActionNoise(object):
 5 |     """
 6 |     Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
 7 |     Source: https://github.com/vy007vikas/PyTorch-ActorCriticRL/blob/master/utils.py
 8 |     """
 9 | 
10 |     def __init__(self, action_dim, mu=0, theta=0.15, sigma=0.2, random_machine=np.random):
11 |         super(OrnsteinUhlenbeckActionNoise, self).__init__()
12 |         self.random = random_machine
13 |         self.action_dim = action_dim
14 |         self.mu = mu
15 |         self.theta = theta
16 |         self.sigma = sigma
17 |         self.X = np.ones(self.action_dim) * self.mu
18 | 
19 |     def reset(self):
20 |         self.X = np.ones(self.action_dim) * self.mu
21 | 
22 |     def sample(self):
23 |         dx = self.theta * (self.mu - self.X)
24 |         dx = dx + self.sigma * self.random.randn(len(self.X))
25 |         self.X = self.X + dx
26 |         return self.X
27 | 


--------------------------------------------------------------------------------