├── README.md
├── DQN_agent.py
├── main.py
└── Environment.py

/README.md:
--------------------------------------------------------------------------------
# Deep-Reinforcement-Learning-for-resource-allocation-in-wireless-networks

### This project focuses on improving resource allocation in wireless networks using deep reinforcement learning (DRL) techniques. The goal is to enhance the learning of DRL models for power allocation in small-cell and ultra-dense wireless cellular networks. To accelerate learning and improve network throughput, experience replay mechanisms were implemented. Simulations evaluating the performance of different training strategies with transfer learning were conducted in Python using NumPy, PyTorch, and Matplotlib. The results showed that the conventional Deep Q-Network model's learning was accelerated by up to 77% and the network performance improved by up to 24.7%.

### The project resulted in several research articles published in top-tier journals and conferences in computer science.
### Research articles:
* Ángel G. Andrade and Alexis Anzaldo. "[Accelerated resource allocation based on experience retention for B5G networks](https://www.sciencedirect.com/science/article/abs/pii/S1084804523000127)." Journal of Network and Computer Applications (2023): 103593.
* Alexis Anzaldo and Ángel G. Andrade. "[Experience Replay-Based Power Control for Sum-Rate Maximization in Multi-Cell Networks](https://ieeexplore.ieee.org/abstract/document/9870169)." IEEE Wireless Communications Letters 11.11 (2022): 2350-2354.
* Alexis Anzaldo and Ángel G. Andrade. "[Buffer transference strategy for power control in B5G-ultra-dense wireless cellular networks](https://link.springer.com/article/10.1007/s11276-022-03087-6)." Wireless Networks 28.8 (2022): 3613-3620.
### Conference articles:
* Alexis Anzaldo and Angel G. Andrade. "[Deep Reinforcement Learning for Power control in Multi-tasks Wireless Cellular Networks](https://ieeexplore.ieee.org/abstract/document/9928617)." 2022 IEEE International Mediterranean Conference on Communications and Networking (MeditCom). IEEE, 2022.
* Alexis Anzaldo and Ángel G. Andrade. "[Training Effect on AI-based Resource Allocation in small-cell networks](https://ieeexplore.ieee.org/abstract/document/9647736)." 2021 IEEE Latin-American Conference on Communications (LATINCOM). IEEE, 2021.

### Additionally, as part of the project, a systematic review was conducted to identify the most relevant research works that implement DRL for resource allocation in wireless networks. The review identified 56 relevant works and provides an overview of the current state of the art in this area.
### The submitted version of the systematic review is available at Research Square:
* [Intelligence-learning driven resource allocation for B5G Ultra-Dense Networks: A structured literature review](https://www.researchsquare.com/article/rs-2763206/v1)

### Overall, this project contributes to wireless network resource allocation by leveraging DRL techniques. The proposed learning strategies and experience replay mechanisms can be applied to other network settings, providing a more efficient and effective approach to network optimization.
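
### The snippet below is a minimal sketch of how the classes in this repository fit together. Constructor arguments and the loop structure follow `main.py`; the parameter values and the number of intervals shown here are illustrative only, and the loop is heavily shortened (see `main.py` for the full training/testing procedure).

```python
import numpy as np
from Environment import Env_cellular
from DQN_agent import Agent

# Environment: 4x4 BS grid, 3 UEs per cell, 10 discrete power levels (as in main.py)
env = Env_cellular(fd=10, Ts=20e-3, n_x=4, n_y=4, L=3, C=16, maxM=3,
                   min_dis=0.01, max_dis=1.0, max_p=38., p_n=-114., power_num=10)
env.set_Ns(1000)                     # number of time intervals to simulate (illustrative)
power_levels = env.get_power_set(5)  # discrete transmit powers in watts, index 0 = off

agent = Agent(gamma=0.5, epsilon=0.9, lr=0.001, n_actions=10,
              input_dims=[env.state_num], mem_size=50000, batch_size=256,
              eps_min=0.01, replace=100, chkpt_dir='', q_eval_name='eval',
              q_next_name='next', instance=0)

state, _, _ = env.reset(seed=0)
for t in range(999):
    # one shared DQN selects a power level for every BS-UE link
    actions = np.array([agent.choose_action(state[m, :], np.random.random())
                        for m in range(env.M)])
    next_state, _, reward, sum_rate = env.step(power_levels[actions])
    for m in range(env.M):
        agent.store_transition(state[m, :], np.array([actions[m]]),
                               np.array([reward[m]]), next_state[m, :])
    if t % 10 == 0:
        agent.learn()
    state = np.copy(next_state)
```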

--------------------------------------------------------------------------------
/DQN_agent.py:
--------------------------------------------------------------------------------
import os
import numpy as np
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class ReplayBuffer():
    def __init__(self, max_size, input_shape, n_actions):
        self.input_shape = input_shape
        self.mem_size = max_size
        self.mem_cntr = 0
        self.state_memory = np.zeros((self.mem_size, *input_shape),
                                     dtype=np.float32)
        self.new_state_memory = np.zeros((self.mem_size, *input_shape),
                                         dtype=np.float32)
        self.action_memory = np.zeros((self.mem_size, n_actions), dtype=np.uint8)
        self.reward_memory = np.zeros(self.mem_size, dtype=np.float32)
        self.n_actions = n_actions

    def store_transition(self, state, action, reward, state_):
        index = self.mem_cntr % self.mem_size
        self.state_memory[index] = state
        self.new_state_memory[index] = state_
        actions = np.zeros(self.n_actions)
        actions[action] = 1.0  # One-hot encoding of the action
        self.action_memory[index] = actions
        self.reward_memory[index] = reward
        self.mem_cntr += 1

    def sample_buffer(self, batch_size):
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, batch_size, replace=False)
        states = self.state_memory[batch]
        actions = self.action_memory[batch]
        rewards = self.reward_memory[batch]
        states_ = self.new_state_memory[batch]

        return states, actions, rewards, states_


class DeepQNetwork(nn.Module):
    def __init__(self, lr, n_actions, name, input_dims, chkpt_dir):
        super(DeepQNetwork, self).__init__()
        self.checkpoint_dir = chkpt_dir
        self.checkpoint_file = os.path.join(self.checkpoint_dir, name)
        self.fc1 = nn.Linear(*input_dims, 128)
        self.fc2 = nn.Linear(128, 64)
        self.A = nn.Linear(64, n_actions)
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        flat1 = F.relu(self.fc1(state))
        flat2 = F.relu(self.fc2(flat1))
        A = self.A(flat2)  # One Q-value estimate per action
        return A

    def save_checkpoint(self):
        print('... saving checkpoint ...')
        T.save(self.state_dict(), self.checkpoint_file)

    def load_checkpoint(self):
        print('... loading checkpoint ...')
        self.load_state_dict(T.load(self.checkpoint_file, map_location=T.device('cpu')))


class Agent():
    def __init__(self, gamma, epsilon, lr, n_actions, input_dims,
                 mem_size, batch_size, eps_min,
                 replace, chkpt_dir, q_eval_name, q_next_name, instance):
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_init = epsilon
        self.epsilon_min = eps_min
        self.lr = lr
        self.n_actions = n_actions
        self.input_dims = input_dims
        self.batch_size = batch_size
        self.replace_target_cnt = replace
        self.chkpt_dir = chkpt_dir
        self.action_space = [i for i in range(self.n_actions)]
        self.learn_step_counter = 0

        self.mem_size = mem_size
        self.memory = ReplayBuffer(mem_size, input_dims, self.n_actions)

        T.manual_seed(instance)  # Fix the initial random network parameters per instance
        # Local (online) network
        self.q_eval = DeepQNetwork(self.lr, self.n_actions,
                                   input_dims=self.input_dims,
                                   name=q_eval_name,
                                   chkpt_dir=self.chkpt_dir)
        # Target network
        self.q_next = DeepQNetwork(self.lr, self.n_actions,
                                   input_dims=self.input_dims,
                                   name=q_next_name,
                                   chkpt_dir=self.chkpt_dir)

    def set_loadchkpt_dir(self, chkpt_dir, q_eval_name, q_next_name):
        self.q_eval.checkpoint_dir = chkpt_dir
        self.q_eval.checkpoint_file = os.path.join(chkpt_dir, q_eval_name)
        self.q_next.checkpoint_dir = chkpt_dir
        self.q_next.checkpoint_file = os.path.join(chkpt_dir, q_next_name)

    def set_savechkpt_dir(self, chkpt_dir, q_eval_name, q_next_name):
        self.q_eval.checkpoint_dir = chkpt_dir
        self.q_eval.checkpoint_file = os.path.join(chkpt_dir, q_eval_name)
        self.q_next.checkpoint_dir = chkpt_dir
        self.q_next.checkpoint_file = os.path.join(chkpt_dir, q_next_name)

    def initialization_FIFO(self, buffer):
        # Pre-load the FIFO replay memory with previously collected transitions
        for idx in np.arange(0, len(buffer['obs'])):
            index = self.memory.mem_cntr % self.mem_size
            self.memory.state_memory[index] = buffer['obs'][idx]
            self.memory.new_state_memory[index] = buffer['next_obs'][idx]
            actions = np.zeros(self.n_actions)
            actions[np.array([buffer['action'][idx]])] = 1.0
            self.memory.action_memory[index] = actions
            self.memory.reward_memory[index] = np.array([buffer['reward'][idx]])
            self.memory.mem_cntr += 1
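
    # Note: initialization_FIFO is what enables the experience-retention /
    # buffer-transference strategies studied in the associated papers: a buffer
    # of transitions collected under a previous task or deployment (a dict with
    # keys 'obs', 'action', 'reward', 'next_obs') is written directly into the
    # FIFO replay memory, so learning in a new scenario starts from retained
    # experiences instead of an empty buffer.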

    def reset_buffer(self):
        self.memory = ReplayBuffer(self.mem_size, self.input_dims, self.n_actions)

    def choose_action(self, observation, random_epsilon):
        if random_epsilon >= self.epsilon:
            state = T.tensor([observation], dtype=T.float).to(self.q_eval.device)
            advantage = self.q_eval.forward(state)
            action = T.argmax(advantage).item()
            return action
        else:
            action = np.random.choice(self.action_space)
            return action

    def choose_action_test(self, observation):
        state = T.tensor([observation], dtype=T.float).to(self.q_eval.device)
        advantage = self.q_eval.forward(state)
        action = T.argmax(advantage).item()
        return action

    def store_transition(self, state, action, reward, state_):
        self.memory.store_transition(state, action, reward, state_)

    def replace_target_network(self):
        if self.learn_step_counter % self.replace_target_cnt == 0:
            self.q_next.load_state_dict(self.q_eval.state_dict())

    def decay_epsfix(self, episodes, episode_step):
        # Linear decay from epsilon_init to epsilon_min over `episodes` steps
        decay_rate = max((episodes - episode_step) / episodes, 0)
        self.epsilon = (self.epsilon_init - self.epsilon_min) * decay_rate + self.epsilon_min

    def epsilon_reset(self, epsilon_aux):
        self.epsilon = epsilon_aux

    def save_models(self):
        self.q_eval.save_checkpoint()
        self.q_next.save_checkpoint()

    def load_models(self):
        self.q_eval.load_checkpoint()
        self.q_next.load_checkpoint()

    def learn(self):
        if self.memory.mem_cntr <= self.batch_size:
            return
        state, action, reward, new_state = \
            self.memory.sample_buffer(self.batch_size)

        states = T.tensor(state).to(self.q_eval.device)
        rewards = T.tensor(reward).to(self.q_eval.device)
        states_ = T.tensor(new_state).to(self.q_eval.device)

        # Recover integer action indices from the one-hot action memory
        action_values = np.array(self.action_space, dtype=np.int32)
        action_indices = np.dot(action, action_values)

        q_eval = self.q_eval.forward(states).to(self.q_eval.device)
        q_next = self.q_next.forward(states_).to(self.q_eval.device)

        batch_index = np.arange(self.batch_size, dtype=np.int32)

        # State-action values Q(s, a) from the online network
        state_action_values = q_eval[batch_index, action_indices]
        # Expected Q-values, bootstrapped from the target network
        expected_state_action_values = rewards + self.gamma * T.max(q_next, dim=1)[0]

        TD = expected_state_action_values - state_action_values
        loss = TD.pow(2).to(self.q_eval.device)
        loss = loss.mean()

        # Optimize the model
        self.q_eval.optimizer.zero_grad()
        loss.backward()
        self.q_eval.optimizer.step()

        self.replace_target_network()  # Update the target network every `replace` learn steps
        self.learn_step_counter += 1
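
# A minimal smoke test (not part of the original module): exercises the replay
# buffer and one learning step with random data. The dimensions below are
# illustrative only; in this project the state size comes from
# Env_cellular.state_num and n_actions from the number of power levels.
if __name__ == '__main__':
    agent = Agent(gamma=0.5, epsilon=0.9, lr=0.001, n_actions=10, input_dims=[50],
                  mem_size=1000, batch_size=32, eps_min=0.01, replace=100,
                  chkpt_dir='', q_eval_name='eval', q_next_name='next', instance=0)
    for _ in range(64):
        s = np.random.randn(50).astype(np.float32)
        s_ = np.random.randn(50).astype(np.float32)
        a = np.random.randint(10)
        agent.store_transition(s, np.array([a]), np.array([1.0]), s_)
    agent.learn()  # one gradient step on a sampled batch
    print('chosen action:', agent.choose_action_test(np.random.randn(50).astype(np.float32)))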

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import numpy as np
from DQN_agent import Agent
from Environment import Env_cellular

# --------------------- Environment params / DQN params
'''
Environment from "Power Allocation in Multi-User Cellular Networks with Deep Q Learning Approach"
DOI: 10.1109/ICC.2019.8761431
https://github.com/mengxiaomao
'''

'System Parameters ------------------------------------------------------------------------------'
n_x = 4            # BSs on the x axis
n_y = 4            # BSs on the y axis
maxM = 3           # Number of UEs in each cell
min_dis = 0.01     # Minimum UE-BS distance (km)
max_dis = 1.0      # Maximum UE-BS distance (km)
max_p = 38.        # BS maximum transmission power (dBm)
p_n = -114.        # Noise power (dBm)
power_num = 10     # Number of available power levels (action space)
'--------------------------------------------------------------------------------------------------'

'Requirements for the Jakes fading model ----------------------------------------------------------'
fd = 10            # Maximum Doppler frequency (refer to Eq. 2)
Ts = 20e-3         # Time between adjacent intervals (refer to Eq. 2)
'------------------------------------------------------------------------------------------------------------'

'Considerations ----------------------------------------------------------------------------------------------'
L = 3              # Number of cluster rings considered adjacent (L1 = 7 BSs, L2 = 19 BSs, L3 = 37 BSs)
C = 16             # (Ic) Number of interferers taken into account for the localized reward (the state has 3*C + 2 inputs)
'-------------------------------------------------------------------------------------------------------------'

# ------------------------------------- Other params
ExpTag = 'FIFO10K'
max_reward = 0

'DQN parameters -----------------------------------------------------------------------------------------------'
Gamma = 0.5        # Discount factor
epsilon = 0.9      # Initial epsilon value
eps_min = 0.01     # Final epsilon value
batch_size = 256   # Batch size
lr = 0.001         # Learning rate
mem_size = 50000   # Replay memory size
'--------------------------------------------------------------------------------------------------------------'

'---------------------------------------'
train_interval = 10    # Training interval
interval = 500         # Result-printing interval
replace = 100          # Target-network update interval
Ns = 30001             # Number of intervals per episode
test_intervals = 500   # Test interval
episodes = 1           # Episodes (each episode randomly deploys UEs with new propagation conditions)
instances = 10         # Instances over which the results are averaged
source_seed = 0        # Fixed seed to initialize the environment (for replicability)
'---------------------------------------'

save_testing = True    # Turn on to evaluate training and testing performance
saving = False         # Save data {training performance, testing performance, ...}
save_model = False     # Save the trained model
load_model = False     # Load a model for additional training (parameter transfer learning)

filename = ''
Source_Name = ''
chkpt_dir = ''
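
# How the pieces fit together (summary of the training loop below): a single DQN
# agent is shared by all env.M BS-UE links. At every interval each link feeds its
# own local state into the shared network, the chosen action indexes the discrete
# power set returned by env.get_power_set(5) (index 0 = transmitter off, the
# remaining power_num - 1 levels spaced in dB between 5 dBm and max_p), and every
# link's transition is stored in the same replay buffer, which is sampled every
# train_interval intervals by agent.learn().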


def Train(env, env_testing, agent, episodes, Ns, interval, max_reward, instance, source_seed):
    Nbs = env.M  # Number of agents (i.e. BS-UE links)
    'Initialization --------------------------------------------------------------------------'
    action = np.zeros(Nbs, dtype=np.int32)
    test_action = np.zeros(Nbs, dtype=np.int32)
    terminal = Ns - 2
    terminal2 = Ns - 2 - interval
    'Available power levels (according to LTE, 5 dBm is the minimum power of a picocell)'
    av_pow = env.get_power_set(5)  # 5 dBm is the minimum transmission power
    env.set_Ns(Ns)          # Set the number of intervals for the training environment
    env_testing.set_Ns(Ns)  # Set the number of intervals for the testing environment
    Rate_dqn_list = list()
    Reward_dqn_list = list()
    Test_Rate_dqn_list = list()
    Test_Reward_dqn_list = list()

    if load_model:
        agent.load_models()
    '--------------------------------------------------------------------------------------------'

    for k in range(1, episodes + 1):
        state, _, _ = env.reset(seed=source_seed)
        state_testing, _, _ = env_testing.reset(seed=source_seed)

        # np.random.seed(instance)
        '-----------------------------------------------------------------------------------------------'
        for i in range(int(Ns) - 1):
            random_epsilon = np.random.random((Nbs))  # Individual exploration per link

            for i_agent_BS in np.arange(Nbs):
                action[i_agent_BS] = agent.choose_action(state[i_agent_BS, :], random_epsilon[i_agent_BS])

            'For Testing -----------------------------------------------------------------------------'
            if save_testing:
                # if (i % 10 == 0):  # Test every 10 time intervals
                for i_agent_BS in np.arange(Nbs):
                    test_action[i_agent_BS] = agent.choose_action_test(state_testing[i_agent_BS, :])
                new_state_testing, _, Test_reward, Test_sumrate = env_testing.step(av_pow[test_action])
                Test_Rate_dqn_list.append(Test_sumrate)  # Vector of mean rates per interval
                Test_Reward_dqn_list.append(np.mean(Test_reward))
                state_testing = np.copy(new_state_testing)
            'For Testing -----------------------------------------------------------------------------'

            new_state, _, reward, sumrate = env.step(av_pow[action])

            for i_agent_BS in np.arange(Nbs):
                agent.store_transition(state[i_agent_BS, :], np.array([action[i_agent_BS]]),
                                       np.array([reward[i_agent_BS]]), new_state[i_agent_BS, :])

            state = np.copy(new_state)

            Rate_dqn_list.append(sumrate)  # Vector of mean rates per interval
            Reward_dqn_list.append(np.mean(reward))

            if i % train_interval == 0:
                agent.learn()

            agent.decay_epsfix(int(0.5 * (Ns - 1)), i)

            if (i % interval == 0):
                reward = np.mean(Reward_dqn_list[-100:])
                test_reward = np.mean(Test_Reward_dqn_list[-10:])
                Average_rate = np.mean(Rate_dqn_list[-100:])
                Test_average_rate = np.mean(Test_Rate_dqn_list[-10:])

                if (i > terminal2) and save_model:
                    agent.save_models()
                print(
                    "Episode(train):%d interval:%d Rate: %.3f Test_Rate: %.3f Reward: %.3f Test_Reward:%.3f Epsilon: %.4f"
                    % (k, i, Average_rate, Test_average_rate, reward, test_reward, agent.epsilon))

    return Rate_dqn_list, Reward_dqn_list, Test_Rate_dqn_list, Test_Reward_dqn_list


Rates_matrix = np.zeros((instances, episodes * (Ns - 1)))
Reward_matrix = np.zeros((instances, episodes * (Ns - 1)))
Test_Rates_matrix = np.zeros((instances, episodes * (Ns - 1)))
Test_Rewards_matrix = np.zeros((instances, episodes * (Ns - 1)))

x_axis = np.arange(0, episodes * (Ns - 1))


env = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p, p_n, power_num)
env_testing = Env_cellular(fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p, p_n, power_num)

max_reward = 0
for i in np.arange(instances):
    q_eval_name = Source_Name + '_' + str(i) + '_eval'
    q_next_name = Source_Name + '_' + str(i) + '_next'

    agent = Agent(gamma=Gamma, epsilon=epsilon, lr=lr, input_dims=[env.state_num], n_actions=power_num,
                  mem_size=mem_size, eps_min=eps_min, batch_size=batch_size, replace=replace,
                  chkpt_dir=chkpt_dir, q_eval_name=q_eval_name, q_next_name=q_next_name, instance=i)

    X_Rate, X_Reward, Y_Rate, Y_Reward = Train(env=env, env_testing=env_testing, agent=agent, episodes=episodes,
                                               Ns=Ns, interval=interval, max_reward=max_reward, instance=i,
                                               source_seed=source_seed)

    Rates_matrix[i, :] = X_Rate
    Reward_matrix[i, :] = X_Reward
    if save_testing:
        Test_Rates_matrix[i, :] = Y_Rate
        Test_Rewards_matrix[i, :] = Y_Reward

    if saving:
        np.savez(filename, Training_Rates=Rates_matrix, Training_Rew=Reward_matrix,
                 Test_Rates=Test_Rates_matrix, Test_Rew=Test_Rewards_matrix)


import matplotlib.pyplot as plt

x_axis_1 = np.arange(0, len(Rates_matrix[0]))

plt.plot(x_axis_1, np.mean(Rates_matrix, axis=0), label='Training', color='tab:green', linewidth=1.0,
         linestyle='dashed')
plt.fill_between(x_axis_1, np.max(Rates_matrix, axis=0), np.min(Rates_matrix, axis=0), alpha=.3, color='tab:green')
if save_testing:
    plt.plot(x_axis_1, np.mean(Test_Rates_matrix, axis=0), label='Testing', color='tab:orange', linewidth=2.0,
             linestyle='dashed')
    plt.fill_between(x_axis_1, np.max(Test_Rates_matrix, axis=0), np.min(Test_Rates_matrix, axis=0), alpha=.3,
                     color='tab:orange')
plt.grid()
plt.legend()
plt.ylabel('average spectral efficiency (bps/Hz)')
plt.xlabel('Time intervals')
plt.title('Network Performance (Exploration with exponential decay : 0.9 -> 0.001)')
plt.show()

plt.plot(x_axis_1, np.mean(Reward_matrix, axis=0), label='Training', color='tab:blue', linewidth=1.0,
         linestyle='dashed')
plt.fill_between(x_axis_1, np.max(Reward_matrix, axis=0), np.min(Reward_matrix, axis=0), alpha=.3, color='tab:blue')
if save_testing:
    plt.plot(x_axis_1, np.mean(Test_Rewards_matrix, axis=0), label='Testing', color='tab:orange', linewidth=2.0,
             linestyle='dashed')
    plt.fill_between(x_axis_1, np.max(Test_Rewards_matrix, axis=0), np.min(Test_Rewards_matrix, axis=0), alpha=.3,
                     color='tab:orange')
plt.grid()
plt.legend()
plt.ylabel('Reward Function')
plt.xlabel('Time intervals')
plt.title('Model Performance (Exploration with exponential decay : 0.9 -> 0.001)')
plt.show()


def moving_average(a, n=50):
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n


a = 50
Training = moving_average(np.mean(Rates_matrix, axis=0), a)
Testing = moving_average(np.mean(Test_Rates_matrix, axis=0), a)
x_axis = np.arange(0, len(Training))
plt.plot(x_axis, Training, label='Training', color='tab:green', linewidth=1.0,
         linestyle='dashed')
if save_testing:
    plt.plot(x_axis, Testing, label='Testing', color='tab:orange', linewidth=2.0,
             linestyle='dashed')
plt.grid()
plt.legend()
plt.ylabel('average spectral efficiency (bps/Hz)')
plt.xlabel('Time intervals')
plt.title('Network Performance (Exploration with exponential decay : 0.9 -> 0.001)')
plt.show()

--------------------------------------------------------------------------------
/Environment.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 26 14:57:21 2018
minimum transmit power: 5 dBm / maximum: 38 dBm
bandwidth 10 MHz
AWGN power -114 dBm
path loss 120.9 + 37.6*log10(d) (dB), d: transmitting distance (km)
using interferers' set and therefore reducing the computation complexity
multiple users / single BS
downlink
localized reward function
@author: mengxiaomao
"""

import scipy
import numpy as np
from scipy import special

dtype = np.float32


class Env_cellular():
    def __init__(self, fd, Ts, n_x, n_y, L, C, maxM, min_dis, max_dis, max_p, p_n, power_num):
        self.fd = fd                # Maximum Doppler frequency
        self.Ts = Ts                # Time between adjacent intervals
        self.n_x = n_x              # BSs on the x axis
        self.n_y = n_y              # BSs on the y axis
        self.L = L                  # Outer cluster rings in consideration
        self.C = C                  # Ic: number of interferers considered in the state
        self.maxM = maxM            # Number of users per BS
        self.min_dis = min_dis      # Minimum UE-BS distance in km
        self.max_dis = max_dis      # Maximum UE-BS distance in km
        self.max_p = max_p          # Maximum power in dBm
        self.p_n = p_n              # Noise power in dBm
        self.power_num = power_num  # Number of actions

        self.c = 3 * self.L * (self.L + 1) + 1  # Adjacent BSs
        self.K = self.maxM * self.c             # Maximum number of adjacent users, including itself
        # self.state_num = 2 * self.C + 1
        self.state_num = 3 * self.C + 2         # C interference terms + (C + 1) powers + (C + 1) rates
        self.N = self.n_x * self.n_y            # Number of BSs
        self.M = self.N * self.maxM             # Maximum number of users
        self.W = np.ones((self.M), dtype=dtype)        # Bandwidth
        self.sigma2 = 1e-3 * pow(10., self.p_n / 10.)  # Noise power in watts
        self.maxP = 1e-3 * pow(10., self.max_p / 10.)  # Maximum power in watts
        self.p_array, self.p_list = self.generate_environment()  # Positional indexes of UEs

    def get_power_set(self, min_p):
        power_set = np.hstack(
            [np.zeros((1), dtype=dtype), 1e-3 * pow(10., np.linspace(min_p, self.max_p, self.power_num - 1) / 10.)])
        return power_set

    def set_Ns(self, Ns):
        self.Ns = int(Ns)

    def generate_H_set(self):
        '''
        Jakes model
        '''
        H_set = np.zeros([self.M, self.K, self.Ns], dtype=dtype)
        pho = np.float32(scipy.special.k0(2 * np.pi * self.fd * self.Ts))  # Correlation between adjacent intervals

        H_set[:, :, 0] = np.kron(
            np.sqrt(0.5 * (np.random.randn(self.M, self.c) ** 2 + np.random.randn(self.M, self.c) ** 2)),
            np.ones((1, self.maxM), dtype=np.int32))
        for i in range(1, self.Ns):
            H_set[:, :, i] = H_set[:, :, i - 1] * pho + np.sqrt(
                (1. - pho ** 2) * 0.5 * (np.random.randn(self.M, self.K) ** 2 + np.random.randn(self.M, self.K) ** 2))

        path_loss = self.generate_path_loss()
        H2_set = np.square(H_set) * np.tile(np.expand_dims(path_loss, axis=2), [1, 1, self.Ns])

        return H2_set
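
    # The fading envelope above follows a first-order autoregressive (Gauss-Markov)
    # update, H[t] = pho * H[t-1] + sqrt(1 - pho**2) * w[t], where w[t] is a
    # Rayleigh-distributed innovation and pho controls the correlation between
    # adjacent intervals (computed from fd and Ts). H2_set stores the squared
    # gains multiplied by the large-scale path loss for every user, interferer
    # and interval.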

    def generate_environment(self):
        path_matrix = self.M * np.ones((self.n_y + 2 * self.L, self.n_x + 2 * self.L, self.maxM), dtype=np.int32)

        for i in range(self.L, self.n_y + self.L):  # Loop for generating the adjacent UE indexes
            for j in range(self.L, self.n_x + self.L):
                for l in range(self.maxM):
                    path_matrix[i, j, l] = ((i - self.L) * self.n_x + (
                            j - self.L)) * self.maxM + l  # Matrix of positions by index
        p_array = np.zeros((self.M, self.K), dtype=np.int32)  # The K adjacent UEs of each UE
        for n in range(self.N):
            i = n // self.n_x  # '//' is floor division
            j = n % self.n_x
            Jx = np.zeros((0), dtype=np.int32)
            Jy = np.zeros((0), dtype=np.int32)
            for u in range(i - self.L, i + self.L + 1):
                v = 2 * self.L + 1 - np.abs(u - i)
                jx = j - (v - i % 2) // 2 + np.linspace(0, v - 1, num=v, dtype=np.int32) + self.L
                jy = np.ones((v), dtype=np.int32) * u + self.L
                Jx = np.hstack((Jx, jx))
                Jy = np.hstack((Jy, jy))
            for l in range(self.maxM):
                for k in range(self.c):
                    for u in range(self.maxM):
                        p_array[n * self.maxM + l, k * self.maxM + u] = path_matrix[Jy[k], Jx[k], u]
        p_main = p_array[:, (self.c - 1) // 2 * self.maxM:(self.c + 1) // 2 * self.maxM]
        for n in range(self.N):
            for l in range(self.maxM):
                temp = p_main[n * self.maxM + l, l]
                p_main[n * self.maxM + l, l] = p_main[n * self.maxM + l, 0]
                p_main[n * self.maxM + l, 0] = temp
        p_inter = np.hstack([p_array[:, :(self.c - 1) // 2 * self.maxM], p_array[:, (self.c + 1) // 2 * self.maxM:]])
        p_array = np.hstack([p_main, p_inter])
        p_list = list()
        for m in range(self.M):
            p_list_temp = list()
            for k in range(self.K):
                p_list_temp.append([p_array[m, k]])
            p_list.append(p_list_temp)

        return p_array, p_list

    def generate_path_loss(self):
        p_tx = np.zeros((self.n_y, self.n_x))
        p_ty = np.zeros((self.n_y, self.n_x))
        p_rx = np.zeros((self.n_y, self.n_x, self.maxM))
        p_ry = np.zeros((self.n_y, self.n_x, self.maxM))
        dis_rx = np.random.uniform(self.min_dis, self.max_dis, size=(self.n_y, self.n_x, self.maxM))
        phi_rx = np.random.uniform(-np.pi, np.pi, size=(self.n_y, self.n_x, self.maxM))
        for i in range(self.n_y):
            for j in range(self.n_x):
                p_tx[i, j] = 2 * self.max_dis * j + (i % 2) * self.max_dis
                p_ty[i, j] = np.sqrt(3.) * self.max_dis * i
                for k in range(self.maxM):
                    p_rx[i, j, k] = p_tx[i, j] + dis_rx[i, j, k] * np.cos(phi_rx[i, j, k])
                    p_ry[i, j, k] = p_ty[i, j] + dis_rx[i, j, k] * np.sin(phi_rx[i, j, k])
        dis = 1e10 * np.ones((self.p_array.shape[0], self.K), dtype=dtype)  # Dummy distance for nonexistent BSs
        lognormal = np.random.lognormal(size=(self.p_array.shape[0], self.K), sigma=8)

        for k in range(self.p_array.shape[0]):
            for i in range(self.c):
                for j in range(self.maxM):
                    if self.p_array[k, i * self.maxM + j] < self.M:
                        bs = self.p_array[k, i * self.maxM + j] // self.maxM
                        dx2 = np.square((p_rx[k // self.maxM // self.n_x][k // self.maxM % self.n_x][k % self.maxM] -
                                         p_tx[bs // self.n_x][bs % self.n_x]))
                        dy2 = np.square((p_ry[k // self.maxM // self.n_x][k // self.maxM % self.n_x][k % self.maxM] -
                                         p_ty[bs // self.n_x][bs % self.n_x]))
                        distance = np.sqrt(dx2 + dy2)
                        dis[k, i * self.maxM + j] = distance

        path_loss = lognormal * pow(10., -(128.1 + 37.6 * np.log10(dis)) / 10.)  # Distance-based path loss with log-normal shadowing

        return path_loss

    def calculate_rate(self, P):
        H2 = self.H2_set[:, :, self.count]  # Squared channel gains of each UE and its interferers at interval self.count
        p_extend = np.concatenate([P, np.zeros((1), dtype=dtype)], axis=0)  # Extra zero power for nonexistent neighbors
        p_matrix = p_extend[self.p_array]
        path_main = H2[:, 0] * p_matrix[:, 0]  # Desired-link received power (first column) for every UE
        path_inter = np.sum(H2[:, 1:] * p_matrix[:, 1:], axis=1)  # Aggregate interference per UE
        maxC = 1000.  # Capped SINR: 10 ** (30 / 10) = 1000
        sinr = np.minimum(path_main / (path_inter + self.sigma2), maxC)  # Capped SINR per UE

        index = np.where(sinr <= 10 ** ((-3.8279) / 10))[0]  # SINR threshold
        rate = self.W * np.log2(1. + sinr)  # Rates per UE
        rate[index] = 0

        rate_extend = np.concatenate([rate, np.zeros((1), dtype=dtype)], axis=0)  # Zero rate for nonexistent neighbors

        reward_rate = np.zeros(self.M)
        for i in np.arange(0, self.M):
            X = self.p_array[i, 1:]
            Neighbors = np.extract(X != self.M, X)
            index_aux = np.where(self.p_array[Neighbors, :] == i)[1]
            inter_substract = H2[Neighbors, index_aux] * p_matrix[Neighbors, index_aux]
            sinr_k = np.minimum((path_main[Neighbors] / ((path_inter[Neighbors] - inter_substract) + self.sigma2)), maxC)
            index_k = np.where(sinr_k <= 10 ** ((-3.8279) / 10))[0]  # SINR threshold
            C_k = np.log2(1. + sinr_k)  # Neighbors' rates without UE i's interference
            C_k[index_k] = 0

            pi_aux = (C_k - rate[Neighbors])
            reward_rate[i] = rate[i] - np.sum(pi_aux)

        rate_matrix = rate_extend[self.p_array]  # Rates of all adjacent UEs, used to build the state
        '''
        Calculate reward, sum-rate
        '''
        sum_rate = np.mean(rate)  # Average rate over all UEs

        return p_matrix, rate_matrix, reward_rate, sum_rate
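
    # Localized reward used above: for UE i, reward_rate[i] = C_i minus the sum,
    # over its neighbors k, of (C_k without UE i's interference - C_k), i.e. the
    # UE's own rate minus the rate gain its neighbors would obtain if UE i stopped
    # interfering. This penalizes power settings whose benefit to one link comes
    # at a larger cost to the surrounding links.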

    def generate_next_state(self, H2, p_matrix, rate_matrix):
        sinr_norm_inv = H2[:, 1:] / np.tile(H2[:, 0:1], [1, self.K - 1])
        sinr_norm_inv = np.log2(1. + sinr_norm_inv)  # Log representation
        indices1 = np.tile(
            np.expand_dims(np.linspace(0, p_matrix.shape[0] - 1, num=p_matrix.shape[0], dtype=np.int32), axis=1),
            [1, self.C])
        indices2 = np.argsort(sinr_norm_inv, axis=1)[:, -self.C:]  # Keep only the C strongest interferers
        sinr_norm_inv = sinr_norm_inv[indices1, indices2]
        p_last = np.hstack([p_matrix[:, 0:1], p_matrix[indices1, indices2 + 1]])
        rate_last = np.hstack([rate_matrix[:, 0:1], rate_matrix[indices1, indices2 + 1]])
        s_actor_next = np.hstack([sinr_norm_inv, p_last, rate_last])

        '''
        Generate state for critic
        '''
        s_critic_next = H2
        return s_actor_next, s_critic_next

    def counter_step(self):
        self.count += 1

    def reset(self, seed=None):
        np.random.seed(seed)

        self.count = 0  # Interval counter initialization
        self.H2_set = self.generate_H_set()  # Reset UE locations and generate correlated gains for Ns intervals
        P = np.zeros([self.M], dtype=dtype)  # Power is set to zero for the initial interval (one entry per UE)

        p_matrix, rate_matrix, _, _ = self.calculate_rate(P)  # Zero-matrix initialization, since P = zeros
        H2 = self.H2_set[:, :, self.count]  # Channel gains at interval zero (maxM*BSs x interferers*maxM)
        s_actor, s_critic = self.generate_next_state(H2, p_matrix, rate_matrix)

        return s_actor, s_critic, self.H2_set

    def testing_reset(self, seed=None):
        np.random.seed(seed)

        self.count = 0  # Interval counter initialization
        self.H2_set = self.generate_H_set()  # Reset UE locations and generate correlated gains for Ns intervals

        np.random.seed(42)
        rand_idx = np.random.permutation(self.H2_set.shape[2])
        self.H2_set = self.H2_set[:, :, rand_idx]  # Shuffle the intervals for testing

        P = np.zeros([self.M], dtype=dtype)  # Power is set to zero for the initial interval (one entry per UE)

        p_matrix, rate_matrix, _, _ = self.calculate_rate(P)  # Zero-matrix initialization, since P = zeros
        H2 = self.H2_set[:, :, self.count]  # Channel gains at interval zero (maxM*BSs x interferers*maxM)
        s_actor, s_critic = self.generate_next_state(H2, p_matrix, rate_matrix)

        return s_actor, s_critic, self.H2_set

    def reset_bench(self, seed=None):
        np.random.seed(seed)

        self.count = 0  # Interval counter initialization
        self.H2_set, self.H2_set_2 = self.generate_H_set_2()  # Note: generate_H_set_2 is not defined in this module

        return self.H2_set, self.H2_set_2

    def reset__(self, seed=None):
        np.random.seed(seed)
        self.count = 0
        self.H2_set = self.generate_H_set()
        H2 = self.H2_set[:, :, self.count]

        return H2

    def step(self, P):
        p_matrix, rate_matrix, reward_rate, sum_rate = self.calculate_rate(P)
        self.count = self.count + 1
        H2_next = self.H2_set[:, :, self.count]
        s_actor_next, s_critic_next = self.generate_next_state(H2_next, p_matrix, rate_matrix)
        return s_actor_next, s_critic_next, reward_rate, sum_rate

    def calculate_sumrate_fp(self, P, H2):
        p_extend = np.concatenate([P, np.zeros((1), dtype=dtype)], axis=0)
        p_matrix = p_extend[self.p_array]
        path_main = H2[:, 0] * p_matrix[:, 0]
        path_inter = np.sum(H2[:, 1:] * p_matrix[:, 1:], axis=1)

        maxC = 1000.
        sinr = np.minimum(path_main / (path_inter + self.sigma2), maxC)  # Capped SINR
        index = np.where(sinr <= 10 ** ((-3.8279) / 10))[0]  # SINR threshold

        rate = self.W * np.log2(1. + sinr)  # Rates per UE
        rate[index] = 0

        sum_rate = np.mean(rate)
        return sum_rate

    def calculate_sumrate(self, P):
        H2 = self.H2_set[:, :, self.count]
        p_extend = np.concatenate([P, np.zeros((1), dtype=dtype)], axis=0)
        p_matrix = p_extend[self.p_array]
        path_main = H2[:, 0] * p_matrix[:, 0]
        path_inter = np.sum(H2[:, 1:] * p_matrix[:, 1:], axis=1)

        maxC = 1000.
        sinr = np.minimum(path_main / (path_inter + self.sigma2), maxC)  # Capped SINR

        index = np.where(sinr <= 10 ** ((-3.8279) / 10))[0]  # SINR threshold

        rate = self.W * np.log2(1. + sinr)  # Rates per UE
        rate[index] = 0

        sum_rate = np.mean(rate)

        return sum_rate

    def step__(self, P):
        reward_rate = list()

        for p in P:
            reward_rate.append(self.calculate_sumrate(p))
        self.count = self.count + 1
        H2_next = self.H2_set[:, :, self.count]

        return H2_next, reward_rate

--------------------------------------------------------------------------------