├── .gitignore ├── Actor.py ├── Critic.py ├── Images ├── EVALUATION │ └── Enero_3top_15_B_NEW │ │ ├── Figure_5.png │ │ └── Figure_6.png └── TRAINING │ └── Enero_3top_15_B_NEW │ ├── ACTORLossEnero_3top_15_B_NEW.png │ ├── AvgRewardEnero_3top_15_B_NEW.png │ ├── AvgStdUtiEnero_3top_15_B_NEW.png │ ├── CRITICLossEnero_3top_15_B_NEW.png │ ├── ErrorLinksEnero_3top_15_B_NEW.png │ ├── Lr_Enero_3top_15_B_NEW.png │ ├── MaxLinkUtiEnero_3top_15_B_NEW.png │ └── MinLinkUtiEnero_3top_15_B_NEW.png ├── LICENSE ├── Logs └── expEnero_3top_15_B_NEWLogs.txt ├── README.md ├── actor_critic.py ├── defo_process_results.py ├── eval.py ├── eval_on_single_topology.py ├── figures_5_and_6.py ├── gym-graph ├── gym_graph │ ├── __init__.py │ └── envs │ │ ├── __init__.py │ │ ├── environment15.py │ │ ├── environment16.py │ │ └── environment20.py └── setup.py ├── gym_env ├── gym_env │ ├── __init__.py │ └── envs │ │ ├── __init__.py │ │ └── env1.py └── setup.py ├── main.py ├── modelsEnero_3top_15_B_NEW ├── actor_60.pt ├── actor_final.pt ├── critic_60.pt └── critic_final.pt ├── parse_PPO.py ├── requitrment.txt ├── runs ├── 1 │ ├── events.out.tfevents.1650360660.barry.182599.0 │ └── events.out.tfevents.1650363931.barry.184203.0 └── 2 │ └── events.out.tfevents.1650452288.barry.199430.0 └── script_eval_on_single_topology.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | test.py 3 | gym_env/gym_env.egg-info/ 4 | -------------------------------------------------------------------------------- /Actor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | 6 | class Actor(nn.Module): 7 | def __init__(self, feature_size=20, t=4, readout_units=20): 8 | super(Actor, self).__init__() 9 | self.feature_size = feature_size 10 | self.t = t 11 | self.readout_units = readout_units 12 | self.message = nn.Sequential( 13 | nn.Linear(feature_size*2, feature_size), 14 | nn.SELU() 15 | ) 16 | self.message.apply(self._init_hidden_weights) 17 | self.update = nn.GRUCell(input_size=feature_size, hidden_size=feature_size) 18 | self.update.apply(self._init_hidden_weights) 19 | self.readout = nn.Sequential( 20 | nn.Linear(feature_size, self.readout_units), 21 | nn.SELU(), 22 | nn.Linear(self.readout_units, self.readout_units), 23 | nn.SELU() 24 | ) 25 | self.readout.apply(self._init_hidden_weights) 26 | self.out_layer = nn.Linear(self.readout_units, 1) 27 | torch.nn.init.orthogonal_(self.out_layer.weight, gain=np.sqrt(0.01)) 28 | torch.nn.init.constant_(self.out_layer.bias, 0) 29 | 30 | def _init_hidden_weights(self, m): 31 | if isinstance(m, nn.Linear): 32 | torch.nn.init.orthogonal_(m.weight, gain=np.sqrt(2)) 33 | torch.nn.init.constant_(m.bias, 0) 34 | if isinstance(m, nn.GRUCell): 35 | torch.nn.init.xavier_uniform_(m.weight_ih) 36 | torch.nn.init.xavier_uniform_(m.weight_hh) 37 | torch.nn.init.constant_(m.bias_ih, 0) 38 | torch.nn.init.constant_(m.bias_hh, 0) 39 | 40 | def forward(self, x): 41 | state = x['link_state'] 42 | first = x['first'].unsqueeze(1).expand(-1, x['state_dim']) 43 | second = x['second'].unsqueeze(1).expand(-1, x['state_dim']) 44 | graph_id = x['graph_id'].unsqueeze(1).expand(-1, x['state_dim']) 45 | 46 | for _ in range(self.t): 47 | main_edges = torch.gather(state, 0, first) 48 | neigh_edges = torch.gather(state, 0, second) 49 | edges_concat = torch.cat((main_edges, neigh_edges), 1) 50 | m = self.message(edges_concat) 51 | 52 | m = torch.zeros(state.shape, 
dtype=m.dtype, device=state.device).scatter_add_(0, second, m) 53 | state = self.update(m, state) 54 | 55 | feature = torch.zeros((x['num_actions'], x['state_dim']), dtype=state.dtype, 56 | device=state.device).scatter_add_(0, graph_id, state) 57 | output = self.out_layer(self.readout(feature)) 58 | 59 | return output 60 | 61 | 62 | """ 63 | 64 | link_state: 65 | link_capacity: float 66 | link_utilization: float 67 | action: mark bw 68 | bw: float 69 | 70 | input: 71 | link_state 72 | pair: [0, 1] => [[0, 0], [1, 1]] 73 | 74 | """ -------------------------------------------------------------------------------- /Critic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | 6 | class Critic(nn.Module): 7 | def __init__(self, feature_size=20, t=4, readout_units=20): 8 | super(Critic, self).__init__() 9 | self.feature_size = feature_size 10 | self.t = t 11 | self.readout_units = readout_units 12 | self.message = nn.Sequential( 13 | nn.Linear(feature_size*2, feature_size), 14 | nn.SELU() 15 | ) 16 | self.message.apply(self._init_hidden_weights) 17 | self.update = nn.GRUCell(input_size=feature_size, hidden_size=feature_size) 18 | self.update.apply(self._init_hidden_weights) 19 | self.readout = nn.Sequential( 20 | nn.Linear(feature_size, self.readout_units), 21 | nn.SELU(), 22 | nn.Linear(self.readout_units, self.readout_units), 23 | nn.SELU() 24 | ) 25 | self.readout.apply(self._init_hidden_weights) 26 | self.out_layer = nn.Linear(self.readout_units, 1) 27 | torch.nn.init.orthogonal_(self.out_layer.weight, gain=np.sqrt(1)) 28 | torch.nn.init.constant_(self.out_layer.bias, 0) 29 | 30 | def _init_hidden_weights(self, m): 31 | if isinstance(m, nn.Linear): 32 | torch.nn.init.orthogonal_(m.weight, gain=np.sqrt(2)) 33 | torch.nn.init.constant_(m.bias, 0) 34 | if isinstance(m, nn.GRUCell): 35 | torch.nn.init.xavier_uniform_(m.weight_ih) 36 | torch.nn.init.xavier_uniform_(m.weight_hh) 37 | torch.nn.init.constant_(m.bias_ih, 0) 38 | torch.nn.init.constant_(m.bias_hh, 0) 39 | 40 | def forward(self, x): 41 | state = x['link_state'] 42 | first = x['first'].unsqueeze(1).expand(-1, x['state_dim']) 43 | second = x['second'].unsqueeze(1).expand(-1, x['state_dim']) 44 | 45 | for _ in range(self.t): 46 | main_edges = torch.gather(state, 0, first) 47 | neigh_edges = torch.gather(state, 0, second) 48 | edges_concat = torch.cat((main_edges, neigh_edges), 1) 49 | m = self.message(edges_concat) 50 | 51 | m = torch.zeros(state.shape, dtype=m.dtype, device=state.device).scatter_add_(0, second, m) 52 | state = self.update(m, state) 53 | 54 | feature = torch.sum(state, 0) 55 | output = self.out_layer(self.readout(feature)) 56 | 57 | return output 58 | 59 | 60 | """ 61 | 62 | link_state: 63 | link_capacity: float 64 | link_utilization: float 65 | action: mark bw 66 | bw: float 67 | 68 | input: 69 | link_state 70 | pair: [0, 1] => [[0, 0], [1, 1]] 71 | 72 | """ -------------------------------------------------------------------------------- /Images/EVALUATION/Enero_3top_15_B_NEW/Figure_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/EVALUATION/Enero_3top_15_B_NEW/Figure_5.png -------------------------------------------------------------------------------- /Images/EVALUATION/Enero_3top_15_B_NEW/Figure_6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/EVALUATION/Enero_3top_15_B_NEW/Figure_6.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/ACTORLossEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/ACTORLossEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/AvgRewardEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/AvgRewardEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/AvgStdUtiEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/AvgStdUtiEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/CRITICLossEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/CRITICLossEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/ErrorLinksEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/ErrorLinksEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/Lr_Enero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/Lr_Enero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/MaxLinkUtiEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/MaxLinkUtiEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/MinLinkUtiEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/MinLinkUtiEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- 
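Note on the input contract shared by Actor.py and Critic.py above: 'link_state' holds one feature row per link, 'first'/'second' hold the indices of the link pairs that exchange messages, and (for the Actor only) 'graph_id' maps every link row to the candidate action it belongs to. The following is a minimal, illustrative sketch of driving Actor.forward with a hypothetical batch of two candidate actions of two links each; the shapes follow what PPOAC.predict() builds, but the values are made up and this snippet is not part of the repository:

import torch
from Actor import Actor

feature_size = 20
actor = Actor(feature_size=feature_size)

# Rows 0-1 belong to candidate action 0, rows 2-3 to candidate action 1.
link_state = torch.zeros((4, feature_size))
link_state[:, 0] = torch.tensor([0.3, 0.7, 0.3, 0.7])  # link utilization (hypothetical)
link_state[:, 1] = torch.tensor([1.0, 0.5, 1.0, 0.5])  # link capacity (hypothetical)
link_state[:, 2] = torch.tensor([0.2, 0.0, 0.0, 0.2])  # bw marked by each candidate action

x = {
    'link_state': link_state,
    'first': torch.tensor([0, 1, 2, 3]),     # message-sending link of each pair
    'second': torch.tensor([1, 0, 3, 2]),    # message-receiving link of each pair
    'graph_id': torch.tensor([0, 0, 1, 1]),  # candidate action each link belongs to
    'state_dim': feature_size,
    'num_actions': 2,
}
q_values = actor(x)                                 # shape (2, 1): one logit per candidate
probs = torch.softmax(q_values.reshape(-1), dim=0)  # as done in PPOAC.predict()

Critic.forward consumes the same dictionary minus 'graph_id'/'num_actions' and returns a single value for the whole graph, since it sums the link states instead of pooling them per action.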
/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Barry0310 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: Git 3 | --- 4 | # DRL-GNN-implement 5 | 6 | - A PyTorch rewrite of the ENERO DRL agent (original implementation in TensorFlow) 7 | 8 | - Reference: 9 | - [Towards Real-Time Routing Optimization with 10 | Deep Reinforcement Learning: Open Challenges](https://arxiv.org/pdf/2106.09754.pdf) 11 | - [ENERO: Efficient Real-Time WAN Routing 12 | Optimization with Deep Reinforcement Learning](https://arxiv.org/pdf/2109.10883.pdf) 13 | - https://github.com/BNN-UPC/ENERO 14 | 15 | - train: `python main.py` 16 | - eval: `python eval.py` -------------------------------------------------------------------------------- /actor_critic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from Actor import Actor 4 | from Critic import Critic 5 | import torch.optim as optim 6 | from collections import deque 7 | import gc 8 | 9 | 10 | class PPOAC: 11 | def __init__(self, hyper_parameter, device=None): 12 | H = hyper_parameter 13 | self.gae_gamma = H['gae_gamma'] 14 | self.gae_lambda = H['gae_lambda'] 15 | self.clip_value = H['clip_value'] 16 | self.mini_batch = H['mini_batch'] 17 | self.feature_size = H['feature_size'] 18 | self.entropy_beta = H['entropy_beta'] 19 | self.buffer_size = H['buffer_size'] 20 | self.update_times = H['update_times'] 21 | self.actor = Actor(feature_size=self.feature_size, t=H['t'], readout_units=H['readout_units']) 22 | self.critic = Critic(feature_size=self.feature_size, t=H['t'], readout_units=H['readout_units']) 23 | self.optimizer = optim.AdamW(list(self.actor.parameters()) + list(self.critic.parameters()), lr=H['lr']) 24 | self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=H['lr_decay_step'], 25 | gamma=H['lr_decay_rate']) 26 | 27 | self.buffer = deque(maxlen=self.buffer_size) 28 | self.buffer_index = np.arange(self.buffer_size) 29 | self.device = device if device else torch.device("cuda" if torch.cuda.is_available() else "cpu") 30 | self.actor.to(self.device) 31 | self.critic.to(self.device) 32 | 33 | def old_cummax(self, alist, extractor): 34 | maxes = torch.tensor([torch.amax(extractor(v)) + 1 
for v in alist]) 35 | cummaxes = [torch.zeros_like(maxes[0])] 36 | for i in range(len(maxes) - 1): 37 | cummaxes.append(torch.sum(maxes[0:i + 1])) 38 | return torch.tensor(cummaxes) 39 | 40 | def predict(self, env, src, dst): 41 | list_k_features = [] 42 | 43 | middle_point_list = env.src_dst_k_middlepoints[str(src) + ':' + str(dst)] 44 | for mid in range(len(middle_point_list)): 45 | env.mark_action_sp(src, middle_point_list[mid], src, dst) 46 | if middle_point_list[mid] != dst: 47 | env.mark_action_sp(middle_point_list[mid], dst, src, dst) 48 | features = self.actor_get_graph_features(env) 49 | list_k_features.append(features) 50 | env.edge_state[:, 2] = 0 51 | 52 | graph_ids = [torch.full([list_k_features[it]['link_state'].shape[0]], it) for it in range(len(list_k_features))] 53 | 54 | first_offset = self.old_cummax(list_k_features, lambda v: v['first']) 55 | second_offset = self.old_cummax(list_k_features, lambda v: v['second']) 56 | tensor = { 57 | 'graph_id': torch.cat([v for v in graph_ids], dim=0).to(self.device), 58 | 'link_state': torch.cat([v['link_state'] for v in list_k_features], dim=0).to(self.device), 59 | 'first': torch.cat([v['first'] + m for v, m in zip(list_k_features, first_offset)], dim=0,).to(self.device), 60 | 'second': torch.cat([v['second'] + m for v, m in zip(list_k_features, second_offset)], dim=0).to(self.device), 61 | 'state_dim': self.feature_size, 62 | 'num_actions': len(middle_point_list), 63 | } 64 | q_values = self.actor(tensor) 65 | q_values = torch.reshape(q_values, (-1, )) 66 | soft_max_q_values = torch.nn.functional.softmax(q_values, dim=0) 67 | 68 | return soft_max_q_values, tensor 69 | 70 | def actor_get_graph_features(self, env): 71 | temp = { 72 | 'num_edges': env.numEdges, 73 | 'length': env.firstTrueSize, 74 | 'capacity': env.link_capacity_feature, 75 | 'bw_allocated': env.edge_state[:,2], 76 | 'utilization': np.divide(env.edge_state[:,0], env.edge_state[:, 1]), 77 | 'first': env.first, 78 | 'second': env.second 79 | } 80 | 81 | temp['utilization'] = torch.reshape(torch.tensor(temp['utilization'][0:temp['num_edges']], dtype=torch.float32), 82 | (temp['num_edges'], 1)) 83 | temp['capacity'] = torch.reshape(torch.tensor(temp['capacity'][0:temp['num_edges']], dtype=torch.float32), 84 | (temp['num_edges'], 1)) 85 | temp['bw_allocated'] = torch.reshape(torch.tensor(temp['bw_allocated'][0:temp['num_edges']], 86 | dtype=torch.float32), (temp['num_edges'], 1)) 87 | 88 | hidden_states = torch.cat([temp['utilization'], temp['capacity'], temp['bw_allocated']], dim=1) 89 | link_state = torch.nn.functional.pad(hidden_states, (0, self.feature_size - 3), 'constant') 90 | 91 | inputs = {'link_state': link_state, 'first': torch.tensor(temp['first'][0:temp['length']]), 92 | 'second': torch.tensor(temp['second'][0:temp['length']])} 93 | 94 | return inputs 95 | 96 | def critic_get_graph_features(self, env): 97 | temp = { 98 | 'num_edges': env.numEdges, 99 | 'length': env.firstTrueSize, 100 | 'capacity': env.link_capacity_feature, 101 | 'utilization': np.divide(env.edge_state[:, 0], env.edge_state[:, 1]), 102 | 'first': env.first, 103 | 'second': env.second 104 | } 105 | 106 | temp['utilization'] = torch.reshape(torch.tensor(temp['utilization'][0:temp['num_edges']], dtype=torch.float32), 107 | [temp['num_edges'], 1]) 108 | temp['capacity'] = torch.reshape(torch.tensor(temp['capacity'][0:temp['num_edges']], dtype=torch.float32), 109 | [temp['num_edges'], 1]) 110 | 111 | hidden_states = torch.cat([temp['utilization'], temp['capacity']], dim=1) 112 | link_state = 
torch.nn.functional.pad(hidden_states, (0, self.feature_size - 2), 'constant') 113 | 114 | inputs = {'link_state': link_state.to(self.device), 115 | 'first': torch.tensor(temp['first'][0:temp['length']]).to(self.device), 116 | 'second': torch.tensor(temp['second'][0:temp['length']]).to(self.device), 117 | 'state_dim': self.feature_size} 118 | 119 | return inputs 120 | 121 | def compute_gae(self, values, masks, rewards): 122 | returns = [] 123 | gae = 0 124 | 125 | for i in reversed(range(len(rewards))): 126 | delta = rewards[i] + self.gae_gamma * values[i+1] * masks[i] - values[i] 127 | gae = delta + self.gae_gamma * self.gae_lambda * masks[i] * gae 128 | returns.insert(0, gae + values[i]) 129 | 130 | adv = np.array(returns) - values[:-1] 131 | 132 | return returns, (adv - np.mean(adv)) / (np.std(adv) + 1e-10) 133 | 134 | def _compute_actor_loss(self, adv, old_act, old_policy_probs, link_state, graph_id, 135 | first, second, state_dim, num_actions): 136 | old_policy_probs = old_policy_probs.detach() 137 | 138 | q_values = self.actor({ 139 | 'graph_id': graph_id, 140 | 'link_state': link_state, 141 | 'first': first, 142 | 'second': second, 143 | 'state_dim': state_dim, 144 | 'num_actions': num_actions, 145 | }) 146 | q_values = torch.reshape(q_values, (-1,)) 147 | new_policy_probs = torch.nn.functional.softmax(q_values, dim=0) 148 | 149 | ratio = torch.exp( 150 | torch.log(torch.sum(old_act * new_policy_probs)) - torch.log(torch.sum(old_act * old_policy_probs)) 151 | ) 152 | surr1 = -ratio*adv 153 | surr2 = -torch.clip(ratio, min=1-0.1, max=1+0.1) * adv 154 | 155 | loss = torch.max(surr1, surr2) 156 | entropy = -torch.sum(torch.log(new_policy_probs) * new_policy_probs) 157 | 158 | return loss, entropy 159 | 160 | def _compute_critic_loss(self, ret, link_state, first, second, state_dim): 161 | 162 | value = self.critic({ 163 | 'link_state': link_state, 164 | 'first': first, 165 | 'second': second, 166 | 'state_dim': state_dim 167 | })[0] 168 | loss = torch.square(ret - value) 169 | 170 | return loss 171 | 172 | def update(self, actions, actions_probs, tensors, critic_features, returns, advantages): 173 | 174 | for pos in range(self.buffer_size): 175 | tensor = tensors[pos] 176 | critic_feature = critic_features[pos] 177 | action = actions[pos] 178 | ret = returns[pos] 179 | adv = advantages[pos] 180 | action_dist = actions_probs[pos] 181 | 182 | update_tensor = { 183 | 'graph_id': tensor['graph_id'], 184 | 'link_state': tensor['link_state'], 185 | 'first': tensor['first'], 186 | 'second': tensor['second'], 187 | 'state_dim': tensor['state_dim'], 188 | 'num_actions': tensor['num_actions'], 189 | 'link_state_critic': critic_feature['link_state'], 190 | 'old_act': action.to(self.device), 191 | 'adv': adv, 192 | 'old_policy_probs': action_dist, 193 | 'first_critic': critic_feature['first'], 194 | 'second_critic': critic_feature['second'], 195 | 'ret': ret, 196 | } 197 | 198 | self.buffer.append(update_tensor) 199 | 200 | for i in range(self.update_times): 201 | np.random.shuffle(self.buffer_index) 202 | for start in range(0, self.buffer_size, self.mini_batch): 203 | end = start + self.mini_batch 204 | entropy = 0 205 | actor_loss = 0 206 | critic_loss = 0 207 | for index in self.buffer_index[start:end]: 208 | sample = self.buffer[index] 209 | 210 | sample_actor_loss, sample_entropy = self._compute_actor_loss(sample['adv'], sample['old_act'], 211 | sample['old_policy_probs'], 212 | sample['link_state'], 213 | sample['graph_id'], sample['first'], 214 | sample['second'], sample['state_dim'], 215 | 
sample['num_actions']) 216 | sample_critic_loss = self._compute_critic_loss(sample['ret'], sample['link_state_critic'], 217 | sample['first_critic'], sample['second_critic'], 218 | sample['state_dim']) 219 | entropy += sample_entropy 220 | actor_loss += sample_actor_loss 221 | critic_loss += sample_critic_loss 222 | 223 | entropy /= self.mini_batch 224 | actor_loss = actor_loss / self.mini_batch - self.entropy_beta * entropy 225 | critic_loss /= self.mini_batch 226 | 227 | total_loss = actor_loss + critic_loss 228 | self.optimizer.zero_grad() 229 | total_loss.backward() 230 | torch.nn.utils.clip_grad_norm_(list(self.actor.parameters())+list(self.critic.parameters()), 231 | max_norm=self.clip_value) 232 | self.optimizer.step() 233 | 234 | self.buffer.clear() 235 | gc.collect() 236 | return actor_loss, critic_loss 237 | 238 | 239 | -------------------------------------------------------------------------------- /defo_process_results.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import numpy as np 4 | import re 5 | import sys 6 | import networkx as nx 7 | 8 | node_to_index_dic = {} 9 | index_to_node_lst = [] 10 | 11 | def index_to_node(n): 12 | return(index_to_node_lst[n]) 13 | 14 | def node_to_index(node): 15 | return(node_to_index_dic[node]) 16 | 17 | 18 | class Defo_results: 19 | 20 | net_size = 0 21 | MP_matrix = None 22 | ecmp_routing_matrix = None 23 | routing_matrix = None 24 | links_bw = None 25 | links_weight = None 26 | Gbase = None 27 | node_to_index_dic_pvt = None 28 | index_to_node_lst_pvt = None 29 | pre_optim_max_load_link = None 30 | post_optim_max_load_link = None 31 | 32 | def __init__(self, graph_file, results_file): 33 | self.graph_file = graph_file 34 | # We comment it as we don't use the results for now. 
We focus on SP 35 | #self.results_file = results_file 36 | self.Gbase = nx.MultiDiGraph() 37 | self.process_graph_file() 38 | 39 | #self.process() 40 | 41 | def read_max_load_link (self, standard_out_file): 42 | with open(standard_out_file) as fd: 43 | while (True): 44 | line = fd.readline() 45 | if line.startswith("pre-optimization"): 46 | camps = line.split(" ") 47 | print(camps) 48 | self.pre_optim_max_load_link = float(camps[-1].split('\n')[0]) 49 | elif line.startswith("post-optimization"): 50 | camps = line.split(" ") 51 | self.post_optim_max_load_link = float(camps[-1].split('\n')[0]) 52 | break 53 | return (self.pre_optim_max_load_link, self.post_optim_max_load_link) 54 | 55 | def process_graph_file(self): 56 | with open(self.graph_file) as fd: 57 | line = fd.readline() 58 | camps = line.split(" ") 59 | self.net_size = int(camps[1]) 60 | # Remove : label x y 61 | line = fd.readline() 62 | 63 | for i in range (self.net_size): 64 | line = fd.readline() 65 | node = line[0:line.find(" ")] 66 | node_to_index_dic[node] = i 67 | index_to_node_lst.append(node) 68 | 69 | self.links_bw = [] 70 | self.links_weight = [] 71 | for i in range(self.net_size): 72 | self.links_bw.append({}) 73 | self.links_weight.append({}) 74 | for line in fd: 75 | if (not line.startswith("Link_") and not line.startswith("edge_")): 76 | continue 77 | camps = line.split(" ") 78 | src = int(camps[1]) 79 | dst = int(camps[2]) 80 | weight = int(camps[3]) 81 | bw = float(camps[4]) 82 | self.Gbase.add_edge(src, dst) 83 | self.links_bw[src][dst] = bw 84 | self.links_weight[src][dst] = weight 85 | self.node_to_index_dic_pvt = node_to_index_dic 86 | self.index_to_node_lst_pvt = index_to_node_lst 87 | 88 | def process (self): 89 | with open(self.results_file) as fd: 90 | while (True): 91 | line = fd.readline() 92 | if (line == ""): 93 | break 94 | if (line.startswith("*")): 95 | if (line == "***Next hops priority 2 (sr paths)***\n"): 96 | self._read_middle_points(fd) 97 | if (line == "***Next hops priority 3 (ecmp paths)***\n"): 98 | self._read_ecmp_routing(fd) 99 | break 100 | self._gen_routing_matrix() 101 | 102 | def _read_middle_points(self,fd): 103 | self.MP_matrix = np.zeros((self.net_size,self.net_size),dtype="object") 104 | while (True): 105 | pos = fd.tell() 106 | line = fd.readline() 107 | if (line.startswith("*")): 108 | fd.seek(pos) 109 | return 110 | if (not line.startswith("seq")): 111 | continue 112 | line = line[line.find(": ")+2:] 113 | if (line[-1]=='\n'): 114 | line = line[:-1] 115 | 116 | ptr = 0 117 | mp_path = [] 118 | while (True): 119 | prev_ptr = ptr 120 | ptr = line.find(" -> ",ptr) 121 | if (ptr == -1): 122 | mp_path.append(line[prev_ptr:]) 123 | break 124 | else: 125 | mp_path.append(line[prev_ptr:ptr]) 126 | ptr += 4 127 | src = node_to_index(mp_path[0]) 128 | dst = node_to_index(mp_path[-1]) 129 | self.MP_matrix[src,dst] = mp_path 130 | 131 | 132 | def _read_ecmp_routing(self,fd): 133 | self.ecmp_routing_matrix = np.zeros((self.net_size,self.net_size),dtype="object") 134 | next_node_matrix = np.zeros((self.net_size,self.net_size),dtype="object") 135 | dst_node = None 136 | while (True): 137 | line = fd.readline() 138 | if (line == ""): 139 | break 140 | if (line.startswith("Destination")): 141 | dst_node_str = line[line.find(" ")+1:-1] 142 | dst_node = node_to_index(dst_node_str) 143 | if (line.startswith("node")): 144 | src_node_str = line[6:line.find(", ")] 145 | src_node = node_to_index(src_node_str) 146 | sub_line = line[line.find("[")+1:line.find("]")] 147 | ptr = 0 148 | next_node_lst = [] 
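# Parse the comma-separated next-hop list inside "[...]"; the traffic proportion is later split evenly across these next hops (see proportion/num_next_hops below).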
149 | while (True): 150 | prev_ptr = ptr 151 | ptr = sub_line.find(", ",ptr) 152 | if (ptr == -1): 153 | next_node_lst.append(sub_line[prev_ptr:]) 154 | break 155 | else: 156 | next_node_lst.append(sub_line[prev_ptr:ptr]) 157 | ptr += 2 158 | 159 | next_node_matrix[src_node,dst_node] = next_node_lst 160 | 161 | for i in range (self.net_size): 162 | for j in range (self.net_size): 163 | end_paths = [] 164 | paths_info = [{"path":[index_to_node(i)],"proportion":1.0}] 165 | while (len(paths_info) != 0): 166 | for path_info in paths_info: 167 | path = path_info["path"] 168 | if (node_to_index(path[-1]) == j): 169 | paths_info.remove(path_info) 170 | end_paths.append(path_info) 171 | continue 172 | next_lst = next_node_matrix[node_to_index(path[-1]),j] 173 | num_next_hops = len(next_lst) 174 | if (num_next_hops > 1): 175 | for next_node in next_lst: 176 | new_path = list(path) 177 | new_path.append(next_node) 178 | paths_info.append({"path":new_path,"proportion":path_info["proportion"]/num_next_hops}) 179 | paths_info.remove(path_info) 180 | else: 181 | path.append(next_lst[0]) 182 | self.ecmp_routing_matrix[i,j] = end_paths 183 | 184 | def _gen_routing_matrix(self): 185 | self.routing_matrix = np.zeros((self.net_size,self.net_size),dtype="object") 186 | for i in range(self.net_size): 187 | for j in range(self.net_size): 188 | if (i == j): 189 | continue 190 | end_path_info_list = [] 191 | mp_path = self.MP_matrix[i,j] 192 | #print (i,j,mp_path) 193 | src_mp = mp_path[0] 194 | for mp in mp_path: 195 | dst_mp = mp 196 | sub_path_info_lst = self.ecmp_routing_matrix[node_to_index(src_mp),node_to_index(dst_mp)] 197 | if (len(end_path_info_list) == 0): 198 | for sub_path_info in sub_path_info_lst: 199 | end_path_info_list.append({"path":sub_path_info["path"][:-1],"proportion":sub_path_info["proportion"]}) 200 | elif (len(sub_path_info_lst) > 1): 201 | aux_end_path_list = [] 202 | for path_info in end_path_info_list: 203 | for sub_path_info in sub_path_info_lst: 204 | new_path = list(path_info["path"]) 205 | new_path.extend(sub_path_info["path"][:-1]) 206 | aux_end_path_list.append({"path":new_path,"proportion":path_info["proportion"]*sub_path_info["proportion"]}) 207 | end_path_info_list = aux_end_path_list 208 | else: 209 | for path_info in end_path_info_list: 210 | path_info["path"].extend(sub_path_info_lst[0]["path"][:-1]) 211 | src_mp = dst_mp 212 | for path_info in end_path_info_list: 213 | path_info["path"].append(dst_mp) 214 | self.routing_matrix[i,j] = end_path_info_list 215 | 216 | def _get_traffic_matrix (self,traffic_file): 217 | tm = np.zeros((self.net_size,self.net_size)) 218 | with open(traffic_file) as fd: 219 | fd.readline() 220 | fd.readline() 221 | for line in fd: 222 | camps = line.split(" ") 223 | # We force that the bws are integers 224 | tm[int(camps[1]),int(camps[2])] = np.floor(float(camps[3])) 225 | return (tm) 226 | 227 | def _link_utilization(self, routing_matrix, traffic_file): 228 | link_utilization = [] 229 | traffic_matrix = self._get_traffic_matrix(traffic_file) 230 | for i in range(self.net_size): 231 | link_utilization.append({}) 232 | for i in range(self.net_size): 233 | for j in range (self.net_size): 234 | if (i==j): 235 | continue 236 | traffic_all_path = traffic_matrix[i,j] 237 | routings_lst = routing_matrix[i,j] 238 | for path_info in routings_lst: 239 | path = path_info["path"] 240 | traffic = traffic_all_path*path_info["proportion"] 241 | n0 = path[0] 242 | for n1 in path[1:]: 243 | N0 = node_to_index(n0) 244 | N1 = node_to_index(n1) 245 | if N1 in 
link_utilization[N0]: 246 | link_utilization[N0][N1] += traffic 247 | else: 248 | link_utilization[N0][N1] = traffic 249 | n0 = n1 250 | max_lu = (0,0,0) 251 | for i in range(self.net_size): 252 | for j in link_utilization[i].keys(): 253 | link_traffic = link_utilization[i][j] 254 | link_capacity = self.links_bw[i][j] 255 | link_utilization[i][j] = link_traffic / link_capacity 256 | if (link_utilization[i][j] > max_lu[0]): 257 | max_lu = (link_utilization[i][j], i, j) 258 | #return (link_utilization, max_lu) 259 | return (max_lu) 260 | 261 | def get_opt_link_utilization(self,traffic_file): 262 | return (self._link_utilization(self.routing_matrix,traffic_file)) 263 | 264 | def get_direct_link_utilization(self,traffic_file): 265 | return (self._link_utilization(self.ecmp_routing_matrix,traffic_file)) 266 | 267 | if (__name__ == "__main__"): 268 | 269 | args = sys.argv 270 | if ("-h" in args): 271 | print ("HELP: python3 ./defo_process_results.py <graph_file> <results_file> <tm_file>") 272 | exit() 273 | 274 | # graph_file = args[1] 275 | # results_file = args[2] 276 | # tm_file = args[3] 277 | 278 | # results = Defo_results(graph_file,results_file) 279 | 280 | # print ("============== Direct =====================") 281 | # print (results.get_direct_link_utilization(tm_file)) 282 | # print ("============== Optim =====================") 283 | # print (results.get_opt_link_utilization(tm_file)) 284 | 285 | for tm_id in range(1): 286 | graph_topology_name = "VisionNet" 287 | graph_file = "../DEFOResults/results-1-link_capacity-unif-05-1-zoo/"+graph_topology_name+"/"+graph_topology_name+".graph" 288 | results_file = "../DEFOResults/results-1-link_capacity-unif-05-1-zoo/"+graph_topology_name+"/res_"+graph_topology_name+"_"+str(tm_id) 289 | tm_file = "../DEFOResults/results-1-link_capacity-unif-05-1-zoo/"+graph_topology_name+"/"+graph_topology_name+"."+str(tm_id)+".demands" 290 | results = Defo_results(graph_file,results_file) 291 | num_demands_changed = 0 292 | for i in range(results.net_size): 293 | for j in range (results.net_size): 294 | if (i!=j): 295 | if len(results.MP_matrix[i,j])>2: 296 | num_demands_changed+=1 297 | print("For tm_id: ", tm_id, " we have changed ", num_demands_changed, " demands") 298 | 299 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | if __name__ == "__main__": 4 | topo = ["NEW_EliBackbone/EVALUATE", "NEW_Janetbackbone/EVALUATE", "NEW_HurricaneElectric/EVALUATE"] 5 | log = "Enero_3top_15_B_NEW" 6 | subprocess.call(["python", "parse_PPO.py", "-d", "./Logs/exp" + log + "Logs.txt"]) 7 | for t in topo: 8 | subprocess.call(["python", "eval_on_single_topology.py", 9 | "-max_edge", "100", "-min_edge", "5", 10 | "-max_nodes", "30", "-min_nodes", "1", 11 | "-n", "2", 12 | "-f1", "results_single_top", "-f2", t, 13 | "-d", f"./Logs/exp{log}Logs.txt"]) 14 | subprocess.call(["python", "figures_5_and_6.py", "-d", log]) 15 | -------------------------------------------------------------------------------- /eval_on_single_topology.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import argparse 4 | from multiprocessing import Pool 5 | 6 | def worker_execute(args): 7 | tm_id = args[0] 8 | model_id = args[1] 9 | drl_eval_res_folder = args[2] 10 | differentiation_str = args[3] 11 | graph_topology_name = args[4] 12 | general_dataset_folder = args[5] 13 | specific_dataset_folder = args[6] 14 | 15 | 
subprocess.call(["python", "script_eval_on_single_topology.py", "-t", str(tm_id), "-m", str(model_id), "-g", graph_topology_name, "-o", drl_eval_res_folder, "-d", differentiation_str, "-f", general_dataset_folder, "-f2", specific_dataset_folder]) 16 | 17 | if __name__ == "__main__": 18 | # First we execute this script to evaluate our drl agent over different topologies from the folder (argument -f2) 19 | # python eval_on_single_topology.py -max_edge 100 -min_edge 5 -max_nodes 30 -min_nodes 1 -n 2 -f1 results_single_top -f2 NEW_Garr199905/EVALUATE -d ./Logs/expSP_3top_15_B_NEWLogs.txt 20 | # To parse the results of this script, we must then execute the parse_middrouting_files.py file 21 | 22 | # Parse logs and get best model 23 | parser = argparse.ArgumentParser(description='Parse file and create plots') 24 | 25 | parser.add_argument('-d', help='logs data file', type=str, required=True, nargs='+') 26 | parser.add_argument('-f1', help='Dataset name within dataset_sing_top', type=str, required=True, nargs='+') 27 | parser.add_argument('-f2', help='specific dataset folder name of the topology to evaluate on', type=str, required=True, nargs='+') 28 | parser.add_argument('-max_edge', help='maximum number of edges the topology can have', type=int, required=True, nargs='+') 29 | parser.add_argument('-min_edge', help='minimum number of edges the topology can have', type=int, required=True, nargs='+') 30 | parser.add_argument('-max_nodes', help='maximum number of nodes the topology can have', type=int, required=True, nargs='+') 31 | parser.add_argument('-min_nodes', help='minimum number of nodes the topology can have', type=int, required=True, nargs='+') 32 | parser.add_argument('-n', help='number of processes to use for the pool (number of DEFO instances running at the same time)', type=int, required=True, nargs='+') 33 | 34 | args = parser.parse_args() 35 | 36 | aux = args.d[0].split(".") 37 | aux = aux[1].split("exp") 38 | differentiation_str = str(aux[1].split("Logs")[0]) 39 | 40 | # Point to the folder where the datasets of argument f2 are located 41 | general_dataset_folder = "../Enero_datasets/dataset_sing_top/data/results_my_3_tops_unif_05-1/"+args.f2[0]+"/" 42 | # In this folder we store the rewards that will later be parsed for plotting 43 | drl_eval_res_folder = "../Enero_datasets/dataset_sing_top/data/"+args.f1[0]+"/evalRes_"+args.f2[0]+"/" 44 | 45 | if not os.path.exists("./Images"): 46 | os.makedirs("./Images") 47 | 48 | if not os.path.exists(drl_eval_res_folder): 49 | os.makedirs(drl_eval_res_folder) 50 | 51 | if not os.path.exists(drl_eval_res_folder+differentiation_str): 52 | os.makedirs(drl_eval_res_folder+differentiation_str) 53 | else: 54 | os.system("rm -rf %s" % (drl_eval_res_folder+differentiation_str)) 55 | os.makedirs(drl_eval_res_folder+differentiation_str) 56 | 57 | model_id = 0 58 | # Load best model 59 | with open(args.d[0]) as fp: 60 | for line in reversed(list(fp)): 61 | arrayLine = line.split(":") 62 | if arrayLine[0]=='MAX REWD': 63 | model_id = int(arrayLine[2].split(",")[0]) 64 | break 65 | 66 | # Iterate over all topologies and evaluate our DRL agent on all TMs 67 | for subdir, dirs, files in os.walk(general_dataset_folder): 68 | for file in files: 69 | if file.endswith((".graph")): 70 | topology_num_nodes = 0 71 | with open(general_dataset_folder+file) as fd: 72 | # Loop to read the Number of NODES and EDGES 73 | while (True): 74 | line = fd.readline() 75 | if (line == ""): 76 | break 77 | if (line.startswith("NODES")): 78 | topology_num_nodes = int(line.split(' 
')[1]) 79 | 80 | # If we are inside the range of number of nodes 81 | if topology_num_nodes>=args.min_nodes[0] and topology_num_nodes<=args.max_nodes[0]: 82 | if (line.startswith("EDGES")): 83 | topology_num_edges = int(line.split(' ')[1]) 84 | # If we are inside the range of number of edges 85 | if topology_num_edges<=args.max_edge[0] and topology_num_edges>=args.min_edge[0]: 86 | topology_Name = file.split('.')[0] 87 | print("*****") 88 | print("***** Evaluating on file: "+file+" with number of edges "+str(topology_num_edges)) 89 | print("*****") 90 | argums = [(tm_id, model_id, drl_eval_res_folder, differentiation_str, topology_Name, general_dataset_folder, args.f2[0]) for tm_id in range(50)] 91 | with Pool(processes=args.n[0]) as pool: 92 | pool.map(worker_execute, argums) 93 | else: 94 | break 95 | 96 | -------------------------------------------------------------------------------- /figures_5_and_6.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | import seaborn as sns 5 | from itertools import cycle 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | import pickle 9 | 10 | def smooth(scalars, weight): # Weight between 0 and 1 11 | last = scalars[0] # First value in the plot (first timestep) 12 | smoothed = list() 13 | for point in scalars: 14 | smoothed_val = last * weight + (1 - weight) * point # Calculate smoothed value 15 | smoothed.append(smoothed_val) # Save it 16 | last = smoothed_val # Anchor the last smoothed value 17 | 18 | return smoothed 19 | 20 | def frange(x, y, jump): 21 | while x < y: 22 | yield x 23 | x += jump 24 | 25 | #folders = ["../Enero_datasets/dataset_sing_top/data/results_single_top/evalRes_NEW_Garr199905/EVALUATE/"] 26 | folders = ["../Enero_datasets/dataset_sing_top/data/results_single_top/evalRes_NEW_EliBackbone/EVALUATE/","../Enero_datasets/dataset_sing_top/data/results_single_top/evalRes_NEW_Janetbackbone/EVALUATE/","../Enero_datasets/dataset_sing_top/data/results_single_top/evalRes_NEW_HurricaneElectric/EVALUATE/"] 27 | 28 | if __name__ == "__main__": 29 | # This script plots Figures 5 and 6 from the COMNET 2022 paper. 30 | 31 | # Before executing this file we must execute the eval_on_single_topology.py file to evaluate the DRL model and store the results 32 | # We also need to evaluate DEFO for these new topologies. To do this, I copy the corresponding 33 | # folder where it needs to be and I execute the script run_Defo_single_top.py for each topology. 
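# For reference, smooth() above is a plain exponential moving average; e.g. smooth([0, 1, 1], weight=0.5) returns [0, 0.5, 0.75].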
34 | # python figures_5_and_6.py -d SP_3top_15_B_NEW 35 | parser = argparse.ArgumentParser(description='Parse files and create plots') 36 | 37 | # The flag 'd' indicates the directory where to store the figures 38 | parser.add_argument('-d', help='differentiation string for the model', type=str, required=True, nargs='+') 39 | 40 | args = parser.parse_args() 41 | 42 | differentiation_str = args.d[0] 43 | 44 | drl_top1_uti = [] 45 | ls_top1_uti = [] 46 | enero_top1_uti = [] 47 | cost_drl_top1 = [] 48 | cost_ls_top1 = [] 49 | cost_enero_top1 = [] 50 | 51 | drl_top2_uti = [] 52 | ls_top2_uti = [] 53 | enero_top2_uti = [] 54 | cost_drl_top2 = [] 55 | cost_ls_top2 = [] 56 | cost_enero_top2 = [] 57 | 58 | drl_top3_uti = [] 59 | ls_top3_uti = [] 60 | enero_top3_uti = [] 61 | cost_drl_top3 = [] 62 | cost_ls_top3 = [] 63 | cost_enero_top3 = [] 64 | 65 | if not os.path.exists("./Images"): 66 | os.makedirs("./Images") 67 | 68 | path_to_dir = "./Images/EVALUATION/"+differentiation_str+'/' 69 | 70 | if not os.path.exists(path_to_dir): 71 | os.makedirs(path_to_dir) 72 | 73 | dd_Eli = pd.DataFrame(columns=['AC','LS','Enero','Topologies']) 74 | dd_Janet = pd.DataFrame(columns=['AC','LS','Enero','Topologies']) 75 | dd_Hurricane = pd.DataFrame(columns=['AC','LS','Enero','Topologies']) 76 | 77 | # Iterate over all topologies and evaluate our DRL agent on all TMs 78 | for folder in folders: 79 | drl_eval_res_folder = folder+differentiation_str+'/' 80 | topology_eval_name = folder.split('NEW_')[1].split('/')[0] 81 | for subdir, dirs, files in os.walk(drl_eval_res_folder): 82 | it = 0 83 | for file in files: 84 | if file.endswith((".pckl")): 85 | results = [] 86 | path_to_pckl_rewards = drl_eval_res_folder + topology_eval_name + '/' 87 | with open(path_to_pckl_rewards+file, 'rb') as f: 88 | results = pickle.load(f) 89 | if folder==folders[0]: 90 | dd_Eli.loc[it] = [results[9],results[7],results[3],topology_eval_name] 91 | cost_ls_top1.append(results[15]) 92 | cost_drl_top1.append(results[14]) 93 | cost_enero_top1.append(results[16]) 94 | elif folder==folders[1]: 95 | dd_Janet.loc[it] = [results[9],results[7],results[3],topology_eval_name] 96 | cost_ls_top2.append(results[15]) 97 | cost_drl_top2.append(results[14]) 98 | cost_enero_top2.append(results[16]) 99 | else: 100 | dd_Hurricane.loc[it] = [results[9],results[7],results[3],topology_eval_name] 101 | cost_ls_top3.append(results[15]) 102 | cost_drl_top3.append(results[14]) 103 | cost_enero_top3.append(results[16]) 104 | it += 1 105 | 106 | plt.rcParams['axes.titlesize'] = 20 107 | plt.rcParams['figure.figsize'] = (11.5, 9) 108 | plt.rcParams['xtick.labelsize'] = 22 109 | plt.rcParams['ytick.labelsize'] = 22 110 | plt.rcParams['legend.fontsize'] = 17 111 | fig, ax = plt.subplots() 112 | 113 | n = np.arange(1,len(cost_ls_top1)+1) / float(len(cost_ls_top1)) 114 | Xs = np.sort(cost_ls_top1) 115 | ax.step(Xs,n, c='cyan', linestyle=(0, (1,1)), label="LS EliBackbone", linewidth=4) 116 | Xs = np.sort(cost_drl_top1) 117 | ax.step(Xs,n,c='darkgreen', linestyle='-', label="DRL EliBackbone", linewidth=4) 118 | Xs = np.sort(cost_enero_top1) 119 | ax.step(Xs,n,c='maroon', linestyle=(0, (2.5, 1)),label="Enero EliBackbone", linewidth=4) 120 | Xs = np.sort(cost_ls_top2) 121 | ax.step(Xs,n, c='dodgerblue', linestyle=(0, (1, 2.5)),label="LS Janetbackbone", linewidth=4) 122 | Xs = np.sort(cost_drl_top2) 123 | ax.step(Xs,n,c='lime', linestyle='-',label="DRL Janetbackbone", linewidth=4) 124 | Xs = np.sort(cost_enero_top2) 125 | ax.step(Xs,n,c='red', linestyle=(0, (2.5, 
3)),label="Enero Janetbackbone", linewidth=4) 126 | Xs = np.sort(cost_ls_top3) 127 | ax.step(Xs,n, c='navy', linestyle=(0, (1,6)),label="LS HurricaneElectric", linewidth=4) 128 | Xs = np.sort(cost_drl_top3) 129 | ax.step(Xs,n,c='palegreen', linestyle='-',label="DRL HurricaneElectric", linewidth=4) 130 | Xs = np.sort(cost_enero_top3) 131 | ax.step(Xs,n,c='orange', linestyle=(0, (2.5, 6)),label="Enero HurricaneElectric", linewidth=4) 132 | 133 | plt.ylim((0, 1.005)) 134 | plt.xlim((0, 50.0)) 135 | plt.xticks(np.arange(0, 50, 8)) 136 | plt.ylabel('CDF', fontsize=22) 137 | plt.xlabel("Execution Cost (s)", fontsize=20) 138 | plt.grid(color='gray') 139 | plt.legend(loc='lower right', ncol=3, bbox_to_anchor=(1.03, -0.3)) 140 | plt.tight_layout() 141 | plt.savefig(path_to_dir+'Figure_6.png', bbox_inches='tight',pad_inches = 0) 142 | plt.close() 143 | 144 | 145 | # Define some hatches 146 | hatches = cycle(['-', '|', '']) 147 | cdf = pd.concat([dd_Eli,dd_Janet,dd_Hurricane]) 148 | mdf = pd.melt(cdf, id_vars=['Topologies'], var_name=['Topology']) # MELT 149 | ax = sns.boxplot(x="Topologies", y="value", hue="Topology", data=mdf, palette="mako") # RUN PLOT 150 | plt.rcParams['axes.grid'] = True 151 | plt.rcParams['figure.figsize'] = (3.47, 2.0) 152 | plt.rcParams['axes.titlesize'] = 22 153 | plt.rcParams['xtick.labelsize'] = 22 154 | plt.rcParams['ytick.labelsize'] = 22 155 | plt.rcParams['legend.fontsize'] = 24 156 | ax.set_xlabel("",fontsize=0) 157 | ax.set_ylabel("Maximum Link Utilization",fontsize=24) 158 | plt.rcParams["axes.labelweight"] = "bold" 159 | ax.grid(which='major', axis='y', linestyle='-') 160 | plt.rcParams.update({'font.size': 22}) 161 | plt.rcParams['pdf.fonttype'] = 42 162 | # Loop over the bars 163 | for i, patch in enumerate(ax.artists): 164 | # Boxes from left to right 165 | hatch = next(hatches) 166 | patch.set_hatch(hatch*2) 167 | col = patch.get_facecolor() 168 | #patch.set_edgecolor(col) 169 | patch.set_edgecolor("black") 170 | patch.set_facecolor('None') 171 | 172 | # Each box has 6 associated Line2D objects (to make the whiskers, fliers, etc.) 
173 | # Loop over them here, and use the same colour as above 174 | for j in range(i * 6, i * 6 + 6): 175 | line = ax.lines[j] 176 | line.set_color("black") 177 | line.set_mfc("black") 178 | line.set_mec("black") 179 | # Change color of the median 180 | if j == i*6+4: 181 | line.set_color("orange") 182 | line.set_mfc("orange") 183 | line.set_mec("orange") 184 | 185 | for i, patch in enumerate(ax.patches): 186 | hatch = next(hatches) 187 | patch.set_hatch(hatch*2) 188 | col = patch.get_facecolor() 189 | #patch.set_edgecolor(col) 190 | patch.set_edgecolor("black") 191 | patch.set_facecolor('None') 192 | 193 | plt.legend(loc='upper left', ncol=3) 194 | plt.ylim((0.5, 1.2)) 195 | plt.tight_layout() 196 | plt.savefig(path_to_dir+'Figure_5.png', bbox_inches='tight',pad_inches = 0) 197 | plt.clf() 198 | plt.close() 199 | -------------------------------------------------------------------------------- /gym-graph/gym_graph/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | 4 | register( 5 | id='GraphEnv-v15', 6 | entry_point='gym_graph.envs:Env15', 7 | ) 8 | 9 | register( 10 | id='GraphEnv-v16', 11 | entry_point='gym_graph.envs:Env16', 12 | ) 13 | 14 | register( 15 | id='GraphEnv-v20', 16 | entry_point='gym_graph.envs:Env20', 17 | ) 18 | -------------------------------------------------------------------------------- /gym-graph/gym_graph/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_graph.envs.environment15 import Env15 2 | from gym_graph.envs.environment16 import Env16 3 | from gym_graph.envs.environment20 import Env20 -------------------------------------------------------------------------------- /gym-graph/gym_graph/envs/environment15.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import networkx as nx 4 | import random 5 | from gym import error, spaces, utils 6 | from random import choice 7 | import pandas as pd 8 | import pickle 9 | import json 10 | import os.path 11 | import gc 12 | import defo_process_results as defoResults 13 | 14 | class Env15(gym.Env): 15 | """ 16 | Environment used for the simulated annealing and hill climbing benchmarks in the 17 | script_eval_on_single_topology.py with SP only! No ecmp at all here! 18 | 19 | Environment used in the middlepoint routing problem using SP to reach a middlepoint. 20 | We are using bidirectional links in this environment! 21 | self.edge_state[:][0] = link utilization 22 | self.edge_state[:][1] = link capacity 23 | """ 24 | def __init__(self): 25 | self.graph = None # Here we store the graph as DiGraph (without repeated edges) 26 | self.source = None 27 | self.destination = None 28 | self.demand = None 29 | 30 | self.edge_state = None 31 | self.graph_topology_name = None # Here we store the name of the graph topology from the repetita dataset 32 | self.dataset_folder_name = None # Here we store the name of the repetita dataset being used: 2015Defo, 2016TopologyZoo_unary, 2016TopologyZoo_inverseCapacity, etc. 33 | 34 | self.diameter = None 35 | self.list_of_demands_to_change = None # Eligible demands coming from the DRL agent 36 | 37 | # Nx Graph where the nodes have features. Betweenness is always normalized. 
38 | # The other features are "raw" and are being normalized before prediction 39 | self.between_feature = None 40 | 41 | self.sp_middlepoints = None # For each src,dst we store the nodeId of the sp middlepoint 42 | self.shortest_paths = None # For each src,dst we store the shortest path to reach d 43 | 44 | # Mean and standard deviation of link betweenness 45 | self.mu_bet = None 46 | self.std_bet = None 47 | 48 | # Episode length in timesteps 49 | self.episode_length = None 50 | 51 | self.list_eligible_demands = None # Here we store those demands from DEFO that have one middlepoint. These demands will be eligible for selection by our DRL agent. 52 | self.num_critical_links = 5 53 | 54 | # Error at the end of episode to evaluate the learning process 55 | self.error_evaluation = None 56 | # Ideal target link capacity: self.sumTM/self.numEdges 57 | self.target_link_capacity = None 58 | 59 | self.TM = None # Traffic matrix where self.TM[src][dst] indicates how many packets are sent from src to dst 60 | self.meanTM = None 61 | self.stdTM = None 62 | self.sumTM = None 63 | self.routing = None # Loaded routing matrix 64 | self.paths_Matrix_from_routing = None # We store a list of paths extracted from the routing matrix for each src-dst pair 65 | 66 | self.K = None 67 | self.nodes = None # List of nodes to pick randomly from them 68 | self.ordered_edges = None 69 | self.edgesDict = dict() # Stores the position id of each edge in order 70 | self.previous_path = None 71 | 72 | self.src_dst_k_middlepoints = None # For each src, dst, we store the k middlepoints 73 | self.node_to_index_dic = None # For each node from the real graph we store its index 74 | self.index_to_node_lst = None # We store a list of nodes in an ordered fashion 75 | 76 | self.numNodes = None 77 | self.numEdges = None 78 | self.numSteps = 0 # As our problem can go on forever, we limit it to 10 steps 79 | 80 | self.sameLink = False # Indicates if we are working with the same link 81 | 82 | # We store the edge that has maximum utilization 83 | # (src, dst, MaxUtilization) 84 | self.edgeMaxUti = None 85 | # We store the path with more bandwidth from the edge with maximum utilization 86 | # (src, dst, MaxBandwidth) 87 | self.patMaxBandwth = None 88 | self.maxBandwidth = None 89 | 90 | self.episode_over = True 91 | self.reward = 0 92 | self.allPaths = dict() # Stores the paths for each src:dst pair 93 | 94 | def seed(self, seed): 95 | random.seed(seed) 96 | np.random.seed(seed) 97 | 98 | def add_features_to_edges(self): 99 | incId = 1 100 | for node in self.graph: 101 | for adj in self.graph[node]: 102 | if not 'edgeId' in self.graph[node][adj][0]: 103 | self.graph[node][adj][0]['edgeId'] = incId 104 | if not 'numsp' in self.graph[node][adj][0]: 105 | self.graph[node][adj][0]['numsp'] = 0 106 | if not 'utilization' in self.graph[node][adj][0]: 107 | self.graph[node][adj][0]['utilization'] = 0 108 | if not 'capacity' in self.graph[node][adj][0]: 109 | self.graph[node][adj][0]['capacity'] = 0 110 | if not 'weight' in self.graph[node][adj][0]: 111 | self.graph[node][adj][0]['weight'] = 0 112 | if not 'crossing_paths' in self.graph[node][adj][0]: # We store all the src,dst from the paths crossing each edge 113 | self.graph[node][adj][0]['crossing_paths'] = dict() 114 | incId = incId + 1 115 | 116 | def decrease_links_utilization_sp(self, src, dst, init_source, final_destination): 117 | # In this function we deallocate the bandwidth by segments. 
This function is used when we want 118 | # to deallocate from a src to a middlepoint and then from the middlepoint to a dst using the sp 119 | 120 | # We obtain the demand from the original source,destination pair 121 | bw_allocated = self.TM[init_source][final_destination] 122 | currentPath = self.shortest_paths[src,dst] 123 | 124 | i = 0 125 | j = 1 126 | while (j < len(currentPath)): 127 | firstNode = currentPath[i] 128 | secondNode = currentPath[j] 129 | 130 | self.graph[firstNode][secondNode][0]['utilization'] -= bw_allocated 131 | if str(init_source)+':'+str(final_destination) in self.graph[firstNode][secondNode][0]['crossing_paths']: 132 | del self.graph[firstNode][secondNode][0]['crossing_paths'][str(init_source)+':'+str(final_destination)] 133 | self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][0] = self.graph[firstNode][secondNode][0]['utilization'] 134 | i = i + 1 135 | j = j + 1 136 | 137 | def _generate_tm(self, tm_id): 138 | # Sample a file randomly to initialize the tm 139 | graph_file = self.dataset_folder_name+"/"+self.graph_topology_name+".graph" 140 | # This 'results_file' file is ignored! 141 | results_file = self.dataset_folder_name+"/res_"+self.graph_topology_name+"_"+str(tm_id) 142 | tm_file = self.dataset_folder_name+"/TM/"+self.graph_topology_name+'.'+str(tm_id)+".demands" 143 | 144 | self.defoDatasetAPI = defoResults.Defo_results(graph_file,results_file) 145 | self.links_bw = self.defoDatasetAPI.links_bw 146 | self.TM = self.defoDatasetAPI._get_traffic_matrix(tm_file) 147 | 148 | self.maxBandwidth = np.amax(self.TM) 149 | 150 | traffic = np.copy(self.TM) 151 | # Remove diagonal from matrix 152 | traffic = traffic[~np.eye(traffic.shape[0], dtype=bool)].reshape(traffic.shape[0], -1) 153 | 154 | self.sumTM = np.sum(traffic) 155 | self.target_link_capacity = self.sumTM/self.numEdges 156 | self.meanTM = np.mean(traffic) 157 | self.stdTM = np.std(traffic) 158 | 159 | def compute_link_utilization_reset_sp(self): 160 | # Compute the paths that cross each link and then add up the bandwidth to obtain the link utilization 161 | for src in range (0,self.numNodes): 162 | for dst in range (0,self.numNodes): 163 | if src!=dst: 164 | self.allocate_to_destination_sp(src, dst, src, dst) 165 | 166 | def mark_edges(self, action_flags, src, dst, init_source, final_destination): 167 | currentPath = self.shortest_paths[src,dst] 168 | 169 | i = 0 170 | j = 1 171 | 172 | while (j < len(currentPath)): 173 | firstNode = currentPath[i] 174 | secondNode = currentPath[j] 175 | 176 | action_flags[self.edgesDict[str(firstNode)+':'+str(secondNode)]] += 1.0 177 | i = i + 1 178 | j = j + 1 179 | 180 | 181 | def mark_action_to_edges(self, first_node, init_source, final_destination): 182 | # In this function we mark, for each link, the bw that it will allocate. 
We will 183 | # use this to avoid repeated actions 184 | action_flags = np.zeros(self.numEdges) 185 | 186 | # Mark until first_node 187 | self.mark_edges(action_flags, init_source, first_node, init_source, final_destination) 188 | 189 | # If the first node is a middlepoint 190 | if first_node!=final_destination: 191 | self.mark_edges(action_flags, first_node, final_destination, init_source, final_destination) 192 | 193 | return action_flags 194 | 195 | def compute_middlepoint_set_remove_rep_actions_no_loop(self): 196 | # In this function we compute the middlepoint set but we don't take into account the middlepoints whose 197 | # actions are repeated nor those middlepoints whose SPs pass over the DST node 198 | 199 | # Compute SPs for each src,dst pair 200 | self.compute_SPs() 201 | 202 | # We compute the middlepoint set for each src,dst pair and we don't consider repeated actions 203 | self.src_dst_k_middlepoints = dict() 204 | # Iterate over all node1,node2 pairs from the graph 205 | for n1 in range (0,self.numNodes): 206 | for n2 in range (0,self.numNodes): 207 | if (n1 != n2): 208 | self.src_dst_k_middlepoints[str(n1)+':'+str(n2)] = list() 209 | repeated_actions = list() 210 | for midd in range (0,self.K): 211 | # If the middlepoint is not the source node 212 | if midd!=n1: 213 | action_flags = self.mark_action_to_edges(midd, n1, n2) 214 | # If we allocated to a middlepoint that is not the final destination 215 | if midd!=n2: 216 | # If the repeated_actions list is empty we make the following verifications 217 | if len(repeated_actions) == 0: 218 | 219 | path1 = self.shortest_paths[n1, midd] 220 | path2 = self.shortest_paths[midd, n2] 221 | 222 | # Check that the dst node is not in the SP to avoid loops! 223 | currentPath = path1[:len(path1)-1]+path2 224 | dst_counter = 0 225 | for node in currentPath: 226 | if node==n2 or node==n1: 227 | dst_counter += 1 228 | # If there is only one dst node 229 | if dst_counter==2: 230 | repeated_actions.append(action_flags) 231 | self.src_dst_k_middlepoints[str(n1)+':'+str(n2)].append(midd) 232 | else: 233 | repeatedAction = False 234 | # Compare the current action with the previous ones 235 | for previous_actions in repeated_actions: 236 | subtraction = np.absolute(np.subtract(action_flags,previous_actions)) 237 | if np.sum(subtraction)==0.0: 238 | repeatedAction = True 239 | break 240 | # If we didn't find any identical action, we make the following verifications 241 | if not repeatedAction: 242 | path1 = self.shortest_paths[n1, midd] 243 | path2 = self.shortest_paths[midd, n2] 244 | # Check that the dst node is not in the SP to avoid loops! 
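# (path1[:len(path1)-1]+path2 joins the two SP segments without duplicating the middlepoint; the counter below accepts the route only if src and dst each appear exactly once in the joined path)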
274 |     def compute_SPs(self):
275 |         diameter = nx.diameter(self.graph)
276 |         self.shortest_paths = np.zeros((self.numNodes,self.numNodes),dtype=object)
277 | 
278 |         allPaths = dict()
279 |         sp_path = self.dataset_folder_name+"/shortest_paths.json"
280 | 
281 |         if not os.path.isfile(sp_path):
282 |             for n1 in range (0,self.numNodes):
283 |                 for n2 in range (0,self.numNodes):
284 |                     if (n1 != n2):
285 |                         allPaths[str(n1)+':'+str(n2)] = []
286 |                         # First we compute the shortest paths taking into account the diameter
287 |                         [allPaths[str(n1)+':'+str(n2)].append(p) for p in nx.all_simple_paths(self.graph, source=n1, target=n2, cutoff=diameter*2)] # We take all the paths from n1 to n2 and we order them according to the path length
288 |                         # sorted() orders the paths from fewest to most hops; paths with the
289 |                         # same number of hops are ordered by node index
290 |                         aux_sorted_paths = sorted(allPaths[str(n1)+':'+str(n2)], key=lambda item: (len(item), item)) # self.shortest_paths[n1,n2] = nx.shortest_path(self.graph, n1, n2,weight='weight')
291 |                         allPaths[str(n1)+':'+str(n2)] = aux_sorted_paths[0]
292 | 
293 |             with open(sp_path, 'w') as fp:
294 |                 json.dump(allPaths, fp)
295 |         else:
296 |             allPaths = json.load(open(sp_path))
297 | 
298 |         for n1 in range (0,self.numNodes):
299 |             for n2 in range (0,self.numNodes):
300 |                 if (n1 != n2):
301 |                     self.shortest_paths[n1,n2] = allPaths[str(n1)+':'+str(n2)]
302 | 
303 |     def generate_environment(self, dataset_folder_name, graph_topology_name, EPISODE_LENGTH, K, percentage_demands):
304 |         self.episode_length = EPISODE_LENGTH
305 |         self.graph_topology_name = graph_topology_name
306 |         self.dataset_folder_name = dataset_folder_name
307 |         self.list_eligible_demands = list()
308 |         self.percentage_demands = percentage_demands
309 | 
310 |         self.maxCapacity = 0 # We take the maximum capacity to normalize
311 | 
312 |         graph_file = self.dataset_folder_name+"/"+self.graph_topology_name+".graph"
313 |         results_file = self.dataset_folder_name+"/res_"+self.graph_topology_name+"_0"
314 |         tm_file = self.dataset_folder_name+"/TM/"+self.graph_topology_name+".0.demands"
315 |         self.defoDatasetAPI = defoResults.Defo_results(graph_file,results_file)
316 | 
317 |         self.node_to_index_dic = self.defoDatasetAPI.node_to_index_dic_pvt
318 |         self.index_to_node_lst = self.defoDatasetAPI.index_to_node_lst_pvt
319 | 
320 |         self.graph = self.defoDatasetAPI.Gbase
321 |         self.add_features_to_edges()
322 |         self.numNodes = 
len(self.graph.nodes()) 323 | self.numEdges = len(self.graph.edges()) 324 | 325 | self.K = K 326 | if self.K>self.numNodes: 327 | self.K = self.numNodes 328 | 329 | self.edge_state = np.zeros((self.numEdges, 2)) 330 | self.shortest_paths = np.zeros((self.numNodes,self.numNodes),dtype="object") 331 | 332 | position = 0 333 | for i in self.graph: 334 | for j in self.graph[i]: 335 | self.edgesDict[str(i)+':'+str(j)] = position 336 | self.graph[i][j][0]['capacity'] = self.defoDatasetAPI.links_bw[i][j] 337 | self.graph[i][j][0]['weight'] = self.defoDatasetAPI.links_weight[i][j] 338 | if self.graph[i][j][0]['capacity']>self.maxCapacity: 339 | self.maxCapacity = self.graph[i][j][0]['capacity'] 340 | self.edge_state[position][1] = self.graph[i][j][0]['capacity'] 341 | self.graph[i][j][0]['utilization'] = 0.0 342 | self.graph[i][j][0]['crossing_paths'].clear() 343 | position += 1 344 | 345 | # We create the list of nodes ids to pick randomly from them 346 | self.nodes = list(range(0,self.numNodes)) 347 | 348 | self.compute_middlepoint_set_remove_rep_actions_no_loop() 349 | 350 | def step_sp(self, action, source, destination): 351 | # We get the K-middlepoints between source-destination 352 | middlePointList = list(self.src_dst_k_middlepoints[str(source) +':'+ str(destination)]) 353 | middlePoint = middlePointList[action] 354 | 355 | # First we allocate until the middlepoint using the shortest path 356 | self.allocate_to_destination_sp(source, middlePoint, source, destination) 357 | # If we allocated to a middlepoint that is not the final destination 358 | if middlePoint!=destination: 359 | # Then we allocate from the middlepoint to the destination using the shortest path 360 | self.allocate_to_destination_sp(middlePoint, destination, source, destination) 361 | # We store that the pair source,destination has a middlepoint 362 | self.sp_middlepoints[str(source)+':'+str(destination)] = middlePoint 363 | 364 | # Find new maximum and minimum utilization link 365 | old_Utilization = self.edgeMaxUti[2] 366 | self.edgeMaxUti = (0, 0, 0) 367 | for i in self.graph: 368 | for j in self.graph[i]: 369 | position = self.edgesDict[str(i)+':'+str(j)] 370 | self.edge_state[position][0] = self.graph[i][j][0]['utilization'] 371 | link_capacity = self.links_bw[i][j] 372 | norm_edge_state_capacity = self.edge_state[position][0]/link_capacity 373 | if norm_edge_state_capacity>self.edgeMaxUti[2]: 374 | self.edgeMaxUti = (i, j, norm_edge_state_capacity) 375 | 376 | return self.edgeMaxUti[2] 377 | 378 | def step_hill_sp(self, action, source, destination): 379 | # We get the K-middlepoints between source-destination 380 | middlePointList = list(self.src_dst_k_middlepoints[str(source) +':'+ str(destination)]) 381 | middlePoint = middlePointList[action] 382 | 383 | # First we allocate until the middlepoint using the shortest path 384 | self.allocate_to_destination_sp(source, middlePoint, source, destination) 385 | # If we allocated to a middlepoint that is not the final destination 386 | if middlePoint!=destination: 387 | # Then we allocate from the middlepoint to the destination using the shortest path 388 | self.allocate_to_destination_sp(middlePoint, destination, source, destination) 389 | # We store that the pair source,destination has a middlepoint 390 | self.sp_middlepoints[str(source)+':'+str(destination)] = middlePoint 391 | 392 | # Find new maximum and minimum utilization link 393 | old_Utilization = self.edgeMaxUti[2] 394 | self.edgeMaxUti = (0, 0, 0) 395 | for i in self.graph: 396 | for j in self.graph[i]: 397 | 
position = self.edgesDict[str(i)+':'+str(j)]
398 |                 self.edge_state[position][0] = self.graph[i][j][0]['utilization']
399 |                 link_capacity = self.links_bw[i][j]
400 |                 norm_edge_state_capacity = self.edge_state[position][0]/link_capacity
401 |                 if norm_edge_state_capacity>self.edgeMaxUti[2]:
402 |                     self.edgeMaxUti = (i, j, norm_edge_state_capacity)
403 | 
404 |         return -self.edgeMaxUti[2]
405 | 
406 |     def reset_sp(self, tm_id):
407 |         """
408 |         Reset the environment and set up a new episode.
409 |         A new TM is generated but the same routing is loaded. The path with the most bandwidth
410 |         on the most utilized link is removed so that it can later be allocated on a new path in act().
411 |         """
412 |         self._generate_tm(tm_id)
413 | 
414 |         self.sp_middlepoints = dict()
415 | 
416 |         # Clear the link utilization and crossing paths
417 |         for i in self.graph:
418 |             for j in self.graph[i]:
419 |                 self.graph[i][j][0]['utilization'] = 0.0
420 |                 self.graph[i][j][0]['crossing_paths'].clear()
421 | 
422 |         # For each link we store the total sum of bandwidths of the paths crossing each link without middlepoints
423 |         self.compute_link_utilization_reset_sp()
424 |         self.edgeMaxUti = (0, 0, 0)  # Reset the max-utilization tracker (as done in reset_hill_sp below)
425 |         # We iterate over all links in an ordered fashion and store the features to edge_state
426 |         for i in self.graph:
427 |             for j in self.graph[i]:
428 |                 position = self.edgesDict[str(i)+':'+str(j)]
429 |                 self.edge_state[position][0] = self.graph[i][j][0]['utilization']
430 |                 self.edge_state[position][1] = self.graph[i][j][0]['capacity']
431 |                 link_capacity = self.links_bw[i][j]
432 |                 norm_edge_state_capacity = self.edge_state[position][0]/link_capacity
433 |                 if norm_edge_state_capacity>self.edgeMaxUti[2]:
434 |                     self.edgeMaxUti = (i, j, norm_edge_state_capacity)
435 | 
436 |         return self.edgeMaxUti[2]
437 | 
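    # Every step_*() and reset_*() above ends with the same scan: each link's
    # utilization is normalized by its capacity and the largest ratio wins. A
    # standalone sketch over hypothetical (src, dst, utilization, capacity) rows:
    #
    #     def max_normalized_utilization(links):
    #         best = (0, 0, 0)
    #         for src, dst, uti, cap in links:
    #             if uti / cap > best[2]:
    #                 best = (src, dst, uti / cap)
    #         return best
    #
    #     max_normalized_utilization([(0, 1, 5.0, 10.0), (1, 2, 9.0, 10.0)])  # -> (1, 2, 0.9)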
438 |     def reset_hill_sp(self, tm_id):
439 |         """
440 |         Reset the environment and set up a new episode.
441 |         A new TM is generated but the same routing is loaded. The path with the most bandwidth
442 |         on the most utilized link is removed so that it can later be allocated on a new path in act().
443 |         """
444 |         self._generate_tm(tm_id)
445 | 
446 |         self.sp_middlepoints = dict()
447 | 
448 |         # Clear the link utilization and crossing paths
449 |         for i in self.graph:
450 |             for j in self.graph[i]:
451 |                 self.graph[i][j][0]['utilization'] = 0.0
452 |                 self.graph[i][j][0]['crossing_paths'].clear()
453 | 
454 |         # For each link we store the total sum of bandwidths of the paths crossing each link without middlepoints
455 |         self.compute_link_utilization_reset_sp()
456 | 
457 |         # We iterate over all links in an ordered fashion and store the features to edge_state
458 |         self.edgeMaxUti = (0, 0, 0)
459 |         for i in self.graph:
460 |             for j in self.graph[i]:
461 |                 position = self.edgesDict[str(i)+':'+str(j)]
462 |                 self.edge_state[position][0] = self.graph[i][j][0]['utilization']
463 |                 self.edge_state[position][1] = self.graph[i][j][0]['capacity']
464 |                 link_capacity = self.links_bw[i][j]
465 |                 norm_edge_state_capacity = self.edge_state[position][0]/link_capacity
466 |                 if norm_edge_state_capacity>self.edgeMaxUti[2]:
467 |                     self.edgeMaxUti = (i, j, norm_edge_state_capacity)
468 | 
469 |         return -self.edgeMaxUti[2]
470 | 
471 |     def _get_top_k_critical_flows(self, list_ids):
472 |         self.list_eligible_demands.clear()
473 |         for linkId in list_ids:
474 |             i = linkId[0]
475 |             j = linkId[1]
476 |             for demand, value in self.graph[i][j][0]['crossing_paths'].items():
477 |                 src, dst = int(demand.split(':')[0]), int(demand.split(':')[1])
478 |                 if (src, dst, self.TM[src,dst]) not in self.list_eligible_demands:
479 |                     self.list_eligible_demands.append((src, dst, self.TM[src,dst]))
480 | 
481 |         self.list_eligible_demands = sorted(self.list_eligible_demands, key=lambda tup: tup[2], reverse=True)
482 |         if len(self.list_eligible_demands)>int(np.ceil(self.numNodes*(self.numNodes-1)*self.percentage_demands)):
483 |             self.list_eligible_demands = self.list_eligible_demands[:int(np.ceil(self.numNodes*(self.numNodes-1)*self.percentage_demands))]
484 | 
485 |     def reset_DRL_hill_sp(self, tm_id, best_routing, list_of_demands_to_change):
486 |         """
487 |         Reset the environment and set up a new episode.
488 |         A new TM is generated and the best routing found so far (if provided) is restored. The path
489 |         with the most bandwidth on the most utilized link is removed so that it can later be allocated on a new path in act().
490 | """ 491 | self._generate_tm(tm_id) 492 | if best_routing is not None: 493 | self.sp_middlepoints = best_routing 494 | else: 495 | self.sp_middlepoints = dict() 496 | self.list_of_demands_to_change = list_of_demands_to_change 497 | 498 | # Clear the link utilization and crossing paths 499 | for i in self.graph: 500 | for j in self.graph[i]: 501 | self.graph[i][j][0]['utilization'] = 0.0 502 | self.graph[i][j][0]['crossing_paths'].clear() 503 | 504 | # For each link we store the total sum of bandwidths of the paths crossing each link without middlepoints 505 | self.compute_link_utilization_reset_sp() 506 | 507 | # We restore the best routing configuration from the DRL agent 508 | for key, middlepoint in self.sp_middlepoints.items(): 509 | source = int(key.split(':')[0]) 510 | dest = int(key.split(':')[1]) 511 | if middlepoint!=dest: 512 | # First we remove current routing and then we assign the new middlepoint 513 | self.decrease_links_utilization_sp(source, dest, source, dest) 514 | 515 | # First we allocate until the middlepoint 516 | self.allocate_to_destination_sp(source, middlepoint, source, dest) 517 | # Then we allocate from the middlepoint to the destination 518 | self.allocate_to_destination_sp(middlepoint, dest, source, dest) 519 | 520 | # We iterate over all links in an ordered fashion and store the features to edge_state 521 | self.edgeMaxUti = (0, 0, 0) 522 | # This list is used to obtain the top K flows from the critical links 523 | list_link_uti_id = list() 524 | for i in self.graph: 525 | for j in self.graph[i]: 526 | position = self.edgesDict[str(i)+':'+str(j)] 527 | self.edge_state[position][0] = self.graph[i][j][0]['utilization'] 528 | self.edge_state[position][1] = self.graph[i][j][0]['capacity'] 529 | link_capacity = self.links_bw[i][j] 530 | # We store the link utilization and the corresponding edge 531 | list_link_uti_id.append((i, j, self.edge_state[position][0])) 532 | 533 | norm_edge_state_capacity = self.edge_state[position][0]/link_capacity 534 | if norm_edge_state_capacity>self.edgeMaxUti[2]: 535 | self.edgeMaxUti = (i, j, norm_edge_state_capacity) 536 | 537 | list_link_uti_id = sorted(list_link_uti_id, key=lambda tup: tup[2], reverse=True)[:self.num_critical_links] 538 | self._get_top_k_critical_flows(list_link_uti_id) 539 | 540 | # If we want to take the x% bigger demands 541 | # self.list_eligible_demands = sorted(list_link_uti_id, key=lambda tup: tup[0], reverse=True) 542 | # self.list_eligible_demands = self.list_eligible_demands[:int(np.ceil(self.numNodes*(self.numNodes-1)*self.percentage_demands))] 543 | 544 | return -self.edgeMaxUti[2] 545 | 546 | def allocate_to_destination_sp(self, src, dst, init_source, final_destination): 547 | # In this function we allocated the bandwidth by segments. 
This funcion is used when we want 548 | # to allocate from a src to a middlepoint and then from middlepoint to a dst using the sp 549 | bw_allocate = self.TM[init_source][final_destination] 550 | currentPath = self.shortest_paths[src,dst] 551 | 552 | i = 0 553 | j = 1 554 | 555 | while (j < len(currentPath)): 556 | firstNode = currentPath[i] 557 | secondNode = currentPath[j] 558 | 559 | self.graph[firstNode][secondNode][0]['utilization'] += bw_allocate 560 | self.graph[firstNode][secondNode][0]['crossing_paths'][str(init_source)+':'+str(final_destination)] = bw_allocate 561 | self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][0] = self.graph[firstNode][secondNode][0]['utilization'] 562 | i = i + 1 563 | j = j + 1 564 | -------------------------------------------------------------------------------- /gym-graph/gym_graph/envs/environment16.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import networkx as nx 4 | import random 5 | from gym import error, spaces, utils 6 | from random import choice 7 | import pandas as pd 8 | import pickle 9 | import json 10 | import os.path 11 | import gc 12 | import defo_process_results as defoResults 13 | import matplotlib.pyplot as plt 14 | 15 | class Env16(gym.Env): 16 | """ 17 | Here I only take X% of the demands. There are some flags 18 | that indicate if to take the X% larger demands, the X% from the 5 most loaded links 19 | or random. 20 | 21 | Environment used in the middlepoint routing problem. Here we compute the SP to reach a middlepoint. 22 | We are using bidirectional links in this environment! 23 | In this environment we make the MP between edges. 24 | self.edge_state[:][0] = link utilization 25 | self.edge_state[:][1] = link capacity 26 | self.edge_state[:][2] = bw allocated (the one that goes from src to dst) 27 | """ 28 | def __init__(self): 29 | self.graph = None # Here we store the graph as DiGraph (without repeated edges) 30 | self.source = None 31 | self.destination = None 32 | self.demand = None 33 | 34 | self.edge_state = None 35 | self.graph_topology_name = None # Here we store the name of the graph topology from the repetita dataset 36 | self.dataset_folder_name = None # Here we store the name of the repetita dataset being used: 2015Defo, 2016TopologyZoo_unary,2016TopologyZoo_inverseCapacity, etc. 37 | 38 | self.diameter = None 39 | 40 | # Nx Graph where the nodes have features. Betweenness is allways normalized. 
41 | # The other features are "raw" and are being normalized before prediction 42 | self.first = None 43 | self.firstTrueSize = None 44 | self.second = None 45 | self.between_feature = None 46 | 47 | self.percentage_demands = None # X% of the most loaded demands we use for optimization 48 | self.shufle_demands = False # If True we shuffle the list of traffic demands 49 | self.top_K_critical_demands = False # If we want to take the top X% of the 5 most loaded links 50 | self.num_critical_links = 5 51 | 52 | self.sp_middlepoints = None # For each src,dst we store the nodeId of the sp middlepoint 53 | self.shortest_paths = None # For each src,dst we store the shortest path to reach d 54 | self.sp_middlepoints_step = dict() # We store the midlepoint assignation before step() finishes 55 | 56 | # Mean and standard deviation of link betweenness 57 | self.mu_bet = None 58 | self.std_bet = None 59 | 60 | # Episode length in timesteps 61 | self.episode_length = None 62 | self.currentVal = None # Value used in hill_climbing way of choosing the next demand 63 | self.initial_maxLinkUti = None 64 | self.iter_list_elig_demn = None 65 | 66 | # Error at the end of episode to evaluate the learning process 67 | self.error_evaluation = None 68 | # Ideal target link capacity: self.sumTM/self.numEdges 69 | self.target_link_capacity = None 70 | 71 | self.TM = None # Traffic matrix where self.TM[src][dst] indicates how many packets are sent from src to dst 72 | self.sumTM = None 73 | self.routing = None # Loaded routing matrix 74 | self.paths_Matrix_from_routing = None # We store a list of paths extracted from the routing matrix for each src-dst pair 75 | 76 | self.K = None 77 | self.nodes = None # List of nodes to pick randomly from them 78 | self.ordered_edges = None 79 | self.edgesDict = dict() # Stores the position id of each edge in order 80 | self.previous_path = None 81 | 82 | self.src_dst_k_middlepoints = None # For each src, dst, we store the k middlepoints 83 | self.list_eligible_demands = None # Here we store those demands from DEFO that have one middlepoint. These demands are going to be eligible by our DRL agent. 
84 |         self.link_capacity_feature = None
85 | 
86 |         self.numNodes = None
87 |         self.numEdges = None
88 |         self.next_state = None
89 | 
90 |         # We store the edge that has maximum utilization
91 |         # (src, dst, MaxUtilization)
92 |         self.edgeMaxUti = None
93 |         # We store the edge that has minimum utilization
94 |         # (src, dst, MinUtilization)
95 |         self.edgeMinUti = None
96 |         # We store the path with the most bandwidth from the edge with maximum utilization
97 |         # (src, dst, MaxBandwidth)
98 |         self.patMaxBandwth = None
99 |         self.maxBandwidth = None
100 | 
101 |         self.episode_over = True
102 |         self.reward = 0
103 |         self.allPaths = dict() # Stores the paths for each src:dst pair
104 | 
105 |     def seed(self, seed):
106 |         random.seed(seed)
107 |         np.random.seed(seed)
108 | 
109 |     def add_features_to_edges(self):
110 |         incId = 1
111 |         for node in self.graph:
112 |             for adj in self.graph[node]:
113 |                 if not 'betweenness' in self.graph[node][adj][0]:
114 |                     self.graph[node][adj][0]['betweenness'] = 0
115 |                 if not 'edgeId' in self.graph[node][adj][0]:
116 |                     self.graph[node][adj][0]['edgeId'] = incId
117 |                 if not 'numsp' in self.graph[node][adj][0]:
118 |                     self.graph[node][adj][0]['numsp'] = 0
119 |                 if not 'utilization' in self.graph[node][adj][0]:
120 |                     self.graph[node][adj][0]['utilization'] = 0
121 |                 if not 'capacity' in self.graph[node][adj][0]:
122 |                     self.graph[node][adj][0]['capacity'] = 0
123 |                 if not 'weight' in self.graph[node][adj][0]:
124 |                     self.graph[node][adj][0]['weight'] = 0
125 |                 if not 'kshortp' in self.graph[node][adj][0]:
126 |                     self.graph[node][adj][0]['kshortp'] = 0
127 |                 if not 'crossing_paths' in self.graph[node][adj][0]: # We store all the src,dst from the paths crossing each edge
128 |                     self.graph[node][adj][0]['crossing_paths'] = dict()
129 |                 incId = incId + 1
130 | 
131 |     def num_shortest_path(self, topology):
132 |         self.diameter = nx.diameter(self.graph)
133 |         # Iterate over all node1,node2 pairs from the graph
134 |         for n1 in range (0,self.numNodes):
135 |             for n2 in range (0,self.numNodes):
136 |                 if (n1 != n2):
137 |                     # Check if we already added this src:dst entry
138 |                     if str(n1)+':'+str(n2) not in self.allPaths:
139 |                         self.allPaths[str(n1)+':'+str(n2)] = []
140 |                     # First we compute the shortest paths taking into account the diameter
141 |                     [self.allPaths[str(n1)+':'+str(n2)].append(p) for p in nx.all_simple_paths(self.graph, source=n1, target=n2, cutoff=self.diameter*2)]
142 | 
143 |                     # We take all the paths from n1 to n2 and we order them according to the path length
144 |                     # sorted() orders the paths from fewest to most hops; paths with the
145 |                     # same number of hops are ordered by node index
146 |                     self.allPaths[str(n1)+':'+str(n2)] = sorted(self.allPaths[str(n1)+':'+str(n2)], key=lambda item: (len(item), item))
147 |                     path = 0
148 |                     while path < self.K and path < len(self.allPaths[str(n1)+':'+str(n2)]):
149 |                         currentPath = self.allPaths[str(n1)+':'+str(n2)][path]
150 |                         i = 0
151 |                         j = 1
152 | 
153 |                         # Iterate over pairs of nodes and increase the shortest-path counter of each traversed link
154 |                         while (j < len(currentPath)):
155 |                             self.graph.get_edge_data(currentPath[i], currentPath[j])[0]['numsp'] = \
156 |                                 self.graph.get_edge_data(currentPath[i], currentPath[j])[0]['numsp'] + 1
157 |                             i = i + 1
158 |                             j = j + 1
159 | 
160 |                         path = path + 1
161 | 
162 |                     # Remove paths not needed
163 |                     del self.allPaths[str(n1)+':'+str(n2)][path:len(self.allPaths[str(n1)+':'+str(n2)])]
164 |                     gc.collect()
165 | 
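    # num_shortest_path() above enumerates simple paths up to twice the graph
    # diameter and keeps the K shortest, breaking hop-count ties by node index.
    # The same ordering on a tiny standalone graph (illustrative sketch only):
    #
    #     import networkx as nx
    #
    #     g = nx.cycle_graph(4)  # ring 0-1-2-3-0
    #     paths = sorted(nx.all_simple_paths(g, 0, 2, cutoff=nx.diameter(g) * 2),
    #                    key=lambda p: (len(p), p))
    #     # paths == [[0, 1, 2], [0, 3, 2]]  (both 2 hops, ordered by index)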
166 |     def decrease_links_utilization_sp(self, src, dst, init_source, final_destination):
167 |         # In this function we deallocate the bandwidth by segments. This function is used when we want
168 |         # to deallocate from a src to a middlepoint and then from the middlepoint to a dst using the SP
169 | 
170 |         # We obtain the demand from the original source, destination pair
171 |         bw_allocated = self.TM[init_source][final_destination]
172 |         currentPath = self.shortest_paths[src,dst]
173 | 
174 |         i = 0
175 |         j = 1
176 |         while (j < len(currentPath)):
177 |             firstNode = currentPath[i]
178 |             secondNode = currentPath[j]
179 | 
180 |             self.graph[firstNode][secondNode][0]['utilization'] -= bw_allocated
181 |             if str(init_source)+':'+str(final_destination) in self.graph[firstNode][secondNode][0]['crossing_paths']:
182 |                 del self.graph[firstNode][secondNode][0]['crossing_paths'][str(init_source)+':'+str(final_destination)]
183 |             self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][0] = self.graph[firstNode][secondNode][0]['utilization']
184 |             i = i + 1
185 |             j = j + 1
186 | 
187 |     def _get_top_k_critical_flows(self, list_ids):
188 |         self.list_eligible_demands.clear()
189 |         for linkId in list_ids:
190 |             i = linkId[1]
191 |             j = linkId[2]
192 |             for demand, value in self.graph[i][j][0]['crossing_paths'].items():
193 |                 src, dst = int(demand.split(':')[0]), int(demand.split(':')[1])
194 |                 if (src, dst, self.TM[src,dst]) not in self.list_eligible_demands:
195 |                     self.list_eligible_demands.append((src, dst, self.TM[src,dst]))
196 | 
197 |         self.list_eligible_demands = sorted(self.list_eligible_demands, key=lambda tup: tup[2], reverse=True)
198 |         if len(self.list_eligible_demands)>int(np.ceil(self.numNodes*(self.numNodes-1)*self.percentage_demands)):
199 |             self.list_eligible_demands = self.list_eligible_demands[:int(np.ceil(self.numNodes*(self.numNodes-1)*self.percentage_demands))]
200 | 
201 |     def _generate_tm(self, tm_id):
202 |         # Build the dataset file paths for the given tm_id and load the traffic matrix
203 |         graph_file = self.dataset_folder_name+"/"+self.graph_topology_name+".graph"
204 |         # This 'results_file' file is ignored!
205 | results_file = self.dataset_folder_name+"/res_"+self.graph_topology_name+"_"+str(tm_id) 206 | tm_file = self.dataset_folder_name+"/TM/"+self.graph_topology_name+'.'+str(tm_id)+".demands" 207 | 208 | self.defoDatasetAPI = defoResults.Defo_results(graph_file,results_file) 209 | self.links_bw = self.defoDatasetAPI.links_bw 210 | self.MP_matrix = self.defoDatasetAPI.MP_matrix 211 | self.TM = self.defoDatasetAPI._get_traffic_matrix(tm_file) 212 | 213 | self.iter_list_elig_demn = 0 214 | self.list_eligible_demands.clear() 215 | min_links_bw = 1000000.0 216 | for src in range (0,self.numNodes): 217 | for dst in range (0,self.numNodes): 218 | if src!=dst: 219 | self.list_eligible_demands.append((src, dst, self.TM[src,dst])) 220 | # If we have a link between src and dst 221 | if src in self.graph and dst in self.graph[src]: 222 | # Store the link with minimum bw 223 | if self.links_bw[src][dst]=len(sorted_dict): 253 | path = 0 254 | srcPath = int(sorted_dict[path][0].split(':')[0]) 255 | dstPath = int(sorted_dict[path][0].split(':')[1]) 256 | self.patMaxBandwth = (srcPath, dstPath, self.TM[srcPath][dstPath]) 257 | 258 | def _obtain_path_from_set_rand(self): 259 | len_demans = len(self.list_eligible_demands)-1 260 | path = random.randint(0, len_demans) 261 | srcPath = int(self.list_eligible_demands[path][0]) 262 | dstPath = int(self.list_eligible_demands[path][1]) 263 | self.patMaxBandwth = (srcPath, dstPath, int(self.list_eligible_demands[path][2])) 264 | 265 | def _obtain_demand(self): 266 | src = self.list_eligible_demands[self.iter_list_elig_demn][0] 267 | dst = self.list_eligible_demands[self.iter_list_elig_demn][1] 268 | bw = self.list_eligible_demands[self.iter_list_elig_demn][2] 269 | self.patMaxBandwth = (src, dst, int(bw)) 270 | self.iter_list_elig_demn += 1 271 | 272 | def get_value(self, source, destination, action): 273 | # We get the K-middlepoints between source-destination 274 | middlePointList = self.src_dst_k_middlepoints[str(source) +':'+ str(destination)] 275 | middlePoint = middlePointList[action] 276 | 277 | # First we allocate until the middlepoint 278 | self.allocate_to_destination_sp(source, middlePoint, source, destination) 279 | # If we allocated to a middlepoint that is not the final destination 280 | if middlePoint!=destination: 281 | # Then we allocate from the middlepoint to the destination 282 | self.allocate_to_destination_sp(middlePoint, destination, source, destination) 283 | # We store that the pair source,destination has a middlepoint 284 | self.sp_middlepoints[str(source)+':'+str(destination)] = middlePoint 285 | 286 | currentValue = -1000000 287 | # Get the maximum loaded link and it's value after allocating to the corresponding middlepoint 288 | for i in self.graph: 289 | for j in self.graph[i]: 290 | position = self.edgesDict[str(i)+':'+str(j)] 291 | link_capacity = self.links_bw[i][j] 292 | if self.edge_state[position][0]/link_capacity>currentValue: 293 | currentValue = self.edge_state[position][0]/link_capacity 294 | 295 | # Dissolve allocation step so that later we can try another action 296 | # Remove bandwidth allocated until the middlepoint and then from the middlepoint on 297 | if str(source)+':'+str(destination) in self.sp_middlepoints: 298 | middlepoint = self.sp_middlepoints[str(source)+':'+str(destination)] 299 | self.decrease_links_utilization_sp(source, middlepoint, source, destination) 300 | self.decrease_links_utilization_sp(middlepoint, destination, source, destination) 301 | del self.sp_middlepoints[str(source)+':'+str(destination)] 302 
| else: # Remove the bandwidth allocated from the src to the destination 303 | self.decrease_links_utilization_sp(source, destination, source, destination) 304 | 305 | return -currentValue 306 | 307 | def _obtain_demand_hill_climbing(self): 308 | dem_iter = 0 309 | nextVal = -1000000 310 | self.next_state = None 311 | # Iterate for each demand possible 312 | for source in range(self.numNodes): 313 | for dest in range(self.numNodes): 314 | if source!=dest: 315 | for action in range(len(self.src_dst_k_middlepoints[str(source)+':'+str(dest)])): 316 | middlepoint = -1 317 | # First we need to desallocate the current demand before we explore all it's possible actions 318 | # Check if there is a middlepoint to desallocate from src-middlepoint-dst 319 | if str(source)+':'+str(dest) in self.sp_middlepoints: 320 | middlepoint = self.sp_middlepoints[str(source)+':'+str(dest)] 321 | self.decrease_links_utilization_sp(source, middlepoint, source, dest) 322 | self.decrease_links_utilization_sp(middlepoint, dest, source, dest) 323 | del self.sp_middlepoints[str(source)+':'+str(dest)] 324 | else: # Remove the bandwidth allocated from the src to the destination 325 | self.decrease_links_utilization_sp(source, dest, source, dest) 326 | 327 | evalState = self.get_value(source, dest, action) 328 | if evalState > nextVal: 329 | nextVal = evalState 330 | self.next_state = (action, source, dest) 331 | 332 | # Allocate back the demand whose actions we explored 333 | # If the current demand had a middlepoint, we allocate src-middlepoint-dst 334 | if middlepoint>=0: 335 | # First we allocate until the middlepoint 336 | self.allocate_to_destination_sp(source, middlepoint, source, dest) 337 | # Then we allocate from the middlepoint to the destination 338 | self.allocate_to_destination_sp(middlepoint, dest, source, dest) 339 | # We store that the pair source,destination has a middlepoint 340 | self.sp_middlepoints[str(source)+':'+str(dest)] = middlepoint 341 | else: 342 | # Then we allocate from the middlepoint to the destination 343 | self.allocate_to_destination_sp(source, dest, source, dest) 344 | self.patMaxBandwth = (self.next_state[1], self.next_state[2], self.TM[self.next_state[1]][self.next_state[2]]) 345 | 346 | def compute_middlepoint_set_random(self): 347 | # We choose the K-middlepoints for each src-dst randomly 348 | self.src_dst_k_middlepoints = dict() 349 | # Iterate over all node1,node2 pairs from the graph 350 | for n1 in range (0,self.numNodes): 351 | for n2 in range (0,self.numNodes): 352 | if (n1 != n2): 353 | num_middlepoints = 0 354 | self.src_dst_k_middlepoints[str(n1)+':'+str(n2)] = list() 355 | # We add the destination as a candidate middlepoint (in case we have direct connection) 356 | self.src_dst_k_middlepoints[str(n1)+':'+str(n2)].append(n2) 357 | num_middlepoints += 1 358 | while num_middlepointsself.numNodes: 550 | self.K = self.numNodes 551 | 552 | self.edge_state = np.zeros((self.numEdges, 3)) 553 | self.betweenness_centrality = np.zeros(self.numEdges) # Used in the fully connected 554 | self.shortest_paths = np.zeros((self.numNodes,self.numNodes),dtype="object") 555 | 556 | position = 0 557 | for i in self.graph: 558 | for j in self.graph[i]: 559 | self.edgesDict[str(i)+':'+str(j)] = position 560 | self.graph[i][j][0]['capacity'] = self.defoDatasetAPI.links_bw[i][j] 561 | self.graph[i][j][0]['weight'] = self.defoDatasetAPI.links_weight[i][j] 562 | if self.graph[i][j][0]['capacity']>self.maxCapacity: 563 | self.maxCapacity = self.graph[i][j][0]['capacity'] 564 | 
self.edge_state[position][1] = self.graph[i][j][0]['capacity'] 565 | self.betweenness_centrality[position] = btwns[i,j] 566 | self.graph[i][j][0]['utilization'] = 0.0 567 | self.graph[i][j][0]['crossing_paths'].clear() 568 | position += 1 569 | 570 | self._first_second() 571 | self.firstTrueSize = len(self.first) 572 | 573 | self.link_capacity_feature = np.divide(self.edge_state[:,1], self.maxCapacity) 574 | 575 | # We create the list of nodes ids to pick randomly from them 576 | self.nodes = list(range(0,self.numNodes)) 577 | 578 | self.compute_middlepoint_set_remove_rep_actions_no_loop() 579 | 580 | def step(self, action, demand, source, destination): 581 | # Action is the middlepoint. Careful because it can also be action==destination if src,dst are connected directly by an edge 582 | self.episode_over = False 583 | self.reward = 0 584 | 585 | # We get the K-middlepoints between source-destination 586 | middlePointList = self.src_dst_k_middlepoints[str(source) +':'+ str(destination)] 587 | middlePoint = middlePointList[action] 588 | 589 | # First we allocate until the middlepoint 590 | self.allocate_to_destination_sp(source, middlePoint, source, destination) 591 | # If we allocated to a middlepoint that is not the final destination 592 | if middlePoint!=destination: 593 | # Then we allocate from the middlepoint to the destination 594 | self.allocate_to_destination_sp(middlePoint, destination, source, destination) 595 | # We store that the pair source,destination has a middlepoint 596 | self.sp_middlepoints[str(source)+':'+str(destination)] = middlePoint 597 | 598 | self.sp_middlepoints_step = self.sp_middlepoints 599 | 600 | # Find new maximum and minimum utilization link 601 | old_Utilization = self.edgeMaxUti[2] 602 | self.edgeMaxUti = (0, 0, 0) 603 | for i in self.graph: 604 | for j in self.graph[i]: 605 | position = self.edgesDict[str(i)+':'+str(j)] 606 | self.edge_state[position][0] = self.graph[i][j][0]['utilization'] 607 | link_capacity = self.links_bw[i][j] 608 | norm_edge_state_capacity = self.edge_state[position][0]/link_capacity 609 | if norm_edge_state_capacity>self.edgeMaxUti[2]: 610 | self.edgeMaxUti = (i, j, norm_edge_state_capacity) 611 | 612 | self.currentVal = -self.edgeMaxUti[2] 613 | 614 | self.reward = np.around((old_Utilization-self.edgeMaxUti[2])*10,2) 615 | 616 | # If we didn't iterate over all demands 617 | if self.iter_list_elig_demnself.edgeMaxUti[2]: 668 | self.edgeMaxUti = (i, j, norm_edge_state_capacity) 669 | 670 | if self.top_K_critical_demands: 671 | list_link_uti_id = sorted(list_link_uti_id, key=lambda tup: tup[0], reverse=True)[:self.num_critical_links] 672 | self._get_top_k_critical_flows(list_link_uti_id) 673 | 674 | self.currentVal = -self.edgeMaxUti[2] 675 | self.initial_maxLinkUti = -self.edgeMaxUti[2] 676 | # From the link with more utilization, we obtain a random path of the 5 with more bandwidth 677 | #self._obtain_path_more_bandwidth_rand_link() 678 | #self._obtain_path_from_set_rand() 679 | #self._obtain_demand_hill_climbing() 680 | self._obtain_demand() 681 | 682 | # Remove bandwidth allocated for the path with more bandwidth from the link with more utilization 683 | self.decrease_links_utilization_sp(self.patMaxBandwth[0], self.patMaxBandwth[1], self.patMaxBandwth[0], self.patMaxBandwth[1]) 684 | 685 | # We desmark the bw_allocated 686 | self.edge_state[:,2] = 0 687 | 688 | return self.TM[self.patMaxBandwth[0]][self.patMaxBandwth[1]], self.patMaxBandwth[0], self.patMaxBandwth[1] 689 | 690 | def allocate_to_destination_sp(self, src, dst, 
init_source, final_destination): 691 | # In this function we allocated the bandwidth by segments. This funcion is used when we want 692 | # to allocate from a src to a middlepoint and then from middlepoint to a dst using the sp 693 | bw_allocate = self.TM[init_source][final_destination] 694 | currentPath = self.shortest_paths[src,dst] 695 | 696 | i = 0 697 | j = 1 698 | 699 | while (j < len(currentPath)): 700 | firstNode = currentPath[i] 701 | secondNode = currentPath[j] 702 | 703 | self.graph[firstNode][secondNode][0]['utilization'] += bw_allocate 704 | self.graph[firstNode][secondNode][0]['crossing_paths'][str(init_source)+':'+str(final_destination)] = bw_allocate 705 | self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][0] = self.graph[firstNode][secondNode][0]['utilization'] 706 | i = i + 1 707 | j = j + 1 708 | 709 | def mark_action_sp(self, src, dst, init_source, final_destination): 710 | # In this function we mark the action in the corresponding edges of the SP between src,dst 711 | bw_allocate = self.TM[init_source][final_destination] 712 | currentPath = self.shortest_paths[src,dst] 713 | 714 | i = 0 715 | j = 1 716 | 717 | while (j < len(currentPath)): 718 | firstNode = currentPath[i] 719 | secondNode = currentPath[j] 720 | 721 | self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][2] = bw_allocate/self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][1] 722 | i = i + 1 723 | j = j + 1 -------------------------------------------------------------------------------- /gym-graph/gym_graph/envs/environment20.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import networkx as nx 4 | import random 5 | from gym import error, spaces, utils 6 | from random import choice 7 | import pandas as pd 8 | import pickle 9 | import os.path 10 | import json 11 | import gc 12 | import defo_process_results as defoResults 13 | 14 | class Env20(gym.Env): 15 | """ 16 | Similar to environment15.py but this one is used for the SAP (instead of hill climbing) 17 | 18 | Environment used in the middlepoint routing problem. 19 | We are using bidirectional links in this environment! 20 | In this environment we make the MP between nodes and concatenate the edge features 21 | to the node features in the message function. 22 | self.edge_state[:][0] = link utilization 23 | self.edge_state[:][1] = link capacity 24 | self.edge_state[:][2] = bw allocated (the one that goes from src to dst) 25 | """ 26 | def __init__(self): 27 | self.graph = None # Here we store the graph as DiGraph (without repeated edges) 28 | self.source = None 29 | self.destination = None 30 | self.demand = None 31 | 32 | self.edge_state = None 33 | self.graph_topology_name = None # Here we store the name of the graph topology from the repetita dataset 34 | self.dataset_folder_name = None # Here we store the name of the repetita dataset being used: 2015Defo, 2016TopologyZoo_unary,2016TopologyZoo_inverseCapacity, etc. 35 | 36 | self.diameter = None 37 | 38 | # Nx Graph where the nodes have features. Betweenness is allways normalized. 
39 | # The other features are "raw" and are being normalized before prediction 40 | self.between_feature = None 41 | 42 | self.nodeId = None 43 | self.sp_middlepoints = None # For each src,dst we store the nodeId of the sp middlepoint 44 | self.shortest_paths = None # For each src,dst we store the shortest path to reach d 45 | 46 | # Mean and standard deviation of link betweenness 47 | self.mu_bet = None 48 | self.std_bet = None 49 | 50 | # Episode length in timesteps 51 | self.episode_length = None 52 | self.list_eligible_demands = None # Here we store those demands from DEFO that have one middlepoint. These demands are going to be eligible by our DRL agent. 53 | self.iter_list_elig_demn = None 54 | 55 | # Error at the end of episode to evaluate the learning process 56 | self.error_evaluation = None 57 | # Ideal target link capacity: self.sumTM/self.numEdges 58 | self.target_link_capacity = None 59 | 60 | self.TM = None # Traffic matrix where self.TM[src][dst] indicates how many packets are sent from src to dst 61 | self.meanTM = None 62 | self.stdTM = None 63 | self.sumTM = None 64 | self.routing = None # Loaded routing matrix 65 | self.paths_Matrix_from_routing = None # We store a list of paths extracted from the routing matrix for each src-dst pair 66 | 67 | self.K = None 68 | self.nodes = None # List of nodes to pick randomly from them 69 | self.ordered_edges = None 70 | self.edgesDict = dict() # Stores the position id of each edge in order 71 | self.previous_path = None 72 | 73 | self.src_dst_k_middlepoints = None # For each src, dst, we store the k middlepoints 74 | self.node_to_index_dic = None # For each node from the real graph we store it's index 75 | self.index_to_node_lst = None # We store a list of nodes in an ordered fashion 76 | 77 | self.numNodes = None 78 | self.numEdges = None 79 | self.numSteps = 0 # As our problem can go forever, we limit it to 10 steps 80 | 81 | self.sameLink = False # Indicates if we are working with the same link 82 | 83 | # We store the edge that has maximum utilization 84 | # (src, dst, MaxUtilization) 85 | self.edgeMaxUti = None 86 | # We store the edge that has minimum utilization 87 | # (src, dst, MaxUtilization) 88 | self.edgeMinUti = None 89 | # We store the path with more bandwidth from the edge with maximum utilization 90 | # (src, dst, MaxBandwidth) 91 | self.patMaxBandwth = None 92 | self.maxBandwidth = None 93 | 94 | self.episode_over = True 95 | self.reward = 0 96 | self.allPaths = dict() # Stores the paths for each src:dst pair 97 | 98 | def seed(self, seed): 99 | random.seed(seed) 100 | np.random.seed(seed) 101 | 102 | def add_features_to_edges(self): 103 | incId = 1 104 | for node in self.graph: 105 | for adj in self.graph[node]: 106 | if not 'betweenness' in self.graph[node][adj][0]: 107 | self.graph[node][adj][0]['betweenness'] = 0 108 | if not 'edgeId' in self.graph[node][adj][0]: 109 | self.graph[node][adj][0]['edgeId'] = incId 110 | if not 'numsp' in self.graph[node][adj][0]: 111 | self.graph[node][adj][0]['numsp'] = 0 112 | if not 'utilization' in self.graph[node][adj][0]: 113 | self.graph[node][adj][0]['utilization'] = 0 114 | if not 'capacity' in self.graph[node][adj][0]: 115 | self.graph[node][adj][0]['capacity'] = 0 116 | if not 'weight' in self.graph[node][adj][0]: 117 | self.graph[node][adj][0]['weight'] = 0 118 | if not 'kshortp' in self.graph[node][adj][0]: 119 | self.graph[node][adj][0]['kshortp'] = 0 120 | if not 'crossing_paths' in self.graph[node][adj][0]: # We store all the src,dst from the paths crossing each edge 
121 | self.graph[node][adj][0]['crossing_paths'] = dict() 122 | incId = incId + 1 123 | 124 | def _generate_tm(self, tm_id): 125 | # Sample a file randomly to initialize the tm 126 | graph_file = self.dataset_folder_name+"/"+self.graph_topology_name+".graph" 127 | # This 'results_file' file is ignored! 128 | results_file = self.dataset_folder_name+"/res_"+self.graph_topology_name+"_"+str(tm_id) 129 | tm_file = self.dataset_folder_name+"/TM/"+self.graph_topology_name+'.'+str(tm_id)+".demands" 130 | 131 | self.defoDatasetAPI = defoResults.Defo_results(graph_file,results_file) 132 | self.links_bw = self.defoDatasetAPI.links_bw 133 | self.TM = self.defoDatasetAPI._get_traffic_matrix(tm_file) 134 | 135 | self.iter_list_elig_demn = 0 136 | self.list_eligible_demands.clear() 137 | min_links_bw = 1000000.0 138 | for src in range (0,self.numNodes): 139 | for dst in range (0,self.numNodes): 140 | if src!=dst: 141 | self.list_eligible_demands.append((src, dst, self.TM[src,dst])) 142 | # If we have a link between src and dst 143 | if src in self.graph and dst in self.graph[src]: 144 | # Store the link with minimum bw 145 | if self.links_bw[src][dst]maxUti: 275 | maxUti = self.edge_state[position][0]/link_capacity 276 | self.edgeMaxUti = (i, j, maxUti) 277 | if self.edge_state[position][0]/link_capacity max_util: 98 | max_util = self._graph.get_edge_data(*i)['utilization'] 99 | return max_util 100 | 101 | def mark_action(self, action): 102 | """ 103 | mark action on links the path have 104 | """ 105 | marked = copy.deepcopy(self._graph_state) 106 | if action == -1: 107 | return marked 108 | demand = self._demand_list[self._demand_idx] 109 | temp = self._shortest_path[demand[0]][action] 110 | for i in range(len(temp) - 1): 111 | marked[self.edges_dict[(temp[i], temp[i+1])]][2] = demand[2] 112 | temp = self._shortest_path[action][demand[1]] 113 | for i in range(len(temp) - 1): 114 | marked[self.edges_dict[(temp[i], temp[i + 1])]][2] = demand[2] 115 | return marked 116 | 117 | def seed(self, seed): 118 | random.seed(seed) 119 | np.random.seed(seed) 120 | 121 | def step(self, action): 122 | if action != 0: 123 | demand = self._demand_list[self._demand_idx] 124 | action = self.action_space[(demand[0], demand[1])][action] 125 | temp = self._shortest_path[demand[0]][action] 126 | for i in range(len(temp)-1): 127 | self._graph[temp[i]][temp[i+1]]['bwAlloc'] += demand[2] 128 | self._graph[temp[i]][temp[i+1]]['utilization'] = self._graph[temp[i]][temp[i + 1]]['bwAlloc'] \ 129 | / self._graph[temp[i]][temp[i + 1]]['capacity'] 130 | self._graph_state[self.edges_dict[(temp[i], temp[i+1])]][1] = self._graph[temp[i]][temp[i + 1]]['utilization'] 131 | 132 | temp = self._shortest_path[action][demand[1]] 133 | for i in range(len(temp)-1): 134 | self._graph[temp[i]][temp[i+1]]['bwAlloc'] += demand[2] 135 | self._graph[temp[i]][temp[i+1]]['utilization'] = self._graph[temp[i]][temp[i+1]]['bwAlloc'] \ 136 | / self._graph[temp[i]][temp[i+1]]['capacity'] 137 | self._graph_state[self.edges_dict[(temp[i], temp[i+1])]][1] = self._graph[temp[i]][temp[i+1]]['utilization'] 138 | 139 | temp = self._demand_routing[demand] 140 | for i in range(len(temp) - 1): 141 | self._graph[temp[i]][temp[i+1]]['bwAlloc'] -= demand[2] 142 | self._graph[temp[i]][temp[i+1]]['utilization'] = self._graph[temp[i]][temp[i+1]]['bwAlloc'] \ 143 | / self._graph[temp[i]][temp[i + 1]]['capacity'] 144 | self._graph_state[self.edges_dict[(temp[i], temp[i+1])]][1] = self._graph[temp[i]][temp[i+1]]['utilization'] 145 | self._demand_routing[demand] = 
self._shortest_path[demand[0]][action][0:-1] + self._shortest_path[action][demand[1]] 146 | 147 | max_util = self._max_link_util() 148 | reward = self.max_util - max_util 149 | self.max_util = max_util 150 | 151 | self._demand_idx = self._demand_idx + 1 152 | if self._demand_idx == len(self._demand_list): 153 | self._done = True 154 | demand = None 155 | else: 156 | self._done = False 157 | demand = self._demand_list[self._demand_idx] 158 | 159 | return copy.deepcopy(self._graph_state), self._done, demand, reward 160 | 161 | def reset(self, topology, demand_list=None): 162 | self._graph = generate_graph(topology) 163 | self._demand_list = demand_list 164 | self._demand_idx = 0 165 | self._num_edges = len(self._graph.edges()) 166 | self._ordered_edges = sorted([edge for edge in self._graph.edges()]) 167 | self.edges_dict = dict() 168 | self._graph_state = np.zeros((self._num_edges, 3)) 169 | self.max_util = 0 170 | self._done = False 171 | 172 | if self._demand_list == None: 173 | self._demand_list = self._generate_traffic() 174 | self._demand_list = sorted(self._demand_list, key=lambda x: x[2], reverse=True) 175 | 176 | idx = 0 177 | for n1, n2 in self._ordered_edges: 178 | self.edges_dict[(n1, n2)] = idx 179 | self.edges_dict[(n2, n1)] = idx 180 | self._graph_state[idx][0] = self._graph.get_edge_data(n1, n2)['capacity'] #/ self._graph.get_edge_data(n1, n2)['capacity'] 181 | self._graph_state[idx][1] = self._graph.get_edge_data(n1, n2)['utilization'] 182 | idx = idx + 1 183 | 184 | self.neighbor_edges = dict() 185 | for n1, n2 in self._ordered_edges: 186 | self.neighbor_edges[(n1, n2)] = list() 187 | for m, n in list(self._graph.edges(n1)) + list(self._graph.edges(n2)): 188 | if (n1 != m or n2 != n) and (n1 != n or n2 != m): 189 | self.neighbor_edges[(n1, n2)].append((m, n)) 190 | 191 | self._shortest_path = dict(nx.all_pairs_shortest_path(self._graph)) 192 | self.action_space = dict() 193 | for i in self._graph.nodes(): 194 | for j in self._graph.nodes(): 195 | self.action_space[(i, j)] = [-1] 196 | for k in self._graph.nodes(): 197 | if k == i or k == j: 198 | continue 199 | if j not in self._shortest_path[i][k] or i not in self._shortest_path[k][j]: 200 | self.action_space[(i, j)].append(k) 201 | 202 | self._demand_routing = dict() 203 | for i in self._demand_list: 204 | temp = self._shortest_path[i[0]][i[1]] 205 | for j in range(len(temp) - 1): 206 | self._graph[temp[j]][temp[j+1]]['bwAlloc'] += i[2] 207 | self._graph[temp[j]][temp[j+1]]['utilization'] = self._graph[temp[j]][temp[j+1]]['bwAlloc'] \ 208 | / self._graph[temp[j]][temp[j+1]]['capacity'] 209 | self._graph_state[self.edges_dict[(temp[j], temp[j+1])]][1] \ 210 | = self._graph[temp[j]][temp[j+1]]['utilization'] 211 | self._demand_routing[i] = self._shortest_path[i[0]][i[1]] 212 | self.max_util = self._max_link_util() 213 | 214 | return copy.deepcopy(self._graph_state), self._demand_list[self._demand_idx] 215 | 216 | def render(self, mode='human'): 217 | if mode == 'human': 218 | pos = nx.spring_layout(self._graph) 219 | edge_labels = nx.get_edge_attributes(self._graph, 'capacity') 220 | nx.draw(self._graph, pos, with_labels=True) 221 | nx.draw_networkx_edge_labels(self._graph, pos, edge_labels=edge_labels) 222 | plt.show() 223 | plt.clf() 224 | -------------------------------------------------------------------------------- /gym_env/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='gym_env', 4 | version='0.0.1', 5 | 
install_requires=['gym', 'networkx'] # And any other dependencies foo needs 6 | ) -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from actor_critic import PPOAC 3 | import gym 4 | import gym_graph 5 | import random 6 | import numpy as np 7 | import os 8 | import gc 9 | import time 10 | 11 | if __name__ == '__main__': 12 | 13 | if not os.path.exists("./Logs"): 14 | os.makedirs("./Logs") 15 | 16 | SEED = 9 17 | os.environ['PYTHONHASHSEED'] = str(SEED) 18 | np.random.seed(SEED) 19 | random.seed(SEED) 20 | torch.manual_seed(1) 21 | torch.cuda.manual_seed(1) 22 | torch.cuda.manual_seed_all(1) 23 | torch.backends.cudnn.benchmark = False 24 | torch.backends.cudnn.deterministic = True 25 | experiment_letter = "_B_NEW" 26 | take_critic_demands = True # True if we want to take the demands from the most critical links, True if we want to take the largest 27 | percentage_demands = 15 # Percentage of demands that will be used in the optimization 28 | str_perctg_demands = str(percentage_demands) 29 | percentage_demands /= 100 30 | 31 | max_iters = 150 32 | EVALUATION_EPISODES = 20 # As the demand selection is deterministic, it doesn't make sense to evaluate multiple times over the same TM 33 | 34 | num_samples_top1 = int(np.ceil(percentage_demands * 380)) * 5 35 | num_samples_top2 = int(np.ceil(percentage_demands * 506)) * 4 36 | num_samples_top3 = int(np.ceil(percentage_demands * 272)) * 6 37 | 38 | num_samples_top = [num_samples_top1, num_samples_top2, num_samples_top3] 39 | 40 | differentiation_str = "Enero_3top_" + str_perctg_demands + experiment_letter 41 | model_dir = "./models" + differentiation_str 42 | 43 | if not os.path.exists(model_dir): 44 | os.makedirs(model_dir) 45 | 46 | fileLogs = open("./Logs/exp" + differentiation_str + "Logs.txt", "w") 47 | 48 | ENV_NAME = 'GraphEnv-v16' 49 | 50 | training_tm_ids = set(range(100)) 51 | 52 | hyper_parameter = { 53 | 'feature_size': 20, 54 | 't': 5, 55 | 'readout_units': 20, 56 | 'episode': 20, 57 | 'lr': 0.0002, 58 | 'lr_decay_rate': 0.96, 59 | 'lr_decay_step': 60, 60 | 'mini_batch': 55, 61 | 'gae_gamma': 0.99, 62 | 'gae_lambda': 0.95, 63 | 'clip_value': 0.5, 64 | 'entropy_beta': 0.01, 65 | 'entropy_step': 60, 66 | 'l2_regular': 0.0001, 67 | 'buffer_size': num_samples_top1 + num_samples_top2 + num_samples_top3, 68 | 'update_times': 8 69 | } 70 | 71 | dataset_root_folder = "../Enero_datasets/dataset_sing_top/data/results_my_3_tops_unif_05-1/" 72 | dataset_folder_name1 = "NEW_BtAsiaPac" 73 | dataset_folder_name2 = "NEW_Garr199905" 74 | dataset_folder_name3 = "NEW_Goodnet" 75 | 76 | dataset_folder_name1 = dataset_root_folder + dataset_folder_name1 77 | dataset_folder_name2 = dataset_root_folder + dataset_folder_name2 78 | dataset_folder_name3 = dataset_root_folder + dataset_folder_name3 79 | 80 | env_training1 = gym.make(ENV_NAME) 81 | env_training1.seed(SEED) 82 | env_training1.generate_environment(dataset_folder_name1 + "/TRAIN", "BtAsiaPac", 0, 100, percentage_demands) 83 | env_training1.top_K_critical_demands = take_critic_demands 84 | 85 | env_training2 = gym.make(ENV_NAME) 86 | env_training2.seed(SEED) 87 | env_training2.generate_environment(dataset_folder_name2 + "/TRAIN", "Garr199905", 0, 100, percentage_demands) 88 | env_training2.top_K_critical_demands = take_critic_demands 89 | 90 | env_training3 = gym.make(ENV_NAME) 91 | env_training3.seed(SEED) 92 | 
env_training3.generate_environment(dataset_folder_name3 + "/TRAIN", "Goodnet", 0, 100, percentage_demands) 93 | env_training3.top_K_critical_demands = take_critic_demands 94 | 95 | env_training = [env_training1, env_training2, env_training3] 96 | 97 | env_eval1 = gym.make(ENV_NAME) 98 | env_eval1.seed(SEED) 99 | env_eval1.generate_environment(dataset_folder_name1 + "/EVALUATE", "BtAsiaPac", 0, 100, percentage_demands) 100 | env_eval1.top_K_critical_demands = take_critic_demands 101 | 102 | env_eval2 = gym.make(ENV_NAME) 103 | env_eval2.seed(SEED) 104 | env_eval2.generate_environment(dataset_folder_name2 + "/EVALUATE", "Garr199905", 0, 100, percentage_demands) 105 | env_eval2.top_K_critical_demands = take_critic_demands 106 | 107 | env_eval3 = gym.make(ENV_NAME) 108 | env_eval3.seed(SEED) 109 | env_eval3.generate_environment(dataset_folder_name3 + "/EVALUATE", "Goodnet", 0, 100, percentage_demands) 110 | env_eval3.top_K_critical_demands = take_critic_demands 111 | 112 | env_eval = [env_eval1, env_eval2, env_eval3] 113 | 114 | counter_store_model = 0 115 | max_reward = -1000 116 | AC_policy = PPOAC(hyper_parameter) 117 | for iters in range(100): 118 | 119 | if iters * hyper_parameter['episode'] >= hyper_parameter['entropy_step']: 120 | AC_policy.entropy_beta = hyper_parameter['entropy_beta'] / 10 121 | for e in range(hyper_parameter['episode']): 122 | 123 | print(f"Episode {iters*hyper_parameter['episode']+e}") 124 | 125 | critic_features = [] 126 | tensors = [] 127 | actions = [] 128 | values = [] 129 | masks = [] 130 | rewards = [] 131 | actions_probs = [] 132 | 133 | total_num_samples = 0 134 | 135 | timer_a = time.time() 136 | AC_policy.actor.train() 137 | AC_policy.critic.train() 138 | 139 | for topo in range(len(env_training)): 140 | print(f"topo {topo+1}") 141 | number_samples_reached = False 142 | total_num_samples += num_samples_top[topo] 143 | tm_id = random.sample(training_tm_ids, 1)[0] 144 | while not number_samples_reached: 145 | demand, src, dst = env_training[topo].reset(tm_id=tm_id) 146 | while True: 147 | action_dist, tensor = AC_policy.predict(env_training[topo], src, dst) 148 | 149 | critic_feature = AC_policy.critic_get_graph_features(env_training[topo]) 150 | value = AC_policy.critic(critic_feature)[0] 151 | 152 | action = np.random.choice(len(action_dist), p=action_dist.cpu().detach().numpy()) 153 | action_one_hot = torch.nn.functional.one_hot(torch.tensor(action), num_classes=len(action_dist)) 154 | reward, done, _, demand, src, dst, _, _, _ = env_training[topo].step(action, demand, src, dst) 155 | mask = not done 156 | 157 | tensors.append(tensor) 158 | critic_features.append(critic_feature) 159 | actions.append(action_one_hot) 160 | values.append(value.cpu().detach()) 161 | masks.append(mask) 162 | rewards.append(reward) 163 | actions_probs.append(action_dist) 164 | 165 | if len(tensors) == total_num_samples: 166 | number_samples_reached = True 167 | break 168 | 169 | if done: 170 | break 171 | 172 | critic_feature = AC_policy.critic_get_graph_features(env_training[-1]) 173 | value = AC_policy.critic(critic_feature)[0] 174 | values.append(value.cpu().detach()) 175 | timer_b = time.time() 176 | print("collect_data", timer_b - timer_a, "sec") 177 | 178 | timer_a = time.time() 179 | returns, advantages = AC_policy.compute_gae(values, masks, rewards) 180 | actor_loss, critic_loss = AC_policy.update(actions, actions_probs, tensors, critic_features, returns, 181 | advantages) 182 | if AC_policy.scheduler.get_last_lr()[0] > 0.0001: 183 | AC_policy.scheduler.step() 184 | 
timer_b = time.time() 185 | print("update", timer_b - timer_a, "sec") 186 | 187 | fileLogs.write("a," + str(actor_loss.cpu().detach().numpy()) + ",\n") 188 | fileLogs.write("c," + str(critic_loss.cpu().detach().numpy()) + ",\n") 189 | fileLogs.flush() 190 | 191 | rewards_test = np.zeros(EVALUATION_EPISODES * 3) 192 | error_links = np.zeros(EVALUATION_EPISODES * 3) 193 | max_link_utis = np.zeros(EVALUATION_EPISODES * 3) 194 | min_link_utis = np.zeros(EVALUATION_EPISODES * 3) 195 | uti_stds = np.zeros(EVALUATION_EPISODES * 3) 196 | 197 | AC_policy.actor.eval() 198 | AC_policy.critic.eval() 199 | 200 | timer_a = time.time() 201 | for topo in range(len(env_eval)): 202 | for tm_id in range(EVALUATION_EPISODES): 203 | demand, src, dst = env_eval[topo].reset(tm_id=tm_id) 204 | total_reward = 0 205 | posi = EVALUATION_EPISODES * topo + tm_id 206 | while True: 207 | action_dist, _ = AC_policy.predict(env_eval[topo], src, dst) 208 | action = torch.argmax(action_dist) 209 | 210 | reward, done, error_eval_links, demand, src, dst, max_link_uti, min_link_uti, uti_std = \ 211 | env_eval[topo].step(action, demand, src, dst) 212 | 213 | total_reward += reward 214 | if done: 215 | break 216 | rewards_test[posi] = total_reward 217 | error_links[posi] = error_eval_links 218 | max_link_utis[posi] = max_link_uti[2] 219 | min_link_utis[posi] = min_link_uti 220 | uti_stds[posi] = uti_std 221 | 222 | timer_b = time.time() 223 | print("eval", timer_b - timer_a, "sec") 224 | eval_mean_reward = np.mean(rewards_test) 225 | fileLogs.write(";," + str(np.mean(uti_stds)) + ",\n") 226 | fileLogs.write("+," + str(np.mean(error_links)) + ",\n") 227 | fileLogs.write("<," + str(np.amax(max_link_utis)) + ",\n") 228 | fileLogs.write(">," + str(np.amax(min_link_utis)) + ",\n") 229 | fileLogs.write("ENTR," + str(AC_policy.entropy_beta) + ",\n") 230 | fileLogs.write("REW," + str(eval_mean_reward) + ",\n") 231 | fileLogs.write("lr," + str(AC_policy.scheduler.get_last_lr()[0]) + ",\n") 232 | 233 | if eval_mean_reward > max_reward: 234 | max_reward = eval_mean_reward 235 | fileLogs.write("MAX REWD: " + str(max_reward) + " REWD_ID: " + str(counter_store_model) + ",\n") 236 | torch.save(AC_policy.actor.state_dict(), model_dir + '/' + f'actor_{counter_store_model}.pt') 237 | torch.save(AC_policy.critic.state_dict(), model_dir + '/' + f'critic_{counter_store_model}.pt') 238 | counter_store_model += 1 239 | 240 | fileLogs.flush() 241 | 242 | gc.collect() 243 | fileLogs.close() 244 | torch.save(AC_policy.actor.state_dict(), model_dir + '/' + f'actor_final.pt') 245 | torch.save(AC_policy.critic.state_dict(), model_dir + '/' + f'critic_final.pt') 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /modelsEnero_3top_15_B_NEW/actor_60.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/modelsEnero_3top_15_B_NEW/actor_60.pt -------------------------------------------------------------------------------- /modelsEnero_3top_15_B_NEW/actor_final.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/modelsEnero_3top_15_B_NEW/actor_final.pt -------------------------------------------------------------------------------- /modelsEnero_3top_15_B_NEW/critic_60.pt: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/modelsEnero_3top_15_B_NEW/critic_60.pt
--------------------------------------------------------------------------------
/modelsEnero_3top_15_B_NEW/critic_final.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/modelsEnero_3top_15_B_NEW/critic_final.pt
--------------------------------------------------------------------------------
/parse_PPO.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | import os
4 | import matplotlib.pyplot as plt
5 | from operator import add, sub
6 | from scipy.signal import savgol_filter
7 | 
8 | def smooth(scalars, weight):  # Weight between 0 and 1
9 |     last = scalars[0]  # First value in the plot (first timestep)
10 |     smoothed = list()
11 |     for point in scalars:
12 |         smoothed_val = last * weight + (1 - weight) * point  # Calculate smoothed value
13 |         smoothed.append(smoothed_val)  # Save it
14 |         last = smoothed_val  # Anchor the last smoothed value
15 | 
16 |     return smoothed
17 | 
18 | def read_max_load_link(standard_out_file):
19 |     pre_optim_max_load_link, post_optim_max_load_link = 0, 0
20 |     with open(standard_out_file) as fd:
21 |         while True:
22 |             line = fd.readline()
23 |             if line.startswith("pre-optimization"):
24 |                 camps = line.split(" ")
25 |                 pre_optim_max_load_link = float(camps[-1].split('\n')[0])
26 |             elif line.startswith("post-optimization"):
27 |                 camps = line.split(" ")
28 |                 post_optim_max_load_link = float(camps[-1].split('\n')[0])
29 |                 break
30 |     return (pre_optim_max_load_link, post_optim_max_load_link)
31 | 
32 | if __name__ == "__main__":
33 |     # python parse_PPO.py -d ./Logs/expSP_3top_15_B_NEWLogs.txt
34 |     parser = argparse.ArgumentParser(description='Parse file and create plots')
35 | 
36 |     parser.add_argument('-d', help='data file', type=str, required=True, nargs='+')
37 |     args = parser.parse_args()
38 | 
39 |     aux = args.d[0].split(".")
40 |     aux = aux[1].split("exp")
41 |     differentiation_str = str(aux[1].split("Logs")[0])
42 | 
43 |     actor_loss = []
44 |     critic_loss = []
45 |     avg_std = []
46 |     max_link_uti = []
47 |     min_link_uti = []
48 |     defo_max_uti = []
49 |     error_links = []
50 |     avg_rewards = []
51 |     learning_rate = []
52 |     cumulative_rewards = []
53 | 
54 |     if not os.path.exists("./Images"):
55 |         os.makedirs("./Images")
56 | 
57 |     if not os.path.exists("./Images/TRAINING/"+differentiation_str):
58 |         os.makedirs("./Images/TRAINING/"+differentiation_str)
59 | 
60 |     path_to_dir = "./Images/TRAINING/"+differentiation_str+"/"
61 | 
62 |     model_id = 0
63 |     # Load best model
64 |     with open(args.d[0]) as fp:
65 |         for line in reversed(list(fp)):
66 |             arrayLine = line.split(":")
67 |             if arrayLine[0]=='MAX REWD':
68 |                 model_id = int(arrayLine[2].split(",")[0])
69 |                 break
70 | 
71 |     print("Model with maximum reward: ", model_id)
72 | 
73 |     with open(args.d[0]) as fp:
74 |         for line in fp:
75 |             arrayLine = line.split(",")
76 |             if arrayLine[0]=="<":
77 |                 max_link_uti.append(float(arrayLine[1]))
78 |             elif arrayLine[0]==">":
79 |                 min_link_uti.append(float(arrayLine[1]))
80 |             elif arrayLine[0]=="a":
81 |                 actor_loss.append(float(arrayLine[1]))
82 |             elif arrayLine[0]=="lr":
83 |                 learning_rate.append(float(arrayLine[1]))
84 |             elif arrayLine[0]==";":
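# Key legend for the one-character log sigils parsed here, matching the
# fileLogs.write calls in main.py: "a" = actor loss, "c" = critic loss,
# ";" = average std of link utilization, "+" = link errors, "<" = max link
# utilization, ">" = min link utilization, "REW" = mean evaluation reward,
# "lr" = learning rate, "ENTR" = entropy beta (not plotted here), and
# "MAX REWD:" marks a new best model checkpoint.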
85 |                 avg_std.append(float(arrayLine[1]))
86 |             elif arrayLine[0]=="+":
87 |                 error_links.append(float(arrayLine[1]))
88 |             elif arrayLine[0]=="REW":
89 |                 if float(arrayLine[1])<-3000:
90 |                     avg_rewards.append(-3000)
91 |                 else:
92 |                     avg_rewards.append(float(arrayLine[1]))
93 |             elif arrayLine[0]=="c":
94 |                 critic_loss.append(float(arrayLine[1]))
95 | 
96 |     plt.plot(actor_loss)
97 |     plt.xlabel("Training Episode")
98 |     plt.ylabel("ACTOR Loss")
99 |     plt.savefig(path_to_dir+"ACTORLoss" + differentiation_str)
100 |     plt.close()
101 | 
102 |     plt.plot(critic_loss)
103 |     plt.xlabel("Training Episode")
104 |     plt.ylabel("CRITIC Loss (MSE)")
105 |     plt.yscale("log")
106 |     plt.savefig(path_to_dir+"CRITICLoss" + differentiation_str)
107 |     plt.close()
108 | 
109 |     plt.plot(max_link_uti, label="DRL Max Link Uti")
110 |     plt.plot(defo_max_uti, label="DEFO Max Link Uti", c="tab:red")
111 | 
112 |     print("DRL MAX reward: ", np.amax(avg_rewards))
113 |     plt.xlabel("Episodes")
114 |     lgd = plt.legend(loc="lower left", bbox_to_anchor=(0.07, -0.22), ncol=2, fancybox=True, shadow=True)
115 |     plt.title("GNN+AC Testing score")
116 |     plt.ylabel("Maximum link utilization")
117 |     #plt.yscale('log')
118 |     plt.savefig(path_to_dir+"MaxLinkUti" + differentiation_str, bbox_extra_artists=(lgd,), bbox_inches='tight')
119 |     plt.close()
120 | 
121 |     plt.plot(min_link_uti)
122 |     plt.xlabel("Episodes")
123 |     plt.title("GNN+AC Testing score")
124 |     plt.ylabel("Minimum link utilization")
125 |     plt.savefig(path_to_dir+"MinLinkUti" + differentiation_str)
126 |     plt.close()
127 | 
128 |     plt.plot(avg_rewards)
129 |     plt.xlabel("Episodes")
130 |     plt.title("GNN+AC Testing score")
131 |     plt.ylabel("Average reward")
132 |     plt.savefig(path_to_dir+"AvgReward" + differentiation_str)
133 |     plt.close()
134 | 
135 |     plt.plot(learning_rate)
136 |     plt.xlabel("Episodes")
137 |     plt.title("GNN+AC Testing score")
138 |     plt.ylabel("Learning rate")
139 |     plt.savefig(path_to_dir+"Lr_" + differentiation_str)
140 |     plt.close()
141 | 
142 |     plt.plot(error_links)
143 |     plt.xlabel("Episodes")
144 |     plt.title("GNN+AC Testing score")
145 |     plt.ylabel("Error link (sum_total_TM/num_links)")
146 |     plt.savefig(path_to_dir+"ErrorLinks" + differentiation_str)
147 |     plt.close()
148 | 
149 |     plt.plot(avg_std)
150 |     plt.xlabel("Episodes")
151 |     plt.title("GNN+AC Testing score")
152 |     plt.ylabel("Avg std of link utilization")
153 |     plt.savefig(path_to_dir+"AvgStdUti" + differentiation_str)
154 |     plt.close()
155 | 
--------------------------------------------------------------------------------
/requitrment.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url https://download.pytorch.org/whl/cu116
2 | torch==1.13.1
3 | networkx==2.5
4 | gym==0.17.3
5 | pandas
6 | matplotlib==3.4.1
7 | seaborn
8 | pickle5
9 | numpy<1.24
--------------------------------------------------------------------------------
/runs/1/events.out.tfevents.1650360660.barry.182599.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/runs/1/events.out.tfevents.1650360660.barry.182599.0
--------------------------------------------------------------------------------
/runs/1/events.out.tfevents.1650363931.barry.184203.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/runs/1/events.out.tfevents.1650363931.barry.184203.0
--------------------------------------------------------------------------------
/runs/2/events.out.tfevents.1650452288.barry.199430.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/runs/2/events.out.tfevents.1650452288.barry.199430.0
--------------------------------------------------------------------------------
/script_eval_on_single_topology.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gym
3 | import os
4 | import json
5 | import gym_graph
6 | import random
7 | import argparse
8 | import time as tt
9 | import torch
10 | import pickle
11 | import sys
12 | from actor_critic import PPOAC
13 | sys.setrecursionlimit(2000)
14 | 
15 | # This script is used to evaluate a DRL agent on a single instance of a topology and a TM
16 | # from the repetita dataset. The eval_on_single_topology.py script calls this script for each TM
17 | 
18 | # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
19 | 
20 | ENV_MIDDROUT_AGENT_SP = 'GraphEnv-v16'
21 | ENV_SIMM_ANEAL_AGENT = 'GraphEnv-v15'
22 | ENV_SAP_AGENT = 'GraphEnv-v20'
23 | SEED = 9
24 | 
25 | percentage_demands = 15  # Percentage of demands that will be used in the optimization
26 | str_perctg_demands = str(percentage_demands)
27 | percentage_demands /= 100
28 | 
29 | os.environ['PYTHONHASHSEED']=str(SEED)
30 | np.random.seed(SEED)
31 | torch.manual_seed(1)
32 | 
33 | # Indicates how many time-steps an episode has
34 | EPISODE_LENGTH_MIDDROUT = 100
35 | NUM_ACTIONS = 100  # Put a very large number if we want to take all possible actions for each topology
36 | 
37 | MAX_NUM_EDGES = 100
38 | 
39 | def play_middRout_games_sp(tm_id, env_middRout_sp, agent, timesteps):
40 |     demand, source, destination = env_middRout_sp.reset(tm_id)
41 |     rewardAddTest = 0
42 | 
43 |     initMaxUti = env_middRout_sp.edgeMaxUti[2]
44 |     OSPF_init = initMaxUti
45 |     best_routing = env_middRout_sp.sp_middlepoints_step.copy()
46 | 
47 |     list_of_demands_to_change = env_middRout_sp.list_eligible_demands
48 |     timesteps.append((0, initMaxUti))
49 | 
50 |     start = tt.time()
51 |     time_start_DRL = start
52 |     while 1:
53 |         action_dist, tensor = agent.predict(env_middRout_sp, source, destination)
54 |         action = torch.argmax(action_dist)
55 | 
56 |         reward, done, error_eval_links, demand, source, destination, maxLinkUti, minLinkUti, utiStd = env_middRout_sp.step(action, demand, source, destination)
57 |         rewardAddTest += reward
58 |         if maxLinkUti[2]energy:
110 |                     energy = env.edge_state[position][0]/link_capacity
111 |                 position = position + 1
112 | 
113 |         # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
114 |         if str(source)+':'+str(destination) in env.sp_middlepoints:
115 |             middlepoint = env.sp_middlepoints[str(source)+':'+str(destination)]
116 |             env.decrease_links_utilization_sp(source, middlepoint, source, destination)
117 |             env.decrease_links_utilization_sp(middlepoint, destination, source, destination)
118 |             del env.sp_middlepoints[str(source)+':'+str(destination)]
119 |         else:  # Remove the bandwidth allocated from the src to the destination
120 |             env.decrease_links_utilization_sp(source, destination, source, destination)
121 | 
122 |         # Allocate back the demand whose actions we explored
123 |         # If the current demand had a middlepoint, we allocate src-middlepoint-dst
124 |         if originalMiddlepoint>=0:
125 |             # First we allocate until the middlepoint
126 |             env.allocate_to_destination_sp(source, originalMiddlepoint, source, destination)
127 |             # Then we allocate from the middlepoint to the destination
128 |             env.allocate_to_destination_sp(originalMiddlepoint, destination, source, destination)
129 |             # We store that the pair source,destination has a middlepoint
130 |             env.sp_middlepoints[str(source)+':'+str(destination)] = originalMiddlepoint
131 |         else:
132 |             # Otherwise we allocate directly from the source to the destination
133 |             env.allocate_to_destination_sp(source, destination, source, destination)
134 | 
135 |         return energy, action, source, destination
136 | 
137 | 
138 | def play_sp_simulated_annealing_games(tm_id):
139 |     env_sim_anneal = gym.make(ENV_SIMM_ANEAL_AGENT)
140 |     env_sim_anneal.seed(SEED)
141 |     env_sim_anneal.generate_environment(general_dataset_folder, graph_topology_name, EPISODE_LENGTH_MIDDROUT, NUM_ACTIONS, percentage_demands)
142 | 
143 |     init_energy = env_sim_anneal.reset_sp(tm_id)
144 |     sim_agent = SIMULATED_ANNEALING_SP(env_sim_anneal)
145 | 
146 |     Tmax = 1
147 |     Tmin = 0.000001
148 |     cooling_ratio = 0.000001  # best value is 0.0001 but very slow
149 |     T = Tmax
150 |     L = 4  # Number of trials per temperature value. With L=3 I get even better results
151 |     energy = init_energy
152 |     itera = 0
153 | 
154 |     start = tt.time()
155 |     while T>Tmin:
156 |         for _ in range(L):
157 |             next_energy, action, source, destination = sim_agent.next_state(env_sim_anneal)
158 |             delta_energy = (energy-next_energy)
159 |             itera += 1
160 |             # If we decreased the maximum link utilization we take the action
161 |             if delta_energy>0:
162 |                 # We de-allocate the chosen path so that we can apply the chosen action afterwards
163 |                 # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
164 |                 if str(source)+':'+str(destination) in env_sim_anneal.sp_middlepoints:
165 |                     middlepoint = env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
166 |                     originalMiddlepoint = env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
167 |                     env_sim_anneal.decrease_links_utilization_sp(source, middlepoint, source, destination)
168 |                     env_sim_anneal.decrease_links_utilization_sp(middlepoint, destination, source, destination)
169 |                     del env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
170 |                 else:  # Remove the bandwidth allocated from the src to the destination
171 |                     env_sim_anneal.decrease_links_utilization_sp(source, destination, source, destination)
172 |                 energy = env_sim_anneal.step_sp(action, source, destination)
173 |             # If not, accept the action with some probability
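# (For reference, the elif below is the Metropolis acceptance rule: a worsening
#  move with delta_energy < 0 is accepted with probability exp(delta_energy / T),
#  so e.g. a 0.1 increase in max utilization is accepted roughly 90% of the time
#  at T = 1, but essentially never once T approaches Tmin.)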
174 |             elif np.exp(delta_energy/T)>random.uniform(0, 1):
175 |                 # We de-allocate the chosen path so that we can apply the chosen action afterwards
176 |                 # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
177 |                 if str(source)+':'+str(destination) in env_sim_anneal.sp_middlepoints:
178 |                     middlepoint = env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
179 |                     originalMiddlepoint = env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
180 |                     env_sim_anneal.decrease_links_utilization_sp(source, middlepoint, source, destination)
181 |                     env_sim_anneal.decrease_links_utilization_sp(middlepoint, destination, source, destination)
182 |                     del env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
183 |                 else:  # Remove the bandwidth allocated from the src to the destination
184 |                     env_sim_anneal.decrease_links_utilization_sp(source, destination, source, destination)
185 |                 energy = env_sim_anneal.step_sp(action, source, destination)
186 |         T -= cooling_ratio
187 |     end = tt.time()
188 |     return energy, end-start
189 | 
190 | class HILL_CLIMBING:
191 |     def __init__(self, env):
192 |         self.num_actions = env.K
193 | 
194 |     def get_value_sp(self, env, source, destination, action):
195 |         # We get the K-middlepoints between source-destination
196 |         middlePointList = list(env.src_dst_k_middlepoints[str(source) +':'+ str(destination)])
197 |         middlePoint = middlePointList[action]
198 | 
199 |         # First we allocate until the middlepoint
200 |         env.allocate_to_destination_sp(source, middlePoint, source, destination)
201 |         # If we allocated to a middlepoint that is not the final destination
202 |         if middlePoint!=destination:
203 |             # Then we allocate from the middlepoint to the destination
204 |             env.allocate_to_destination_sp(middlePoint, destination, source, destination)
205 |             # We store that the pair source,destination has a middlepoint
206 |             env.sp_middlepoints[str(source)+':'+str(destination)] = middlePoint
207 | 
208 |         currentValue = -1000000
209 |         position = 0
210 |         # Get the maximum loaded link and its value after allocating to the corresponding middlepoint
211 |         for i in env.graph:
212 |             for j in env.graph[i]:
213 |                 link_capacity = env.links_bw[i][j]
214 |                 if env.edge_state[position][0]/link_capacity>currentValue:
215 |                     currentValue = env.edge_state[position][0]/link_capacity
216 |                 position = position + 1
217 | 
218 |         # Undo the allocation so that we can try another action later
219 |         # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
220 |         if str(source)+':'+str(destination) in env.sp_middlepoints:
221 |             middlepoint = env.sp_middlepoints[str(source)+':'+str(destination)]
222 |             env.decrease_links_utilization_sp(source, middlepoint, source, destination)
223 |             env.decrease_links_utilization_sp(middlepoint, destination, source, destination)
224 |             del env.sp_middlepoints[str(source)+':'+str(destination)]
225 |         else:  # Remove the bandwidth allocated from the src to the destination
226 |             env.decrease_links_utilization_sp(source, destination, source, destination)
227 | 
228 |         return -currentValue
229 | 
230 |     def explore_neighbourhood_sp(self, env):
231 |         dem_iter = 0
232 |         nextVal = -1000000
233 |         next_state = None
234 | 
235 |         # Iterate over each possible demand
236 |         for source in range(env.numNodes):
237 |             for dest in range(env.numNodes):
238 |                 if source!=dest:
239 |                     for action in range(len(env.src_dst_k_middlepoints[str(source)+':'+str(dest)])):
240 |                         middlepoint = -1
241 |                         # First we need to de-allocate the current demand before we explore all its possible actions
242 |                         # Check if there is a middlepoint to de-allocate from src-middlepoint-dst
243 |                         if str(source)+':'+str(dest) in env.sp_middlepoints:
244 |                             middlepoint = env.sp_middlepoints[str(source)+':'+str(dest)]
245 |                             env.decrease_links_utilization_sp(source, middlepoint, source, dest)
246 |                             env.decrease_links_utilization_sp(middlepoint, dest, source, dest)
247 |                             del env.sp_middlepoints[str(source)+':'+str(dest)]
248 |                         # Else, there is no middlepoint and we de-allocate the entire src,dst
249 |                         else:
250 |                             # Remove the bandwidth allocated from the src to the destination
251 |                             env.decrease_links_utilization_sp(source, dest, source, dest)
252 | 
253 |                         evalState = self.get_value_sp(env, source, dest, action)
254 |                         if evalState > nextVal:
255 |                             nextVal = evalState
256 |                             next_state = (action, source, dest)
257 | 
258 |                         # Allocate back the demand whose actions we explored
259 |                         # If the current demand had a middlepoint, we allocate src-middlepoint-dst
260 |                         if middlepoint>=0:
261 |                             # First we allocate until the middlepoint
262 |                             env.allocate_to_destination_sp(source, middlepoint, source, dest)
263 |                             # Then we allocate from the middlepoint to the destination
264 |                             env.allocate_to_destination_sp(middlepoint, dest, source, dest)
265 |                             # We store that the pair source,destination has a middlepoint
266 |                             env.sp_middlepoints[str(source)+':'+str(dest)] = middlepoint
267 |                         else:
268 |                             # Otherwise we allocate directly from the source to the destination
269 |                             env.allocate_to_destination_sp(source, dest, source, dest)
270 |         return nextVal, next_state
271 | 
272 |     def explore_neighbourhood_DRL_sp(self, env):
273 |         dem_iter = 0
274 |         nextVal = -1000000
275 |         next_state = None
276 | 
277 |         # We iterate over the top critical demands
278 |         for elem in env.list_eligible_demands:
279 |             source = elem[0]
280 |             dest = elem[1]
281 |             for action in range(len(env.src_dst_k_middlepoints[str(source)+':'+str(dest)])):
282 |                 middlepoint = -1
283 |                 # First we need to de-allocate the current demand before we explore all its possible actions
284 |                 # Check if there is a middlepoint to de-allocate from src-middlepoint-dst
285 |                 if str(source)+':'+str(dest) in env.sp_middlepoints:
286 |                     middlepoint = env.sp_middlepoints[str(source)+':'+str(dest)]
287 |                     env.decrease_links_utilization_sp(source, middlepoint, source, dest)
288 |                     env.decrease_links_utilization_sp(middlepoint, dest, source, dest)
289 |                     del env.sp_middlepoints[str(source)+':'+str(dest)]
290 |                 # Else, there is no middlepoint and we de-allocate the entire src,dst
291 |                 else:
292 |                     # Remove the bandwidth allocated from the src to the destination
293 |                     env.decrease_links_utilization_sp(source, dest, source, dest)
294 | 
295 |                 evalState = self.get_value_sp(env, source, dest, action)
296 |                 if evalState > nextVal:
297 |                     nextVal = evalState
298 |                     next_state = (action, source, dest)
299 | 
300 |                 # Allocate back the demand whose actions we explored
301 |                 # If the current demand had a middlepoint, we allocate src-middlepoint-dst
302 |                 if middlepoint>=0:
303 |                     # First we allocate until the middlepoint
304 |                     env.allocate_to_destination_sp(source, middlepoint, source, dest)
305 |                     # Then we allocate from the middlepoint to the destination
306 |                     env.allocate_to_destination_sp(middlepoint, dest, source, dest)
307 |                     # We store that the pair source,destination has a middlepoint
308 |                     env.sp_middlepoints[str(source)+':'+str(dest)] = middlepoint
309 |                 else:
310 |                     # Otherwise we allocate directly from the source to the destination
311 |                     env.allocate_to_destination_sp(source, dest, source, dest)
312 |         return nextVal, next_state
313 | 
314 | def play_sp_hill_climbing_games(tm_id):
315 |     # Here we use sp in hill climbing to select the middlepoint and to evaluate
316 |     env_hill_climb = gym.make(ENV_SIMM_ANEAL_AGENT)
317 |     env_hill_climb.seed(SEED)
318 |     env_hill_climb.generate_environment(general_dataset_folder, graph_topology_name, EPISODE_LENGTH_MIDDROUT, NUM_ACTIONS, percentage_demands)
319 | 
320 |     currentVal = env_hill_climb.reset_hill_sp(tm_id)
321 |     hill_climb_agent = HILL_CLIMBING(env_hill_climb)
322 |     start = tt.time()
323 |     while 1:
324 |         nextVal, next_state = hill_climb_agent.explore_neighbourhood_sp(env_hill_climb)
325 |         # If the difference between the two values is very small but non-zero, we break (for precision reasons)
326 |         if nextVal<=currentVal or (abs((-1)*nextVal-(-1)*currentVal)<1e-4):
327 |             break
328 | 
329 |         # Before we apply the new action, we need to remove the current allocation of the chosen demand
330 |         action = next_state[0]
331 |         source = next_state[1]
332 |         dest = next_state[2]
333 | 
334 |         # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
335 |         if str(source)+':'+str(dest) in env_hill_climb.sp_middlepoints:
336 |             middlepoint = env_hill_climb.sp_middlepoints[str(source)+':'+str(dest)]
337 |             env_hill_climb.decrease_links_utilization_sp(source, middlepoint, source, dest)
338 |             env_hill_climb.decrease_links_utilization_sp(middlepoint, dest, source, dest)
339 |             del env_hill_climb.sp_middlepoints[str(source)+':'+str(dest)]
340 |         # If there is no middlepoint assigned to the src,dst pair
341 |         else:
342 |             # Remove the bandwidth allocated from the src to the destination using sp
343 |             env_hill_climb.decrease_links_utilization_sp(source, dest, source, dest)
344 | 
345 |         # We apply the new chosen action to the selected demand
346 |         currentVal = env_hill_climb.step_hill_sp(action, source, dest)
347 |     end = tt.time()
348 |     return currentVal*(-1), end-start
349 | 
350 | def play_DRL_GNN_sp_hill_climbing_games(tm_id, best_routing, list_of_demands_to_change, timesteps, time_start_DRL):
351 |     # Here we use sp in hill climbing to select the middlepoint and to evaluate
352 |     env_hill_climb = gym.make(ENV_SIMM_ANEAL_AGENT)
353 |     env_hill_climb.seed(SEED)
354 |     env_hill_climb.generate_environment(general_dataset_folder, graph_topology_name, EPISODE_LENGTH_MIDDROUT, NUM_ACTIONS, percentage_demands)
355 | 
356 |     currentVal = env_hill_climb.reset_DRL_hill_sp(tm_id, best_routing, list_of_demands_to_change)
357 |     hill_climb_agent = HILL_CLIMBING(env_hill_climb)
358 |     start = tt.time()
359 |     while 1:
360 |         nextVal, next_state = hill_climb_agent.explore_neighbourhood_DRL_sp(env_hill_climb)
361 |         # If the difference between the two values is very small but non-zero, we break (for precision reasons)
362 |         if nextVal<=currentVal or (abs((-1)*nextVal-(-1)*currentVal)<1e-4):
363 |             break
364 | 
365 |         # Before we apply the new action, we need to remove the current allocation of the chosen demand
366 |         action = next_state[0]
367 |         source = next_state[1]
368 |         dest = next_state[2]
369 | 
370 |         # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
371 |         if str(source)+':'+str(dest) in env_hill_climb.sp_middlepoints:
372 |             middlepoint = env_hill_climb.sp_middlepoints[str(source)+':'+str(dest)]
373 |             env_hill_climb.decrease_links_utilization_sp(source, middlepoint, source, dest)
374 |             env_hill_climb.decrease_links_utilization_sp(middlepoint, dest, source, dest)
375 |             del env_hill_climb.sp_middlepoints[str(source)+':'+str(dest)]
376 |         # If there is no middlepoint assigned to the src,dst pair
377 |         else:
378 |             # Remove the bandwidth allocated from the src to the destination using sp
379 |             env_hill_climb.decrease_links_utilization_sp(source, dest, source, dest)
380 | 
381 |         # We apply the new chosen action to the selected demand
382 |         currentVal = env_hill_climb.step_hill_sp(action, source, dest)
383 |         timer = tt.time()
384 |         timesteps.append((timer-time_start_DRL, currentVal*(-1)))
385 |     end = tt.time()
386 |     return currentVal*(-1), end-start
387 | 
388 | class SAPAgent:
389 |     def __init__(self, env):
390 |         self.K = env.K
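# (SAP here presumably stands for Shortest Available Path: act() below walks the
#  precomputed candidate paths for the (n1, n2) pair in order and returns the
#  index of the first path whose links can all absorb the demand without
#  exceeding a utilization of 1.0, or -1 if no candidate path fits.)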
391 | 
392 |     def act(self, env, demand, n1, n2):
393 |         pathList = env.allPaths[str(n1) +':'+ str(n2)]
394 |         path = 0
395 |         allocated = 0  # Indicates 1 if we allocated the demand, 0 otherwise
396 |         while allocated==0 and path < len(pathList) and path 1:
406 |                     can_allocate = 0
407 |                     break
408 |                 i = i + 1
409 |                 j = j + 1
410 | 
411 |             if can_allocate==1:
412 |                 return path
413 |             path = path + 1
414 | 
415 |         return -1
416 | 
417 | def play_sap_games(tm_id):
418 |     env_sap = gym.make(ENV_SAP_AGENT)
419 |     env_sap.seed(SEED)
420 |     env_sap.generate_environment(general_dataset_folder, graph_topology_name, EPISODE_LENGTH_MIDDROUT, NUM_ACTIONS)
421 | 
422 |     demand, source, destination = env_sap.reset(tm_id)
423 |     sap_Agent = SAPAgent(env_sap)
424 | 
425 |     rewardAddTest = 0
426 |     start = tt.time()
427 |     while 1:
428 |         action = sap_Agent.act(env_sap, demand, source, destination)
429 | 
430 |         done, error_eval_links, demand, source, destination, maxLinkUti, minLinkUti, utiStd = env_sap.step(action, demand, source, destination)
431 |         if done:
432 |             break
433 |     end = tt.time()
434 |     return maxLinkUti[2], end-start
435 | 
436 | def play_middRout_games(tm_id, env_middRout, agent):
437 |     demand, source, destination = env_middRout.reset(tm_id)
438 |     rewardAddTest = 0
439 |     while 1:
440 |         # Change to agent.pred_action_node_distrib_sp to choose the middlepoint using only the SPs
441 |         action_dist, tensor = agent.pred_action_node_distrib_sp(env_middRout, source, destination)
442 |         action = np.argmax(action_dist)
443 | 
444 |         reward, done, error_eval_links, demand, source, destination, maxLinkUti, minLinkUti, utiStd = env_middRout.step(action, demand, source, destination)
445 |         rewardAddTest += reward
446 |         if done:
447 |             break
448 |     return rewardAddTest, maxLinkUti[2], minLinkUti, utiStd
449 | 
450 | 
451 | if __name__ == "__main__":
452 | 
453 |     hyper_parameter = {
454 |         'feature_size': 20,
455 |         't': 5,
456 |         'readout_units': 20,
457 |         'episode': 20,
458 |         'lr': 0.0002,
459 |         'lr_decay_rate': 0.96,
460 |         'lr_decay_step': 60,
461 |         'mini_batch': 55,
462 |         'gae_gamma': 0.99,
463 |         'gae_lambda': 0.95,
464 |         'clip_value': 0.5,
465 |         'entropy_beta': 0.01,
466 |         'entropy_step': 60,
467 |         'l2_regular': 0.0001,
468 |         'buffer_size': 0,
469 |         'update_times': 8
470 |     }
471 | 
472 |     # Parse command-line arguments
473 |     parser = argparse.ArgumentParser(description='Evaluate a DRL agent on a single topology and TM')
474 | 
475 |     parser.add_argument('-t', help='DEFO demands TM file id', type=str, required=True, nargs='+')
476 |     parser.add_argument('-g', help='graph topology name', type=str, required=True, nargs='+')
477 |     parser.add_argument('-m', help='model id whose weights to load', type=str, required=True, nargs='+')
478 |     parser.add_argument('-o', help='Where to store the pckl file', type=str, required=True, nargs='+')
479 |     parser.add_argument('-d', help='differentiation string', type=str, required=True, nargs='+')
480 |     parser.add_argument('-f', help='general dataset folder name', type=str, required=True, nargs='+')
481 |     parser.add_argument('-f2', help='specific dataset folder name', type=str, required=True, nargs='+')
482 |     args = parser.parse_args()
483 | 
484 |     drl_eval_res_folder = args.o[0]
485 |     tm_id = int(args.t[0])
486 |     model_id = args.m[0]
487 |     differentiation_str = args.d[0]
488 |     graph_topology_name = args.g[0]
489 |     general_dataset_folder = args.f[0]
490 |     specific_dataset_folder = args.f2[0]
491 | 
492 |     timesteps = list()
493 |     results = np.zeros(17)
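# Layout of the results vector, as filled in further below (unassigned slots stay 0):
# [3] DRL + hill climbing max uti, [4] simulated annealing max uti,
# [6] number of edges (used to order the figures), [7] hill climbing max uti,
# [8] SAP max uti, [9] DRL max uti, [11] initial OSPF max uti,
# [12]-[15] optimization times of SA, SAP, DRL and hill climbing respectively,
# [16] combined DRL + hill climbing time.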
494 | 
495 |     ########### The following lines of code are used to evaluate a DRL SP-based agent
496 |     env_DRL_SP = gym.make(ENV_MIDDROUT_AGENT_SP)
497 |     env_DRL_SP.seed(SEED)
498 |     env_DRL_SP.generate_environment(general_dataset_folder, graph_topology_name, EPISODE_LENGTH_MIDDROUT, NUM_ACTIONS, percentage_demands)
499 |     # Set to True if we want to take the top X% of the 5 most loaded links
500 |     env_DRL_SP.top_K_critical_demands = True
501 | 
502 |     DRL_SP_Agent = PPOAC(hyper_parameter)
503 |     model_dir = "./models" + differentiation_str
504 |     DRL_SP_Agent.actor.load_state_dict(torch.load(model_dir + f"/actor_{model_id}.pt"))
505 |     DRL_SP_Agent.actor.eval()
506 |     # The actor weights are restored from the chosen checkpoint
507 |     print("Restored DRL_SP model ", f"/actor_{model_id}.pt")
508 | 
509 |     ################################################
510 | 
511 |     # We can also use simulated annealing but it is going to take a while
512 |     max_link_uti_sim_annealing, optim_cost_SA = 1, 1  # play_sp_simulated_annealing_games(tm_id)
513 | 
514 |     max_link_uti_sp_hill_climb, optim_cost_HILL = play_sp_hill_climbing_games(tm_id)
515 | 
516 |     max_link_uti_SAP, optim_cost_SAP = 1, 1  # play_sap_games(tm_id)
517 | 
518 |     max_link_uti_DRL_SP, optim_cost_DRL_GNN, OSPF_init, best_routing, list_of_demands_to_change, time_start_DRL = play_middRout_games_sp(tm_id, env_DRL_SP, DRL_SP_Agent, timesteps)
519 | 
520 |     max_link_uti_DRL_SP_HILL, optim_cost_DRL_HILL = play_DRL_GNN_sp_hill_climbing_games(tm_id, best_routing, list_of_demands_to_change, timesteps, time_start_DRL)
521 | 
522 |     new_timesteps = list()
523 |     for elem in timesteps:
524 |         new_timesteps.append((elem[0], elem[1], time_start_DRL, max_link_uti_DRL_SP))
525 | 
526 |     print("MAX UTI before and after optimizing: ", OSPF_init, max_link_uti_DRL_SP_HILL, tm_id)
527 | 
528 |     results[3] = max_link_uti_DRL_SP_HILL
529 |     results[4] = max_link_uti_sim_annealing
530 |     results[6] = len(env_DRL_SP.defoDatasetAPI.Gbase.edges())  # We store the number of edges to order the figures
531 |     results[7] = max_link_uti_sp_hill_climb
532 |     results[8] = max_link_uti_SAP
533 |     results[9] = max_link_uti_DRL_SP
534 |     results[11] = OSPF_init
535 |     results[12] = optim_cost_SA
536 |     results[13] = optim_cost_SAP
537 |     results[14] = optim_cost_DRL_GNN
538 |     results[15] = optim_cost_HILL
539 |     results[16] = optim_cost_DRL_GNN+optim_cost_DRL_HILL
540 | 
541 |     path_to_pckl_rewards = drl_eval_res_folder + differentiation_str + '/' + graph_topology_name + '/'
542 |     if not os.path.exists(path_to_pckl_rewards):
543 |         os.makedirs(path_to_pckl_rewards)
544 | 
545 |     with open(path_to_pckl_rewards + graph_topology_name + '.' + str(tm_id) + ".pckl", 'wb') as f:
546 |         pickle.dump(results, f, pickle.HIGHEST_PROTOCOL)
547 | 
548 |     with open(path_to_pckl_rewards + graph_topology_name + '.' + str(tm_id) + ".timesteps", 'w') as fp:
549 |         json.dump(new_timesteps, fp)
--------------------------------------------------------------------------------