├── .gitignore ├── Actor.py ├── Critic.py ├── Images ├── EVALUATION │ └── Enero_3top_15_B_NEW │ │ ├── Figure_5.png │ │ └── Figure_6.png └── TRAINING │ └── Enero_3top_15_B_NEW │ ├── ACTORLossEnero_3top_15_B_NEW.png │ ├── AvgRewardEnero_3top_15_B_NEW.png │ ├── AvgStdUtiEnero_3top_15_B_NEW.png │ ├── CRITICLossEnero_3top_15_B_NEW.png │ ├── ErrorLinksEnero_3top_15_B_NEW.png │ ├── Lr_Enero_3top_15_B_NEW.png │ ├── MaxLinkUtiEnero_3top_15_B_NEW.png │ └── MinLinkUtiEnero_3top_15_B_NEW.png ├── LICENSE ├── Logs └── expEnero_3top_15_B_NEWLogs.txt ├── README.md ├── actor_critic.py ├── defo_process_results.py ├── eval.py ├── eval_on_single_topology.py ├── figures_5_and_6.py ├── gym-graph ├── gym_graph │ ├── __init__.py │ └── envs │ │ ├── __init__.py │ │ ├── environment15.py │ │ ├── environment16.py │ │ └── environment20.py └── setup.py ├── gym_env ├── gym_env │ ├── __init__.py │ └── envs │ │ ├── __init__.py │ │ └── env1.py └── setup.py ├── main.py ├── modelsEnero_3top_15_B_NEW ├── actor_60.pt ├── actor_final.pt ├── critic_60.pt └── critic_final.pt ├── parse_PPO.py ├── requitrment.txt ├── runs ├── 1 │ ├── events.out.tfevents.1650360660.barry.182599.0 │ └── events.out.tfevents.1650363931.barry.184203.0 └── 2 │ └── events.out.tfevents.1650452288.barry.199430.0 └── script_eval_on_single_topology.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | test.py 3 | gym_env/gym_env.egg-info/ 4 | -------------------------------------------------------------------------------- /Actor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | 6 | class Actor(nn.Module): 7 | def __init__(self, feature_size=20, t=4, readout_units=20): 8 | super(Actor, self).__init__() 9 | self.feature_size = feature_size 10 | self.t = t 11 | self.readout_units = readout_units 12 | self.message = nn.Sequential( 13 | nn.Linear(feature_size*2, feature_size), 14 | nn.SELU() 15 | ) 16 | self.message.apply(self._init_hidden_weights) 17 | self.update = nn.GRUCell(input_size=feature_size, hidden_size=feature_size) 18 | self.update.apply(self._init_hidden_weights) 19 | self.readout = nn.Sequential( 20 | nn.Linear(feature_size, self.readout_units), 21 | nn.SELU(), 22 | nn.Linear(self.readout_units, self.readout_units), 23 | nn.SELU() 24 | ) 25 | self.readout.apply(self._init_hidden_weights) 26 | self.out_layer = nn.Linear(self.readout_units, 1) 27 | torch.nn.init.orthogonal_(self.out_layer.weight, gain=np.sqrt(0.01)) 28 | torch.nn.init.constant_(self.out_layer.bias, 0) 29 | 30 | def _init_hidden_weights(self, m): 31 | if isinstance(m, nn.Linear): 32 | torch.nn.init.orthogonal_(m.weight, gain=np.sqrt(2)) 33 | torch.nn.init.constant_(m.bias, 0) 34 | if isinstance(m, nn.GRUCell): 35 | torch.nn.init.xavier_uniform_(m.weight_ih) 36 | torch.nn.init.xavier_uniform_(m.weight_hh) 37 | torch.nn.init.constant_(m.bias_ih, 0) 38 | torch.nn.init.constant_(m.bias_hh, 0) 39 | 40 | def forward(self, x): 41 | state = x['link_state'] 42 | first = x['first'].unsqueeze(1).expand(-1, x['state_dim']) 43 | second = x['second'].unsqueeze(1).expand(-1, x['state_dim']) 44 | graph_id = x['graph_id'].unsqueeze(1).expand(-1, x['state_dim']) 45 | 46 | for _ in range(self.t): 47 | main_edges = torch.gather(state, 0, first) 48 | neigh_edges = torch.gather(state, 0, second) 49 | edges_concat = torch.cat((main_edges, neigh_edges), 1) 50 | m = self.message(edges_concat) 51 | 52 | m = torch.zeros(state.shape, 
dtype=m.dtype, device=state.device).scatter_add_(0, second, m) 53 | state = self.update(m, state) 54 | 55 | feature = torch.zeros((x['num_actions'], x['state_dim']), dtype=state.dtype, 56 | device=state.device).scatter_add_(0, graph_id, state) 57 | output = self.out_layer(self.readout(feature)) 58 | 59 | return output 60 | 61 | 62 | """ 63 | 64 | link_state: 65 | link_capacity: float 66 | link_utilization: float 67 | action: mark bw 68 | bw: float 69 | 70 | input: 71 | link_state 72 | pair: [0, 1] => [[0, 0], [1, 1]] 73 | 74 | """ -------------------------------------------------------------------------------- /Critic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | 6 | class Critic(nn.Module): 7 | def __init__(self, feature_size=20, t=4, readout_units=20): 8 | super(Critic, self).__init__() 9 | self.feature_size = feature_size 10 | self.t = t 11 | self.readout_units = readout_units 12 | self.message = nn.Sequential( 13 | nn.Linear(feature_size*2, feature_size), 14 | nn.SELU() 15 | ) 16 | self.message.apply(self._init_hidden_weights) 17 | self.update = nn.GRUCell(input_size=feature_size, hidden_size=feature_size) 18 | self.update.apply(self._init_hidden_weights) 19 | self.readout = nn.Sequential( 20 | nn.Linear(feature_size, self.readout_units), 21 | nn.SELU(), 22 | nn.Linear(self.readout_units, self.readout_units), 23 | nn.SELU() 24 | ) 25 | self.readout.apply(self._init_hidden_weights) 26 | self.out_layer = nn.Linear(self.readout_units, 1) 27 | torch.nn.init.orthogonal_(self.out_layer.weight, gain=np.sqrt(1)) 28 | torch.nn.init.constant_(self.out_layer.bias, 0) 29 | 30 | def _init_hidden_weights(self, m): 31 | if isinstance(m, nn.Linear): 32 | torch.nn.init.orthogonal_(m.weight, gain=np.sqrt(2)) 33 | torch.nn.init.constant_(m.bias, 0) 34 | if isinstance(m, nn.GRUCell): 35 | torch.nn.init.xavier_uniform_(m.weight_ih) 36 | torch.nn.init.xavier_uniform_(m.weight_hh) 37 | torch.nn.init.constant_(m.bias_ih, 0) 38 | torch.nn.init.constant_(m.bias_hh, 0) 39 | 40 | def forward(self, x): 41 | state = x['link_state'] 42 | first = x['first'].unsqueeze(1).expand(-1, x['state_dim']) 43 | second = x['second'].unsqueeze(1).expand(-1, x['state_dim']) 44 | 45 | for _ in range(self.t): 46 | main_edges = torch.gather(state, 0, first) 47 | neigh_edges = torch.gather(state, 0, second) 48 | edges_concat = torch.cat((main_edges, neigh_edges), 1) 49 | m = self.message(edges_concat) 50 | 51 | m = torch.zeros(state.shape, dtype=m.dtype, device=state.device).scatter_add_(0, second, m) 52 | state = self.update(m, state) 53 | 54 | feature = torch.sum(state, 0) 55 | output = self.out_layer(self.readout(feature)) 56 | 57 | return output 58 | 59 | 60 | """ 61 | 62 | link_state: 63 | link_capacity: float 64 | link_utilization: float 65 | action: mark bw 66 | bw: float 67 | 68 | input: 69 | link_state 70 | pair: [0, 1] => [[0, 0], [1, 1]] 71 | 72 | """ -------------------------------------------------------------------------------- /Images/EVALUATION/Enero_3top_15_B_NEW/Figure_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/EVALUATION/Enero_3top_15_B_NEW/Figure_5.png -------------------------------------------------------------------------------- /Images/EVALUATION/Enero_3top_15_B_NEW/Figure_6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/EVALUATION/Enero_3top_15_B_NEW/Figure_6.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/ACTORLossEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/ACTORLossEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/AvgRewardEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/AvgRewardEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/AvgStdUtiEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/AvgStdUtiEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/CRITICLossEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/CRITICLossEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/ErrorLinksEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/ErrorLinksEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/Lr_Enero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/Lr_Enero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/MaxLinkUtiEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/MaxLinkUtiEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- /Images/TRAINING/Enero_3top_15_B_NEW/MinLinkUtiEnero_3top_15_B_NEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/Images/TRAINING/Enero_3top_15_B_NEW/MinLinkUtiEnero_3top_15_B_NEW.png -------------------------------------------------------------------------------- 
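Note on the input contract shared by Actor.py and Critic.py above: 'link_state' holds one feature row per link, 'first'/'second' hold the indices of the link pairs that exchange messages, and (for the Actor only) 'graph_id' maps every link row to the candidate action it belongs to. The following is a minimal, illustrative sketch of driving Actor.forward with a hypothetical batch of two candidate actions of two links each; the shapes follow what PPOAC.predict() builds, but the values are made up and this snippet is not part of the repository:

import torch
from Actor import Actor

feature_size = 20
actor = Actor(feature_size=feature_size)

# Rows 0-1 belong to candidate action 0, rows 2-3 to candidate action 1.
link_state = torch.zeros((4, feature_size))
link_state[:, 0] = torch.tensor([0.3, 0.7, 0.3, 0.7])  # link utilization (hypothetical)
link_state[:, 1] = torch.tensor([1.0, 0.5, 1.0, 0.5])  # link capacity (hypothetical)
link_state[:, 2] = torch.tensor([0.2, 0.0, 0.0, 0.2])  # bw marked by each candidate action

x = {
    'link_state': link_state,
    'first': torch.tensor([0, 1, 2, 3]),     # message-sending link of each pair
    'second': torch.tensor([1, 0, 3, 2]),    # message-receiving link of each pair
    'graph_id': torch.tensor([0, 0, 1, 1]),  # candidate action each link belongs to
    'state_dim': feature_size,
    'num_actions': 2,
}
q_values = actor(x)                                 # shape (2, 1): one logit per candidate
probs = torch.softmax(q_values.reshape(-1), dim=0)  # as done in PPOAC.predict()

Critic.forward consumes the same dictionary minus 'graph_id'/'num_actions' and returns a single value for the whole graph, since it sums the link states instead of pooling them per action.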
/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Barry0310 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | tags: Git 3 | --- 4 | # DRL-GNN-implement 5 | 6 | - A PyTorch rewrite of the ENERO DRL agent (original implementation in TensorFlow) 7 | 8 | - Reference: 9 | - [Towards Real-Time Routing Optimization with 10 | Deep Reinforcement Learning: Open Challenges](https://arxiv.org/pdf/2106.09754.pdf) 11 | - [ENERO: Efficient Real-Time WAN Routing 12 | Optimization with Deep Reinforcement Learning](https://arxiv.org/pdf/2109.10883.pdf) 13 | - https://github.com/BNN-UPC/ENERO 14 | 15 | - train: `python main.py` 16 | - eval: `python eval.py` -------------------------------------------------------------------------------- /actor_critic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from Actor import Actor 4 | from Critic import Critic 5 | import torch.optim as optim 6 | from collections import deque 7 | import gc 8 | 9 | 10 | class PPOAC: 11 | def __init__(self, hyper_parameter, device=None): 12 | H = hyper_parameter 13 | self.gae_gamma = H['gae_gamma'] 14 | self.gae_lambda = H['gae_lambda'] 15 | self.clip_value = H['clip_value'] 16 | self.mini_batch = H['mini_batch'] 17 | self.feature_size = H['feature_size'] 18 | self.entropy_beta = H['entropy_beta'] 19 | self.buffer_size = H['buffer_size'] 20 | self.update_times = H['update_times'] 21 | self.actor = Actor(feature_size=self.feature_size, t=H['t'], readout_units=H['readout_units']) 22 | self.critic = Critic(feature_size=self.feature_size, t=H['t'], readout_units=H['readout_units']) 23 | self.optimizer = optim.AdamW(list(self.actor.parameters()) + list(self.critic.parameters()), lr=H['lr']) 24 | self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=H['lr_decay_step'], 25 | gamma=H['lr_decay_rate']) 26 | 27 | self.buffer = deque(maxlen=self.buffer_size) 28 | self.buffer_index = np.arange(self.buffer_size) 29 | self.device = device if device else torch.device("cuda" if torch.cuda.is_available() else "cpu") 30 | self.actor.to(self.device) 31 | self.critic.to(self.device) 32 | 33 | def old_cummax(self, alist, extractor): 34 | maxes = torch.tensor([torch.amax(extractor(v)) + 1 
for v in alist]) 35 | cummaxes = [torch.zeros_like(maxes[0])] 36 | for i in range(len(maxes) - 1): 37 | cummaxes.append(torch.sum(maxes[0:i + 1])) 38 | return torch.tensor(cummaxes) 39 | 40 | def predict(self, env, src, dst): 41 | list_k_features = [] 42 | 43 | middle_point_list = env.src_dst_k_middlepoints[str(src) + ':' + str(dst)] 44 | for mid in range(len(middle_point_list)): 45 | env.mark_action_sp(src, middle_point_list[mid], src, dst) 46 | if middle_point_list[mid] != dst: 47 | env.mark_action_sp(middle_point_list[mid], dst, src, dst) 48 | features = self.actor_get_graph_features(env) 49 | list_k_features.append(features) 50 | env.edge_state[:, 2] = 0 51 | 52 | graph_ids = [torch.full([list_k_features[it]['link_state'].shape[0]], it) for it in range(len(list_k_features))] 53 | 54 | first_offset = self.old_cummax(list_k_features, lambda v: v['first']) 55 | second_offset = self.old_cummax(list_k_features, lambda v: v['second']) 56 | tensor = { 57 | 'graph_id': torch.cat([v for v in graph_ids], dim=0).to(self.device), 58 | 'link_state': torch.cat([v['link_state'] for v in list_k_features], dim=0).to(self.device), 59 | 'first': torch.cat([v['first'] + m for v, m in zip(list_k_features, first_offset)], dim=0,).to(self.device), 60 | 'second': torch.cat([v['second'] + m for v, m in zip(list_k_features, second_offset)], dim=0).to(self.device), 61 | 'state_dim': self.feature_size, 62 | 'num_actions': len(middle_point_list), 63 | } 64 | q_values = self.actor(tensor) 65 | q_values = torch.reshape(q_values, (-1, )) 66 | soft_max_q_values = torch.nn.functional.softmax(q_values, dim=0) 67 | 68 | return soft_max_q_values, tensor 69 | 70 | def actor_get_graph_features(self, env): 71 | temp = { 72 | 'num_edges': env.numEdges, 73 | 'length': env.firstTrueSize, 74 | 'capacity': env.link_capacity_feature, 75 | 'bw_allocated': env.edge_state[:,2], 76 | 'utilization': np.divide(env.edge_state[:,0], env.edge_state[:, 1]), 77 | 'first': env.first, 78 | 'second': env.second 79 | } 80 | 81 | temp['utilization'] = torch.reshape(torch.tensor(temp['utilization'][0:temp['num_edges']], dtype=torch.float32), 82 | (temp['num_edges'], 1)) 83 | temp['capacity'] = torch.reshape(torch.tensor(temp['capacity'][0:temp['num_edges']], dtype=torch.float32), 84 | (temp['num_edges'], 1)) 85 | temp['bw_allocated'] = torch.reshape(torch.tensor(temp['bw_allocated'][0:temp['num_edges']], 86 | dtype=torch.float32), (temp['num_edges'], 1)) 87 | 88 | hidden_states = torch.cat([temp['utilization'], temp['capacity'], temp['bw_allocated']], dim=1) 89 | link_state = torch.nn.functional.pad(hidden_states, (0, self.feature_size - 3), 'constant') 90 | 91 | inputs = {'link_state': link_state, 'first': torch.tensor(temp['first'][0:temp['length']]), 92 | 'second': torch.tensor(temp['second'][0:temp['length']])} 93 | 94 | return inputs 95 | 96 | def critic_get_graph_features(self, env): 97 | temp = { 98 | 'num_edges': env.numEdges, 99 | 'length': env.firstTrueSize, 100 | 'capacity': env.link_capacity_feature, 101 | 'utilization': np.divide(env.edge_state[:, 0], env.edge_state[:, 1]), 102 | 'first': env.first, 103 | 'second': env.second 104 | } 105 | 106 | temp['utilization'] = torch.reshape(torch.tensor(temp['utilization'][0:temp['num_edges']], dtype=torch.float32), 107 | [temp['num_edges'], 1]) 108 | temp['capacity'] = torch.reshape(torch.tensor(temp['capacity'][0:temp['num_edges']], dtype=torch.float32), 109 | [temp['num_edges'], 1]) 110 | 111 | hidden_states = torch.cat([temp['utilization'], temp['capacity']], dim=1) 112 | link_state = 
torch.nn.functional.pad(hidden_states, (0, self.feature_size - 2), 'constant') 113 | 114 | inputs = {'link_state': link_state.to(self.device), 115 | 'first': torch.tensor(temp['first'][0:temp['length']]).to(self.device), 116 | 'second': torch.tensor(temp['second'][0:temp['length']]).to(self.device), 117 | 'state_dim': self.feature_size} 118 | 119 | return inputs 120 | 121 | def compute_gae(self, values, masks, rewards): 122 | returns = [] 123 | gae = 0 124 | 125 | for i in reversed(range(len(rewards))): 126 | delta = rewards[i] + self.gae_gamma * values[i+1] * masks[i] - values[i] 127 | gae = delta + self.gae_gamma * self.gae_lambda * masks[i] * gae 128 | returns.insert(0, gae + values[i]) 129 | 130 | adv = np.array(returns) - values[:-1] 131 | 132 | return returns, (adv - np.mean(adv)) / (np.std(adv) + 1e-10) 133 | 134 | def _compute_actor_loss(self, adv, old_act, old_policy_probs, link_state, graph_id, 135 | first, second, state_dim, num_actions): 136 | old_policy_probs = old_policy_probs.detach() 137 | 138 | q_values = self.actor({ 139 | 'graph_id': graph_id, 140 | 'link_state': link_state, 141 | 'first': first, 142 | 'second': second, 143 | 'state_dim': state_dim, 144 | 'num_actions': num_actions, 145 | }) 146 | q_values = torch.reshape(q_values, (-1,)) 147 | new_policy_probs = torch.nn.functional.softmax(q_values, dim=0) 148 | 149 | ratio = torch.exp( 150 | torch.log(torch.sum(old_act * new_policy_probs)) - torch.log(torch.sum(old_act * old_policy_probs)) 151 | ) 152 | surr1 = -ratio*adv 153 | surr2 = -torch.clip(ratio, min=1-0.1, max=1+0.1) * adv 154 | 155 | loss = torch.max(surr1, surr2) 156 | entropy = -torch.sum(torch.log(new_policy_probs) * new_policy_probs) 157 | 158 | return loss, entropy 159 | 160 | def _compute_critic_loss(self, ret, link_state, first, second, state_dim): 161 | 162 | value = self.critic({ 163 | 'link_state': link_state, 164 | 'first': first, 165 | 'second': second, 166 | 'state_dim': state_dim 167 | })[0] 168 | loss = torch.square(ret - value) 169 | 170 | return loss 171 | 172 | def update(self, actions, actions_probs, tensors, critic_features, returns, advantages): 173 | 174 | for pos in range(self.buffer_size): 175 | tensor = tensors[pos] 176 | critic_feature = critic_features[pos] 177 | action = actions[pos] 178 | ret = returns[pos] 179 | adv = advantages[pos] 180 | action_dist = actions_probs[pos] 181 | 182 | update_tensor = { 183 | 'graph_id': tensor['graph_id'], 184 | 'link_state': tensor['link_state'], 185 | 'first': tensor['first'], 186 | 'second': tensor['second'], 187 | 'state_dim': tensor['state_dim'], 188 | 'num_actions': tensor['num_actions'], 189 | 'link_state_critic': critic_feature['link_state'], 190 | 'old_act': action.to(self.device), 191 | 'adv': adv, 192 | 'old_policy_probs': action_dist, 193 | 'first_critic': critic_feature['first'], 194 | 'second_critic': critic_feature['second'], 195 | 'ret': ret, 196 | } 197 | 198 | self.buffer.append(update_tensor) 199 | 200 | for i in range(self.update_times): 201 | np.random.shuffle(self.buffer_index) 202 | for start in range(0, self.buffer_size, self.mini_batch): 203 | end = start + self.mini_batch 204 | entropy = 0 205 | actor_loss = 0 206 | critic_loss = 0 207 | for index in self.buffer_index[start:end]: 208 | sample = self.buffer[index] 209 | 210 | sample_actor_loss, sample_entropy = self._compute_actor_loss(sample['adv'], sample['old_act'], 211 | sample['old_policy_probs'], 212 | sample['link_state'], 213 | sample['graph_id'], sample['first'], 214 | sample['second'], sample['state_dim'], 215 | 
sample['num_actions']) 216 | sample_critic_loss = self._compute_critic_loss(sample['ret'], sample['link_state_critic'], 217 | sample['first_critic'], sample['second_critic'], 218 | sample['state_dim']) 219 | entropy += sample_entropy 220 | actor_loss += sample_actor_loss 221 | critic_loss += sample_critic_loss 222 | 223 | entropy /= self.mini_batch 224 | actor_loss = actor_loss / self.mini_batch - self.entropy_beta * entropy 225 | critic_loss /= self.mini_batch 226 | 227 | total_loss = actor_loss + critic_loss 228 | self.optimizer.zero_grad() 229 | total_loss.backward() 230 | torch.nn.utils.clip_grad_norm_(list(self.actor.parameters())+list(self.critic.parameters()), 231 | max_norm=self.clip_value) 232 | self.optimizer.step() 233 | 234 | self.buffer.clear() 235 | gc.collect() 236 | return actor_loss, critic_loss 237 | 238 | 239 | -------------------------------------------------------------------------------- /defo_process_results.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import numpy as np 4 | import re 5 | import sys 6 | import networkx as nx 7 | 8 | node_to_index_dic = {} 9 | index_to_node_lst = [] 10 | 11 | def index_to_node(n): 12 | return(index_to_node_lst[n]) 13 | 14 | def node_to_index(node): 15 | return(node_to_index_dic[node]) 16 | 17 | 18 | class Defo_results: 19 | 20 | net_size = 0 21 | MP_matrix = None 22 | ecmp_routing_matrix = None 23 | routing_matrix = None 24 | links_bw = None 25 | links_weight = None 26 | Gbase = None 27 | node_to_index_dic_pvt = None 28 | index_to_node_lst_pvt = None 29 | pre_optim_max_load_link = None 30 | post_optim_max_load_link = None 31 | 32 | def __init__(self, graph_file, results_file): 33 | self.graph_file = graph_file 34 | # We comment it as we don't use the results for now. 
We focus on SP 35 | #self.results_file = results_file 36 | self.Gbase = nx.MultiDiGraph() 37 | self.process_graph_file() 38 | 39 | #self.process() 40 | 41 | def read_max_load_link (self, standard_out_file): 42 | with open(standard_out_file) as fd: 43 | while (True): 44 | line = fd.readline() 45 | if line.startswith("pre-optimization"): 46 | camps = line.split(" ") 47 | print(camps) 48 | self.pre_optim_max_load_link = float(camps[-1].split('\n')[0]) 49 | elif line.startswith("post-optimization"): 50 | camps = line.split(" ") 51 | self.post_optim_max_load_link = float(camps[-1].split('\n')[0]) 52 | break 53 | return (self.pre_optim_max_load_link, self.post_optim_max_load_link) 54 | 55 | def process_graph_file(self): 56 | with open(self.graph_file) as fd: 57 | line = fd.readline() 58 | camps = line.split(" ") 59 | self.net_size = int(camps[1]) 60 | # Remove : label x y 61 | line = fd.readline() 62 | 63 | for i in range (self.net_size): 64 | line = fd.readline() 65 | node = line[0:line.find(" ")] 66 | node_to_index_dic[node] = i 67 | index_to_node_lst.append(node) 68 | 69 | self.links_bw = [] 70 | self.links_weight = [] 71 | for i in range(self.net_size): 72 | self.links_bw.append({}) 73 | self.links_weight.append({}) 74 | for line in fd: 75 | if (not line.startswith("Link_") and not line.startswith("edge_")): 76 | continue 77 | camps = line.split(" ") 78 | src = int(camps[1]) 79 | dst = int(camps[2]) 80 | weight = int(camps[3]) 81 | bw = float(camps[4]) 82 | self.Gbase.add_edge(src, dst) 83 | self.links_bw[src][dst] = bw 84 | self.links_weight[src][dst] = weight 85 | self.node_to_index_dic_pvt = node_to_index_dic 86 | self.index_to_node_lst_pvt = index_to_node_lst 87 | 88 | def process (self): 89 | with open(self.results_file) as fd: 90 | while (True): 91 | line = fd.readline() 92 | if (line == ""): 93 | break 94 | if (line.startswith("*")): 95 | if (line == "***Next hops priority 2 (sr paths)***\n"): 96 | self._read_middle_points(fd) 97 | if (line == "***Next hops priority 3 (ecmp paths)***\n"): 98 | self._read_ecmp_routing(fd) 99 | break 100 | self._gen_routing_matrix() 101 | 102 | def _read_middle_points(self,fd): 103 | self.MP_matrix = np.zeros((self.net_size,self.net_size),dtype="object") 104 | while (True): 105 | pos = fd.tell() 106 | line = fd.readline() 107 | if (line.startswith("*")): 108 | fd.seek(pos) 109 | return 110 | if (not line.startswith("seq")): 111 | continue 112 | line = line[line.find(": ")+2:] 113 | if (line[-1]=='\n'): 114 | line = line[:-1] 115 | 116 | ptr = 0 117 | mp_path = [] 118 | while (True): 119 | prev_ptr = ptr 120 | ptr = line.find(" -> ",ptr) 121 | if (ptr == -1): 122 | mp_path.append(line[prev_ptr:]) 123 | break 124 | else: 125 | mp_path.append(line[prev_ptr:ptr]) 126 | ptr += 4 127 | src = node_to_index(mp_path[0]) 128 | dst = node_to_index(mp_path[-1]) 129 | self.MP_matrix[src,dst] = mp_path 130 | 131 | 132 | def _read_ecmp_routing(self,fd): 133 | self.ecmp_routing_matrix = np.zeros((self.net_size,self.net_size),dtype="object") 134 | next_node_matrix = np.zeros((self.net_size,self.net_size),dtype="object") 135 | dst_node = None 136 | while (True): 137 | line = fd.readline() 138 | if (line == ""): 139 | break 140 | if (line.startswith("Destination")): 141 | dst_node_str = line[line.find(" ")+1:-1] 142 | dst_node = node_to_index(dst_node_str) 143 | if (line.startswith("node")): 144 | src_node_str = line[6:line.find(", ")] 145 | src_node = node_to_index(src_node_str) 146 | sub_line = line[line.find("[")+1:line.find("]")] 147 | ptr = 0 148 | next_node_lst = [] 
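# Parse the comma-separated next-hop list inside "[...]"; the traffic proportion is later split evenly across these next hops (see proportion/num_next_hops below).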
149 | while (True): 150 | prev_ptr = ptr 151 | ptr = sub_line.find(", ",ptr) 152 | if (ptr == -1): 153 | next_node_lst.append(sub_line[prev_ptr:]) 154 | break 155 | else: 156 | next_node_lst.append(sub_line[prev_ptr:ptr]) 157 | ptr += 2 158 | 159 | next_node_matrix[src_node,dst_node] = next_node_lst 160 | 161 | for i in range (self.net_size): 162 | for j in range (self.net_size): 163 | end_paths = [] 164 | paths_info = [{"path":[index_to_node(i)],"proportion":1.0}] 165 | while (len(paths_info) != 0): 166 | for path_info in paths_info: 167 | path = path_info["path"] 168 | if (node_to_index(path[-1]) == j): 169 | paths_info.remove(path_info) 170 | end_paths.append(path_info) 171 | continue 172 | next_lst = next_node_matrix[node_to_index(path[-1]),j] 173 | num_next_hops = len(next_lst) 174 | if (num_next_hops > 1): 175 | for next_node in next_lst: 176 | new_path = list(path) 177 | new_path.append(next_node) 178 | paths_info.append({"path":new_path,"proportion":path_info["proportion"]/num_next_hops}) 179 | paths_info.remove(path_info) 180 | else: 181 | path.append(next_lst[0]) 182 | self.ecmp_routing_matrix[i,j] = end_paths 183 | 184 | def _gen_routing_matrix(self): 185 | self.routing_matrix = np.zeros((self.net_size,self.net_size),dtype="object") 186 | for i in range(self.net_size): 187 | for j in range(self.net_size): 188 | if (i == j): 189 | continue 190 | end_path_info_list = [] 191 | mp_path = self.MP_matrix[i,j] 192 | #print (i,j,mp_path) 193 | src_mp = mp_path[0] 194 | for mp in mp_path: 195 | dst_mp = mp 196 | sub_path_info_lst = self.ecmp_routing_matrix[node_to_index(src_mp),node_to_index(dst_mp)] 197 | if (len(end_path_info_list) == 0): 198 | for sub_path_info in sub_path_info_lst: 199 | end_path_info_list.append({"path":sub_path_info["path"][:-1],"proportion":sub_path_info["proportion"]}) 200 | elif (len(sub_path_info_lst) > 1): 201 | aux_end_path_list = [] 202 | for path_info in end_path_info_list: 203 | for sub_path_info in sub_path_info_lst: 204 | new_path = list(path_info["path"]) 205 | new_path.extend(sub_path_info["path"][:-1]) 206 | aux_end_path_list.append({"path":new_path,"proportion":path_info["proportion"]*sub_path_info["proportion"]}) 207 | end_path_info_list = aux_end_path_list 208 | else: 209 | for path_info in end_path_info_list: 210 | path_info["path"].extend(sub_path_info_lst[0]["path"][:-1]) 211 | src_mp = dst_mp 212 | for path_info in end_path_info_list: 213 | path_info["path"].append(dst_mp) 214 | self.routing_matrix[i,j] = end_path_info_list 215 | 216 | def _get_traffic_matrix (self,traffic_file): 217 | tm = np.zeros((self.net_size,self.net_size)) 218 | with open(traffic_file) as fd: 219 | fd.readline() 220 | fd.readline() 221 | for line in fd: 222 | camps = line.split(" ") 223 | # We force that the bws are integers 224 | tm[int(camps[1]),int(camps[2])] = np.floor(float(camps[3])) 225 | return (tm) 226 | 227 | def _link_utilization(self, routing_matrix, traffic_file): 228 | link_utilization = [] 229 | traffic_matrix = self._get_traffic_matrix(traffic_file) 230 | for i in range(self.net_size): 231 | link_utilization.append({}) 232 | for i in range(self.net_size): 233 | for j in range (self.net_size): 234 | if (i==j): 235 | continue 236 | traffic_all_path = traffic_matrix[i,j] 237 | routings_lst = routing_matrix[i,j] 238 | for path_info in routings_lst: 239 | path = path_info["path"] 240 | traffic = traffic_all_path*path_info["proportion"] 241 | n0 = path[0] 242 | for n1 in path[1:]: 243 | N0 = node_to_index(n0) 244 | N1 = node_to_index(n1) 245 | if N1 in 
link_utilization[N0]: 246 | link_utilization[N0][N1] += traffic 247 | else: 248 | link_utilization[N0][N1] = traffic 249 | n0 = n1 250 | max_lu = (0,0,0) 251 | for i in range(self.net_size): 252 | for j in link_utilization[i].keys(): 253 | link_traffic = link_utilization[i][j] 254 | link_capacity = self.links_bw[i][j] 255 | link_utilization[i][j] = link_traffic / link_capacity 256 | if (link_utilization[i][j] > max_lu[0]): 257 | max_lu = (link_utilization[i][j], i, j) 258 | #return (link_utilization, max_lu) 259 | return (max_lu) 260 | 261 | def get_opt_link_utilization(self,traffic_file): 262 | return (self._link_utilization(self.routing_matrix,traffic_file)) 263 | 264 | def get_direct_link_utilization(self,traffic_file): 265 | return (self._link_utilization(self.ecmp_routing_matrix,traffic_file)) 266 | 267 | if (__name__ == "__main__"): 268 | 269 | args = sys.argv 270 | if ("-h" in args): 271 | print ("HELP: python3 ./defo_process_results.py <graph_file> <results_file> <tm_file>") 272 | exit() 273 | 274 | # graph_file = args[1] 275 | # results_file = args[2] 276 | # tm_file = args[3] 277 | 278 | # results = Defo_results(graph_file,results_file) 279 | 280 | # print ("============== Direct =====================") 281 | # print (results.get_direct_link_utilization(tm_file)) 282 | # print ("============== Optim =====================") 283 | # print (results.get_opt_link_utilization(tm_file)) 284 | 285 | for tm_id in range(1): 286 | graph_topology_name = "VisionNet" 287 | graph_file = "../DEFOResults/results-1-link_capacity-unif-05-1-zoo/"+graph_topology_name+"/"+graph_topology_name+".graph" 288 | results_file = "../DEFOResults/results-1-link_capacity-unif-05-1-zoo/"+graph_topology_name+"/res_"+graph_topology_name+"_"+str(tm_id) 289 | tm_file = "../DEFOResults/results-1-link_capacity-unif-05-1-zoo/"+graph_topology_name+"/"+graph_topology_name+"."+str(tm_id)+".demands" 290 | results = Defo_results(graph_file,results_file) 291 | num_demands_changed = 0 292 | for i in range(results.net_size): 293 | for j in range (results.net_size): 294 | if (i!=j): 295 | if len(results.MP_matrix[i,j])>2: 296 | num_demands_changed+=1 297 | print("For tm_id: ", tm_id, " we have changed ", num_demands_changed, " demands") 298 | 299 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | if __name__ == "__main__": 4 | topo = ["NEW_EliBackbone/EVALUATE", "NEW_Janetbackbone/EVALUATE", "NEW_HurricaneElectric/EVALUATE"] 5 | log = "Enero_3top_15_B_NEW" 6 | subprocess.call(["python", "parse_PPO.py", "-d", "./Logs/exp" + log + "Logs.txt"]) 7 | for t in topo: 8 | subprocess.call(["python", "eval_on_single_topology.py", 9 | "-max_edge", "100", "-min_edge", "5", 10 | "-max_nodes", "30", "-min_nodes", "1", 11 | "-n", "2", 12 | "-f1", "results_single_top", "-f2", t, 13 | "-d", f"./Logs/exp{log}Logs.txt"]) 14 | subprocess.call(["python", "figures_5_and_6.py", "-d", log]) 15 | -------------------------------------------------------------------------------- /eval_on_single_topology.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import argparse 4 | from multiprocessing import Pool 5 | 6 | def worker_execute(args): 7 | tm_id = args[0] 8 | model_id = args[1] 9 | drl_eval_res_folder = args[2] 10 | differentiation_str = args[3] 11 | graph_topology_name = args[4] 12 | general_dataset_folder = args[5] 13 | specific_dataset_folder = args[6] 14 | 15 | 
subprocess.call(["python", "script_eval_on_single_topology.py", "-t", str(tm_id), "-m", str(model_id), "-g", graph_topology_name, "-o", drl_eval_res_folder, "-d", differentiation_str, "-f", general_dataset_folder, "-f2", specific_dataset_folder]) 16 | 17 | if __name__ == "__main__": 18 | # First we execute this script to evaluate our drl agent over different topologies from the folder (argument -f2) 19 | # python eval_on_single_topology.py -max_edge 100 -min_edge 5 -max_nodes 30 -min_nodes 1 -n 2 -f1 results_single_top -f2 NEW_Garr199905/EVALUATE -d ./Logs/expSP_3top_15_B_NEWLogs.txt 20 | # To parse the results of this script, we must then execute the parse_middrouting_files.py file 21 | 22 | # Parse logs and get best model 23 | parser = argparse.ArgumentParser(description='Parse file and create plots') 24 | 25 | parser.add_argument('-d', help='logs data file', type=str, required=True, nargs='+') 26 | parser.add_argument('-f1', help='Dataset name within dataset_sing_top', type=str, required=True, nargs='+') 27 | parser.add_argument('-f2', help='specific dataset folder name of the topology to evaluate on', type=str, required=True, nargs='+') 28 | parser.add_argument('-max_edge', help='maximum number of edges the topology can have', type=int, required=True, nargs='+') 29 | parser.add_argument('-min_edge', help='minimum number of edges the topology can have', type=int, required=True, nargs='+') 30 | parser.add_argument('-max_nodes', help='maximum number of nodes the topology can have', type=int, required=True, nargs='+') 31 | parser.add_argument('-min_nodes', help='minimum number of nodes the topology can have', type=int, required=True, nargs='+') 32 | parser.add_argument('-n', help='number of processes to use for the pool (number of DEFO instances running at the same time)', type=int, required=True, nargs='+') 33 | 34 | args = parser.parse_args() 35 | 36 | aux = args.d[0].split(".") 37 | aux = aux[1].split("exp") 38 | differentiation_str = str(aux[1].split("Logs")[0]) 39 | 40 | # Point to the folder where the datasets of argument f2 are located 41 | general_dataset_folder = "../Enero_datasets/dataset_sing_top/data/results_my_3_tops_unif_05-1/"+args.f2[0]+"/" 42 | # In this folder we store the rewards that will later be parsed for plotting 43 | drl_eval_res_folder = "../Enero_datasets/dataset_sing_top/data/"+args.f1[0]+"/evalRes_"+args.f2[0]+"/" 44 | 45 | if not os.path.exists("./Images"): 46 | os.makedirs("./Images") 47 | 48 | if not os.path.exists(drl_eval_res_folder): 49 | os.makedirs(drl_eval_res_folder) 50 | 51 | if not os.path.exists(drl_eval_res_folder+differentiation_str): 52 | os.makedirs(drl_eval_res_folder+differentiation_str) 53 | else: 54 | os.system("rm -rf %s" % (drl_eval_res_folder+differentiation_str)) 55 | os.makedirs(drl_eval_res_folder+differentiation_str) 56 | 57 | model_id = 0 58 | # Load best model 59 | with open(args.d[0]) as fp: 60 | for line in reversed(list(fp)): 61 | arrayLine = line.split(":") 62 | if arrayLine[0]=='MAX REWD': 63 | model_id = int(arrayLine[2].split(",")[0]) 64 | break 65 | 66 | # Iterate over all topologies and evaluate our DRL agent on all TMs 67 | for subdir, dirs, files in os.walk(general_dataset_folder): 68 | for file in files: 69 | if file.endswith((".graph")): 70 | topology_num_nodes = 0 71 | with open(general_dataset_folder+file) as fd: 72 | # Loop to read the Number of NODES and EDGES 73 | while (True): 74 | line = fd.readline() 75 | if (line == ""): 76 | break 77 | if (line.startswith("NODES")): 78 | topology_num_nodes = int(line.split(' 
')[1]) 79 | 80 | # If we are inside the range of number of nodes 81 | if topology_num_nodes>=args.min_nodes[0] and topology_num_nodes<=args.max_nodes[0]: 82 | if (line.startswith("EDGES")): 83 | topology_num_edges = int(line.split(' ')[1]) 84 | # If we are inside the range of number of edges 85 | if topology_num_edges<=args.max_edge[0] and topology_num_edges>=args.min_edge[0]: 86 | topology_Name = file.split('.')[0] 87 | print("*****") 88 | print("***** Evaluating on file: "+file+" with number of edges "+str(topology_num_edges)) 89 | print("*****") 90 | argums = [(tm_id, model_id, drl_eval_res_folder, differentiation_str, topology_Name, general_dataset_folder, args.f2[0]) for tm_id in range(50)] 91 | with Pool(processes=args.n[0]) as pool: 92 | pool.map(worker_execute, argums) 93 | else: 94 | break 95 | 96 | -------------------------------------------------------------------------------- /figures_5_and_6.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pandas as pd 4 | import seaborn as sns 5 | from itertools import cycle 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | import pickle 9 | 10 | def smooth(scalars, weight): # Weight between 0 and 1 11 | last = scalars[0] # First value in the plot (first timestep) 12 | smoothed = list() 13 | for point in scalars: 14 | smoothed_val = last * weight + (1 - weight) * point # Calculate smoothed value 15 | smoothed.append(smoothed_val) # Save it 16 | last = smoothed_val # Anchor the last smoothed value 17 | 18 | return smoothed 19 | 20 | def frange(x, y, jump): 21 | while x < y: 22 | yield x 23 | x += jump 24 | 25 | #folders = ["../Enero_datasets/dataset_sing_top/data/results_single_top/evalRes_NEW_Garr199905/EVALUATE/"] 26 | folders = ["../Enero_datasets/dataset_sing_top/data/results_single_top/evalRes_NEW_EliBackbone/EVALUATE/","../Enero_datasets/dataset_sing_top/data/results_single_top/evalRes_NEW_Janetbackbone/EVALUATE/","../Enero_datasets/dataset_sing_top/data/results_single_top/evalRes_NEW_HurricaneElectric/EVALUATE/"] 27 | 28 | if __name__ == "__main__": 29 | # This script plots Figures 5 and 6 from the COMNET 2022 paper. 30 | 31 | # Before executing this file we must execute the eval_on_single_topology.py file to evaluate the DRL model and store the results 32 | # We also need to evaluate DEFO for these new topologies. To do this, I copy the corresponding 33 | # folder where it needs to be and I execute the script run_Defo_single_top.py for each topology. 
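# For reference, smooth() above is a plain exponential moving average; e.g. smooth([0, 1, 1], weight=0.5) returns [0, 0.5, 0.75].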
34 | # python figures_5_and_6.py -d SP_3top_15_B_NEW 35 | parser = argparse.ArgumentParser(description='Parse files and create plots') 36 | 37 | # The flag 'd' indicates the directory where to store the figures 38 | parser.add_argument('-d', help='differentiation string for the model', type=str, required=True, nargs='+') 39 | 40 | args = parser.parse_args() 41 | 42 | differentiation_str = args.d[0] 43 | 44 | drl_top1_uti = [] 45 | ls_top1_uti = [] 46 | enero_top1_uti = [] 47 | cost_drl_top1 = [] 48 | cost_ls_top1 = [] 49 | cost_enero_top1 = [] 50 | 51 | drl_top2_uti = [] 52 | ls_top2_uti = [] 53 | enero_top2_uti = [] 54 | cost_drl_top2 = [] 55 | cost_ls_top2 = [] 56 | cost_enero_top2 = [] 57 | 58 | drl_top3_uti = [] 59 | ls_top3_uti = [] 60 | enero_top3_uti = [] 61 | cost_drl_top3 = [] 62 | cost_ls_top3 = [] 63 | cost_enero_top3 = [] 64 | 65 | if not os.path.exists("./Images"): 66 | os.makedirs("./Images") 67 | 68 | path_to_dir = "./Images/EVALUATION/"+differentiation_str+'/' 69 | 70 | if not os.path.exists(path_to_dir): 71 | os.makedirs(path_to_dir) 72 | 73 | dd_Eli = pd.DataFrame(columns=['AC','LS','Enero','Topologies']) 74 | dd_Janet = pd.DataFrame(columns=['AC','LS','Enero','Topologies']) 75 | dd_Hurricane = pd.DataFrame(columns=['AC','LS','Enero','Topologies']) 76 | 77 | # Iterate over all topologies and evaluate our DRL agent on all TMs 78 | for folder in folders: 79 | drl_eval_res_folder = folder+differentiation_str+'/' 80 | topology_eval_name = folder.split('NEW_')[1].split('/')[0] 81 | for subdir, dirs, files in os.walk(drl_eval_res_folder): 82 | it = 0 83 | for file in files: 84 | if file.endswith((".pckl")): 85 | results = [] 86 | path_to_pckl_rewards = drl_eval_res_folder + topology_eval_name + '/' 87 | with open(path_to_pckl_rewards+file, 'rb') as f: 88 | results = pickle.load(f) 89 | if folder==folders[0]: 90 | dd_Eli.loc[it] = [results[9],results[7],results[3],topology_eval_name] 91 | cost_ls_top1.append(results[15]) 92 | cost_drl_top1.append(results[14]) 93 | cost_enero_top1.append(results[16]) 94 | elif folder==folders[1]: 95 | dd_Janet.loc[it] = [results[9],results[7],results[3],topology_eval_name] 96 | cost_ls_top2.append(results[15]) 97 | cost_drl_top2.append(results[14]) 98 | cost_enero_top2.append(results[16]) 99 | else: 100 | dd_Hurricane.loc[it] = [results[9],results[7],results[3],topology_eval_name] 101 | cost_ls_top3.append(results[15]) 102 | cost_drl_top3.append(results[14]) 103 | cost_enero_top3.append(results[16]) 104 | it += 1 105 | 106 | plt.rcParams['axes.titlesize'] = 20 107 | plt.rcParams['figure.figsize'] = (11.5, 9) 108 | plt.rcParams['xtick.labelsize'] = 22 109 | plt.rcParams['ytick.labelsize'] = 22 110 | plt.rcParams['legend.fontsize'] = 17 111 | fig, ax = plt.subplots() 112 | 113 | n = np.arange(1,len(cost_ls_top1)+1) / float(len(cost_ls_top1)) 114 | Xs = np.sort(cost_ls_top1) 115 | ax.step(Xs,n, c='cyan', linestyle=(0, (1,1)), label="LS EliBackbone", linewidth=4) 116 | Xs = np.sort(cost_drl_top1) 117 | ax.step(Xs,n,c='darkgreen', linestyle='-', label="DRL EliBackbone", linewidth=4) 118 | Xs = np.sort(cost_enero_top1) 119 | ax.step(Xs,n,c='maroon', linestyle=(0, (2.5, 1)),label="Enero EliBackbone", linewidth=4) 120 | Xs = np.sort(cost_ls_top2) 121 | ax.step(Xs,n, c='dodgerblue', linestyle=(0, (1, 2.5)),label="LS Janetbackbone", linewidth=4) 122 | Xs = np.sort(cost_drl_top2) 123 | ax.step(Xs,n,c='lime', linestyle='-',label="DRL Janetbackbone", linewidth=4) 124 | Xs = np.sort(cost_enero_top2) 125 | ax.step(Xs,n,c='red', linestyle=(0, (2.5, 
3)),label="Enero Janetbackbone", linewidth=4) 126 | Xs = np.sort(cost_ls_top3) 127 | ax.step(Xs,n, c='navy', linestyle=(0, (1,6)),label="LS HurricaneElectric", linewidth=4) 128 | Xs = np.sort(cost_drl_top3) 129 | ax.step(Xs,n,c='palegreen', linestyle='-',label="DRL HurricaneElectric", linewidth=4) 130 | Xs = np.sort(cost_enero_top3) 131 | ax.step(Xs,n,c='orange', linestyle=(0, (2.5, 6)),label="Enero HurricaneElectric", linewidth=4) 132 | 133 | plt.ylim((0, 1.005)) 134 | plt.xlim((0, 50.0)) 135 | plt.xticks(np.arange(0, 50, 8)) 136 | plt.ylabel('CDF', fontsize=22) 137 | plt.xlabel("Execution Cost (s)", fontsize=20) 138 | plt.grid(color='gray') 139 | plt.legend(loc='lower right', ncol=3, bbox_to_anchor=(1.03, -0.3)) 140 | plt.tight_layout() 141 | plt.savefig(path_to_dir+'Figure_6.png', bbox_inches='tight',pad_inches = 0) 142 | plt.close() 143 | 144 | 145 | # Define some hatches 146 | hatches = cycle(['-', '|', '']) 147 | cdf = pd.concat([dd_Eli,dd_Janet,dd_Hurricane]) 148 | mdf = pd.melt(cdf, id_vars=['Topologies'], var_name=['Topology']) # MELT 149 | ax = sns.boxplot(x="Topologies", y="value", hue="Topology", data=mdf, palette="mako") # RUN PLOT 150 | plt.rcParams['axes.grid'] = True 151 | plt.rcParams['figure.figsize'] = (3.47, 2.0) 152 | plt.rcParams['axes.titlesize'] = 22 153 | plt.rcParams['xtick.labelsize'] = 22 154 | plt.rcParams['ytick.labelsize'] = 22 155 | plt.rcParams['legend.fontsize'] = 24 156 | ax.set_xlabel("",fontsize=0) 157 | ax.set_ylabel("Maximum Link Utilization",fontsize=24) 158 | plt.rcParams["axes.labelweight"] = "bold" 159 | ax.grid(which='major', axis='y', linestyle='-') 160 | plt.rcParams.update({'font.size': 22}) 161 | plt.rcParams['pdf.fonttype'] = 42 162 | # Loop over the bars 163 | for i, patch in enumerate(ax.artists): 164 | # Boxes from left to right 165 | hatch = next(hatches) 166 | patch.set_hatch(hatch*2) 167 | col = patch.get_facecolor() 168 | #patch.set_edgecolor(col) 169 | patch.set_edgecolor("black") 170 | patch.set_facecolor('None') 171 | 172 | # Each box has 6 associated Line2D objects (to make the whiskers, fliers, etc.) 
173 | # Loop over them here, and use the same colour as above 174 | for j in range(i * 6, i * 6 + 6): 175 | line = ax.lines[j] 176 | line.set_color("black") 177 | line.set_mfc("black") 178 | line.set_mec("black") 179 | # Change color of the median 180 | if j == i*6+4: 181 | line.set_color("orange") 182 | line.set_mfc("orange") 183 | line.set_mec("orange") 184 | 185 | for i, patch in enumerate(ax.patches): 186 | hatch = next(hatches) 187 | patch.set_hatch(hatch*2) 188 | col = patch.get_facecolor() 189 | #patch.set_edgecolor(col) 190 | patch.set_edgecolor("black") 191 | patch.set_facecolor('None') 192 | 193 | plt.legend(loc='upper left', ncol=3) 194 | plt.ylim((0.5, 1.2)) 195 | plt.tight_layout() 196 | plt.savefig(path_to_dir+'Figure_5.png', bbox_inches='tight',pad_inches = 0) 197 | plt.clf() 198 | plt.close() 199 | -------------------------------------------------------------------------------- /gym-graph/gym_graph/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | 4 | register( 5 | id='GraphEnv-v15', 6 | entry_point='gym_graph.envs:Env15', 7 | ) 8 | 9 | register( 10 | id='GraphEnv-v16', 11 | entry_point='gym_graph.envs:Env16', 12 | ) 13 | 14 | register( 15 | id='GraphEnv-v20', 16 | entry_point='gym_graph.envs:Env20', 17 | ) 18 | -------------------------------------------------------------------------------- /gym-graph/gym_graph/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_graph.envs.environment15 import Env15 2 | from gym_graph.envs.environment16 import Env16 3 | from gym_graph.envs.environment20 import Env20 -------------------------------------------------------------------------------- /gym-graph/gym_graph/envs/environment15.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import networkx as nx 4 | import random 5 | from gym import error, spaces, utils 6 | from random import choice 7 | import pandas as pd 8 | import pickle 9 | import json 10 | import os.path 11 | import gc 12 | import defo_process_results as defoResults 13 | 14 | class Env15(gym.Env): 15 | """ 16 | Environment used for the simulated annealing and hill climbing benchmarks in the 17 | script_eval_on_single_topology.py with SP only! No ecmp at all here! 18 | 19 | Environment used in the middlepoint routing problem using SP to reach a middlepoint. 20 | We are using bidirectional links in this environment! 21 | self.edge_state[:][0] = link utilization 22 | self.edge_state[:][1] = link capacity 23 | """ 24 | def __init__(self): 25 | self.graph = None # Here we store the graph as DiGraph (without repeated edges) 26 | self.source = None 27 | self.destination = None 28 | self.demand = None 29 | 30 | self.edge_state = None 31 | self.graph_topology_name = None # Here we store the name of the graph topology from the repetita dataset 32 | self.dataset_folder_name = None # Here we store the name of the repetita dataset being used: 2015Defo, 2016TopologyZoo_unary, 2016TopologyZoo_inverseCapacity, etc. 33 | 34 | self.diameter = None 35 | self.list_of_demands_to_change = None # Eligible demands coming from the DRL agent 36 | 37 | # Nx Graph where the nodes have features. Betweenness is always normalized. 
38 | # The other features are "raw" and are being normalized before prediction 39 | self.between_feature = None 40 | 41 | self.sp_middlepoints = None # For each src,dst we store the nodeId of the sp middlepoint 42 | self.shortest_paths = None # For each src,dst we store the shortest path to reach d 43 | 44 | # Mean and standard deviation of link betweenness 45 | self.mu_bet = None 46 | self.std_bet = None 47 | 48 | # Episode length in timesteps 49 | self.episode_length = None 50 | 51 | self.list_eligible_demands = None # Here we store those demands from DEFO that have one middlepoint. These demands will be eligible for selection by our DRL agent. 52 | self.num_critical_links = 5 53 | 54 | # Error at the end of episode to evaluate the learning process 55 | self.error_evaluation = None 56 | # Ideal target link capacity: self.sumTM/self.numEdges 57 | self.target_link_capacity = None 58 | 59 | self.TM = None # Traffic matrix where self.TM[src][dst] indicates how many packets are sent from src to dst 60 | self.meanTM = None 61 | self.stdTM = None 62 | self.sumTM = None 63 | self.routing = None # Loaded routing matrix 64 | self.paths_Matrix_from_routing = None # We store a list of paths extracted from the routing matrix for each src-dst pair 65 | 66 | self.K = None 67 | self.nodes = None # List of nodes to pick randomly from them 68 | self.ordered_edges = None 69 | self.edgesDict = dict() # Stores the position id of each edge in order 70 | self.previous_path = None 71 | 72 | self.src_dst_k_middlepoints = None # For each src, dst, we store the k middlepoints 73 | self.node_to_index_dic = None # For each node from the real graph we store its index 74 | self.index_to_node_lst = None # We store a list of nodes in an ordered fashion 75 | 76 | self.numNodes = None 77 | self.numEdges = None 78 | self.numSteps = 0 # As our problem can go on forever, we limit it to 10 steps 79 | 80 | self.sameLink = False # Indicates if we are working with the same link 81 | 82 | # We store the edge that has maximum utilization 83 | # (src, dst, MaxUtilization) 84 | self.edgeMaxUti = None 85 | # We store the path with more bandwidth from the edge with maximum utilization 86 | # (src, dst, MaxBandwidth) 87 | self.patMaxBandwth = None 88 | self.maxBandwidth = None 89 | 90 | self.episode_over = True 91 | self.reward = 0 92 | self.allPaths = dict() # Stores the paths for each src:dst pair 93 | 94 | def seed(self, seed): 95 | random.seed(seed) 96 | np.random.seed(seed) 97 | 98 | def add_features_to_edges(self): 99 | incId = 1 100 | for node in self.graph: 101 | for adj in self.graph[node]: 102 | if not 'edgeId' in self.graph[node][adj][0]: 103 | self.graph[node][adj][0]['edgeId'] = incId 104 | if not 'numsp' in self.graph[node][adj][0]: 105 | self.graph[node][adj][0]['numsp'] = 0 106 | if not 'utilization' in self.graph[node][adj][0]: 107 | self.graph[node][adj][0]['utilization'] = 0 108 | if not 'capacity' in self.graph[node][adj][0]: 109 | self.graph[node][adj][0]['capacity'] = 0 110 | if not 'weight' in self.graph[node][adj][0]: 111 | self.graph[node][adj][0]['weight'] = 0 112 | if not 'crossing_paths' in self.graph[node][adj][0]: # We store all the src,dst from the paths crossing each edge 113 | self.graph[node][adj][0]['crossing_paths'] = dict() 114 | incId = incId + 1 115 | 116 | def decrease_links_utilization_sp(self, src, dst, init_source, final_destination): 117 | # In this function we deallocate the bandwidth by segments. 
This function is used when we want 118 | # to deallocate from a src to a middlepoint and then from the middlepoint to a dst using the sp 119 | 120 | # We obtain the demand from the original source,destination pair 121 | bw_allocated = self.TM[init_source][final_destination] 122 | currentPath = self.shortest_paths[src,dst] 123 | 124 | i = 0 125 | j = 1 126 | while (j < len(currentPath)): 127 | firstNode = currentPath[i] 128 | secondNode = currentPath[j] 129 | 130 | self.graph[firstNode][secondNode][0]['utilization'] -= bw_allocated 131 | if str(init_source)+':'+str(final_destination) in self.graph[firstNode][secondNode][0]['crossing_paths']: 132 | del self.graph[firstNode][secondNode][0]['crossing_paths'][str(init_source)+':'+str(final_destination)] 133 | self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][0] = self.graph[firstNode][secondNode][0]['utilization'] 134 | i = i + 1 135 | j = j + 1 136 | 137 | def _generate_tm(self, tm_id): 138 | # Sample a file randomly to initialize the tm 139 | graph_file = self.dataset_folder_name+"/"+self.graph_topology_name+".graph" 140 | # This 'results_file' file is ignored! 141 | results_file = self.dataset_folder_name+"/res_"+self.graph_topology_name+"_"+str(tm_id) 142 | tm_file = self.dataset_folder_name+"/TM/"+self.graph_topology_name+'.'+str(tm_id)+".demands" 143 | 144 | self.defoDatasetAPI = defoResults.Defo_results(graph_file,results_file) 145 | self.links_bw = self.defoDatasetAPI.links_bw 146 | self.TM = self.defoDatasetAPI._get_traffic_matrix(tm_file) 147 | 148 | self.maxBandwidth = np.amax(self.TM) 149 | 150 | traffic = np.copy(self.TM) 151 | # Remove diagonal from matrix 152 | traffic = traffic[~np.eye(traffic.shape[0], dtype=bool)].reshape(traffic.shape[0], -1) 153 | 154 | self.sumTM = np.sum(traffic) 155 | self.target_link_capacity = self.sumTM/self.numEdges 156 | self.meanTM = np.mean(traffic) 157 | self.stdTM = np.std(traffic) 158 | 159 | def compute_link_utilization_reset_sp(self): 160 | # Compute the paths that cross each link and then add up the bandwidth to obtain the link utilization 161 | for src in range (0,self.numNodes): 162 | for dst in range (0,self.numNodes): 163 | if src!=dst: 164 | self.allocate_to_destination_sp(src, dst, src, dst) 165 | 166 | def mark_edges(self, action_flags, src, dst, init_source, final_destination): 167 | currentPath = self.shortest_paths[src,dst] 168 | 169 | i = 0 170 | j = 1 171 | 172 | while (j < len(currentPath)): 173 | firstNode = currentPath[i] 174 | secondNode = currentPath[j] 175 | 176 | action_flags[self.edgesDict[str(firstNode)+':'+str(secondNode)]] += 1.0 177 | i = i + 1 178 | j = j + 1 179 | 180 | 181 | def mark_action_to_edges(self, first_node, init_source, final_destination): 182 | # In this function we mark, for each link, the bw that it will allocate. 
We will 183 | # use this to avoid repeated actions 184 | action_flags = np.zeros(self.numEdges) 185 | 186 | # Mark until first_node 187 | self.mark_edges(action_flags, init_source, first_node, init_source, final_destination) 188 | 189 | # If the first node is a middlepoint 190 | if first_node!=final_destination: 191 | self.mark_edges(action_flags, first_node, final_destination, init_source, final_destination) 192 | 193 | return action_flags 194 | 195 | def compute_middlepoint_set_remove_rep_actions_no_loop(self): 196 | # In this function we compute the middlepoint set but we don't take into account the middlepoints whose 197 | # actions are repeated nor those middlepoints whose SPs pass over the DST node 198 | 199 | # Compute SPs for each src,dst pair 200 | self.compute_SPs() 201 | 202 | # We compute the middlepoint set for each src,dst pair and we don't consider repeated actions 203 | self.src_dst_k_middlepoints = dict() 204 | # Iterate over all node1,node2 pairs from the graph 205 | for n1 in range (0,self.numNodes): 206 | for n2 in range (0,self.numNodes): 207 | if (n1 != n2): 208 | self.src_dst_k_middlepoints[str(n1)+':'+str(n2)] = list() 209 | repeated_actions = list() 210 | for midd in range (0,self.K): 211 | # If the middlepoint is not the source node 212 | if midd!=n1: 213 | action_flags = self.mark_action_to_edges(midd, n1, n2) 214 | # If we allocated to a middlepoint that is not the final destination 215 | if midd!=n2: 216 | # If the repeated_actions list is empty we make the following verifications 217 | if len(repeated_actions) == 0: 218 | 219 | path1 = self.shortest_paths[n1, midd] 220 | path2 = self.shortest_paths[midd, n2] 221 | 222 | # Check that the dst node is not in the SP to avoid loops! 223 | currentPath = path1[:len(path1)-1]+path2 224 | dst_counter = 0 225 | for node in currentPath: 226 | if node==n2 or node==n1: 227 | dst_counter += 1 228 | # If there is only one dst node 229 | if dst_counter==2: 230 | repeated_actions.append(action_flags) 231 | self.src_dst_k_middlepoints[str(n1)+':'+str(n2)].append(midd) 232 | else: 233 | repeatedAction = False 234 | # Compare the current action with the previous ones 235 | for previous_actions in repeated_actions: 236 | subtraction = np.absolute(np.subtract(action_flags,previous_actions)) 237 | if np.sum(subtraction)==0.0: 238 | repeatedAction = True 239 | break 240 | # If we didn't find any identical action, we make the following verifications 241 | if not repeatedAction: 242 | path1 = self.shortest_paths[n1, midd] 243 | path2 = self.shortest_paths[midd, n2] 244 | # Check that the dst node is not in the SP to avoid loops! 
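# (path1[:len(path1)-1]+path2 joins the two SP segments without duplicating the middlepoint; the counter below accepts the route only if src and dst each appear exactly once in the joined path)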
274 |     def compute_SPs(self):
275 |         diameter = nx.diameter(self.graph)
276 |         self.shortest_paths = np.zeros((self.numNodes,self.numNodes),dtype=object)
277 | 
278 |         allPaths = dict()
279 |         sp_path = self.dataset_folder_name+"/shortest_paths.json"
280 | 
281 |         if not os.path.isfile(sp_path):
282 |             for n1 in range (0,self.numNodes):
283 |                 for n2 in range (0,self.numNodes):
284 |                     if (n1 != n2):
285 |                         allPaths[str(n1)+':'+str(n2)] = []
286 |                         # First we compute the shortest paths taking into account the diameter
287 |                         [allPaths[str(n1)+':'+str(n2)].append(p) for p in nx.all_simple_paths(self.graph, source=n1, target=n2, cutoff=diameter*2)] # We take all the paths from n1 to n2 and we order them according to the path length
288 |                         # sorted() orders the paths from fewest to most hops; paths with the
289 |                         # same number of hops are ordered by node index
290 |                         aux_sorted_paths = sorted(allPaths[str(n1)+':'+str(n2)], key=lambda item: (len(item), item)) # self.shortest_paths[n1,n2] = nx.shortest_path(self.graph, n1, n2,weight='weight')
291 |                         allPaths[str(n1)+':'+str(n2)] = aux_sorted_paths[0]
292 | 
293 |             with open(sp_path, 'w') as fp:
294 |                 json.dump(allPaths, fp)
295 |         else:
296 |             allPaths = json.load(open(sp_path))
297 | 
298 |         for n1 in range (0,self.numNodes):
299 |             for n2 in range (0,self.numNodes):
300 |                 if (n1 != n2):
301 |                     self.shortest_paths[n1,n2] = allPaths[str(n1)+':'+str(n2)]
302 | 
303 |     def generate_environment(self, dataset_folder_name, graph_topology_name, EPISODE_LENGTH, K, percentage_demands):
304 |         self.episode_length = EPISODE_LENGTH
305 |         self.graph_topology_name = graph_topology_name
306 |         self.dataset_folder_name = dataset_folder_name
307 |         self.list_eligible_demands = list()
308 |         self.percentage_demands = percentage_demands
309 | 
310 |         self.maxCapacity = 0 # We take the maximum capacity to normalize
311 | 
312 |         graph_file = self.dataset_folder_name+"/"+self.graph_topology_name+".graph"
313 |         results_file = self.dataset_folder_name+"/res_"+self.graph_topology_name+"_0"
314 |         tm_file = self.dataset_folder_name+"/TM/"+self.graph_topology_name+".0.demands"
315 |         self.defoDatasetAPI = defoResults.Defo_results(graph_file,results_file)
316 | 
317 |         self.node_to_index_dic = self.defoDatasetAPI.node_to_index_dic_pvt
318 |         self.index_to_node_lst = self.defoDatasetAPI.index_to_node_lst_pvt
319 | 
320 |         self.graph = self.defoDatasetAPI.Gbase
321 |         self.add_features_to_edges()
322 |         self.numNodes = 
len(self.graph.nodes()) 323 | self.numEdges = len(self.graph.edges()) 324 | 325 | self.K = K 326 | if self.K>self.numNodes: 327 | self.K = self.numNodes 328 | 329 | self.edge_state = np.zeros((self.numEdges, 2)) 330 | self.shortest_paths = np.zeros((self.numNodes,self.numNodes),dtype="object") 331 | 332 | position = 0 333 | for i in self.graph: 334 | for j in self.graph[i]: 335 | self.edgesDict[str(i)+':'+str(j)] = position 336 | self.graph[i][j][0]['capacity'] = self.defoDatasetAPI.links_bw[i][j] 337 | self.graph[i][j][0]['weight'] = self.defoDatasetAPI.links_weight[i][j] 338 | if self.graph[i][j][0]['capacity']>self.maxCapacity: 339 | self.maxCapacity = self.graph[i][j][0]['capacity'] 340 | self.edge_state[position][1] = self.graph[i][j][0]['capacity'] 341 | self.graph[i][j][0]['utilization'] = 0.0 342 | self.graph[i][j][0]['crossing_paths'].clear() 343 | position += 1 344 | 345 | # We create the list of nodes ids to pick randomly from them 346 | self.nodes = list(range(0,self.numNodes)) 347 | 348 | self.compute_middlepoint_set_remove_rep_actions_no_loop() 349 | 350 | def step_sp(self, action, source, destination): 351 | # We get the K-middlepoints between source-destination 352 | middlePointList = list(self.src_dst_k_middlepoints[str(source) +':'+ str(destination)]) 353 | middlePoint = middlePointList[action] 354 | 355 | # First we allocate until the middlepoint using the shortest path 356 | self.allocate_to_destination_sp(source, middlePoint, source, destination) 357 | # If we allocated to a middlepoint that is not the final destination 358 | if middlePoint!=destination: 359 | # Then we allocate from the middlepoint to the destination using the shortest path 360 | self.allocate_to_destination_sp(middlePoint, destination, source, destination) 361 | # We store that the pair source,destination has a middlepoint 362 | self.sp_middlepoints[str(source)+':'+str(destination)] = middlePoint 363 | 364 | # Find new maximum and minimum utilization link 365 | old_Utilization = self.edgeMaxUti[2] 366 | self.edgeMaxUti = (0, 0, 0) 367 | for i in self.graph: 368 | for j in self.graph[i]: 369 | position = self.edgesDict[str(i)+':'+str(j)] 370 | self.edge_state[position][0] = self.graph[i][j][0]['utilization'] 371 | link_capacity = self.links_bw[i][j] 372 | norm_edge_state_capacity = self.edge_state[position][0]/link_capacity 373 | if norm_edge_state_capacity>self.edgeMaxUti[2]: 374 | self.edgeMaxUti = (i, j, norm_edge_state_capacity) 375 | 376 | return self.edgeMaxUti[2] 377 | 378 | def step_hill_sp(self, action, source, destination): 379 | # We get the K-middlepoints between source-destination 380 | middlePointList = list(self.src_dst_k_middlepoints[str(source) +':'+ str(destination)]) 381 | middlePoint = middlePointList[action] 382 | 383 | # First we allocate until the middlepoint using the shortest path 384 | self.allocate_to_destination_sp(source, middlePoint, source, destination) 385 | # If we allocated to a middlepoint that is not the final destination 386 | if middlePoint!=destination: 387 | # Then we allocate from the middlepoint to the destination using the shortest path 388 | self.allocate_to_destination_sp(middlePoint, destination, source, destination) 389 | # We store that the pair source,destination has a middlepoint 390 | self.sp_middlepoints[str(source)+':'+str(destination)] = middlePoint 391 | 392 | # Find new maximum and minimum utilization link 393 | old_Utilization = self.edgeMaxUti[2] 394 | self.edgeMaxUti = (0, 0, 0) 395 | for i in self.graph: 396 | for j in self.graph[i]: 397 | 
position = self.edgesDict[str(i)+':'+str(j)]
398 |                 self.edge_state[position][0] = self.graph[i][j][0]['utilization']
399 |                 link_capacity = self.links_bw[i][j]
400 |                 norm_edge_state_capacity = self.edge_state[position][0]/link_capacity
401 |                 if norm_edge_state_capacity>self.edgeMaxUti[2]:
402 |                     self.edgeMaxUti = (i, j, norm_edge_state_capacity)
403 | 
404 |         return -self.edgeMaxUti[2]
405 | 
406 |     def reset_sp(self, tm_id):
407 |         """
408 |         Reset the environment and set up a new episode.
409 |         A new TM is generated but the same routing is loaded. The path with the most bandwidth
410 |         on the most utilized link is removed so that it can later be allocated on a new path in act().
411 |         """
412 |         self._generate_tm(tm_id)
413 | 
414 |         self.sp_middlepoints = dict()
415 | 
416 |         # Clear the link utilization and crossing paths
417 |         for i in self.graph:
418 |             for j in self.graph[i]:
419 |                 self.graph[i][j][0]['utilization'] = 0.0
420 |                 self.graph[i][j][0]['crossing_paths'].clear()
421 | 
422 |         # For each link we store the total sum of bandwidths of the paths crossing each link without middlepoints
423 |         self.compute_link_utilization_reset_sp()
424 |         self.edgeMaxUti = (0, 0, 0)  # Reset the max-utilization tracker (as done in reset_hill_sp below)
425 |         # We iterate over all links in an ordered fashion and store the features to edge_state
426 |         for i in self.graph:
427 |             for j in self.graph[i]:
428 |                 position = self.edgesDict[str(i)+':'+str(j)]
429 |                 self.edge_state[position][0] = self.graph[i][j][0]['utilization']
430 |                 self.edge_state[position][1] = self.graph[i][j][0]['capacity']
431 |                 link_capacity = self.links_bw[i][j]
432 |                 norm_edge_state_capacity = self.edge_state[position][0]/link_capacity
433 |                 if norm_edge_state_capacity>self.edgeMaxUti[2]:
434 |                     self.edgeMaxUti = (i, j, norm_edge_state_capacity)
435 | 
436 |         return self.edgeMaxUti[2]
437 | 
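    # Every step_*() and reset_*() above ends with the same scan: each link's
    # utilization is normalized by its capacity and the largest ratio wins. A
    # standalone sketch over hypothetical (src, dst, utilization, capacity) rows:
    #
    #     def max_normalized_utilization(links):
    #         best = (0, 0, 0)
    #         for src, dst, uti, cap in links:
    #             if uti / cap > best[2]:
    #                 best = (src, dst, uti / cap)
    #         return best
    #
    #     max_normalized_utilization([(0, 1, 5.0, 10.0), (1, 2, 9.0, 10.0)])  # -> (1, 2, 0.9)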
438 |     def reset_hill_sp(self, tm_id):
439 |         """
440 |         Reset the environment and set up a new episode.
441 |         A new TM is generated but the same routing is loaded. The path with the most bandwidth
442 |         on the most utilized link is removed so that it can later be allocated on a new path in act().
443 |         """
444 |         self._generate_tm(tm_id)
445 | 
446 |         self.sp_middlepoints = dict()
447 | 
448 |         # Clear the link utilization and crossing paths
449 |         for i in self.graph:
450 |             for j in self.graph[i]:
451 |                 self.graph[i][j][0]['utilization'] = 0.0
452 |                 self.graph[i][j][0]['crossing_paths'].clear()
453 | 
454 |         # For each link we store the total sum of bandwidths of the paths crossing each link without middlepoints
455 |         self.compute_link_utilization_reset_sp()
456 | 
457 |         # We iterate over all links in an ordered fashion and store the features to edge_state
458 |         self.edgeMaxUti = (0, 0, 0)
459 |         for i in self.graph:
460 |             for j in self.graph[i]:
461 |                 position = self.edgesDict[str(i)+':'+str(j)]
462 |                 self.edge_state[position][0] = self.graph[i][j][0]['utilization']
463 |                 self.edge_state[position][1] = self.graph[i][j][0]['capacity']
464 |                 link_capacity = self.links_bw[i][j]
465 |                 norm_edge_state_capacity = self.edge_state[position][0]/link_capacity
466 |                 if norm_edge_state_capacity>self.edgeMaxUti[2]:
467 |                     self.edgeMaxUti = (i, j, norm_edge_state_capacity)
468 | 
469 |         return -self.edgeMaxUti[2]
470 | 
471 |     def _get_top_k_critical_flows(self, list_ids):
472 |         self.list_eligible_demands.clear()
473 |         for linkId in list_ids:
474 |             i = linkId[0]
475 |             j = linkId[1]
476 |             for demand, value in self.graph[i][j][0]['crossing_paths'].items():
477 |                 src, dst = int(demand.split(':')[0]), int(demand.split(':')[1])
478 |                 if (src, dst, self.TM[src,dst]) not in self.list_eligible_demands:
479 |                     self.list_eligible_demands.append((src, dst, self.TM[src,dst]))
480 | 
481 |         self.list_eligible_demands = sorted(self.list_eligible_demands, key=lambda tup: tup[2], reverse=True)
482 |         if len(self.list_eligible_demands)>int(np.ceil(self.numNodes*(self.numNodes-1)*self.percentage_demands)):
483 |             self.list_eligible_demands = self.list_eligible_demands[:int(np.ceil(self.numNodes*(self.numNodes-1)*self.percentage_demands))]
484 | 
485 |     def reset_DRL_hill_sp(self, tm_id, best_routing, list_of_demands_to_change):
486 |         """
487 |         Reset the environment and set up a new episode.
488 |         A new TM is generated and the best routing found so far (if provided) is restored. The path
489 |         with the most bandwidth on the most utilized link is removed so that it can later be allocated on a new path in act().
490 | """ 491 | self._generate_tm(tm_id) 492 | if best_routing is not None: 493 | self.sp_middlepoints = best_routing 494 | else: 495 | self.sp_middlepoints = dict() 496 | self.list_of_demands_to_change = list_of_demands_to_change 497 | 498 | # Clear the link utilization and crossing paths 499 | for i in self.graph: 500 | for j in self.graph[i]: 501 | self.graph[i][j][0]['utilization'] = 0.0 502 | self.graph[i][j][0]['crossing_paths'].clear() 503 | 504 | # For each link we store the total sum of bandwidths of the paths crossing each link without middlepoints 505 | self.compute_link_utilization_reset_sp() 506 | 507 | # We restore the best routing configuration from the DRL agent 508 | for key, middlepoint in self.sp_middlepoints.items(): 509 | source = int(key.split(':')[0]) 510 | dest = int(key.split(':')[1]) 511 | if middlepoint!=dest: 512 | # First we remove current routing and then we assign the new middlepoint 513 | self.decrease_links_utilization_sp(source, dest, source, dest) 514 | 515 | # First we allocate until the middlepoint 516 | self.allocate_to_destination_sp(source, middlepoint, source, dest) 517 | # Then we allocate from the middlepoint to the destination 518 | self.allocate_to_destination_sp(middlepoint, dest, source, dest) 519 | 520 | # We iterate over all links in an ordered fashion and store the features to edge_state 521 | self.edgeMaxUti = (0, 0, 0) 522 | # This list is used to obtain the top K flows from the critical links 523 | list_link_uti_id = list() 524 | for i in self.graph: 525 | for j in self.graph[i]: 526 | position = self.edgesDict[str(i)+':'+str(j)] 527 | self.edge_state[position][0] = self.graph[i][j][0]['utilization'] 528 | self.edge_state[position][1] = self.graph[i][j][0]['capacity'] 529 | link_capacity = self.links_bw[i][j] 530 | # We store the link utilization and the corresponding edge 531 | list_link_uti_id.append((i, j, self.edge_state[position][0])) 532 | 533 | norm_edge_state_capacity = self.edge_state[position][0]/link_capacity 534 | if norm_edge_state_capacity>self.edgeMaxUti[2]: 535 | self.edgeMaxUti = (i, j, norm_edge_state_capacity) 536 | 537 | list_link_uti_id = sorted(list_link_uti_id, key=lambda tup: tup[2], reverse=True)[:self.num_critical_links] 538 | self._get_top_k_critical_flows(list_link_uti_id) 539 | 540 | # If we want to take the x% bigger demands 541 | # self.list_eligible_demands = sorted(list_link_uti_id, key=lambda tup: tup[0], reverse=True) 542 | # self.list_eligible_demands = self.list_eligible_demands[:int(np.ceil(self.numNodes*(self.numNodes-1)*self.percentage_demands))] 543 | 544 | return -self.edgeMaxUti[2] 545 | 546 | def allocate_to_destination_sp(self, src, dst, init_source, final_destination): 547 | # In this function we allocated the bandwidth by segments. 
This funcion is used when we want 548 | # to allocate from a src to a middlepoint and then from middlepoint to a dst using the sp 549 | bw_allocate = self.TM[init_source][final_destination] 550 | currentPath = self.shortest_paths[src,dst] 551 | 552 | i = 0 553 | j = 1 554 | 555 | while (j < len(currentPath)): 556 | firstNode = currentPath[i] 557 | secondNode = currentPath[j] 558 | 559 | self.graph[firstNode][secondNode][0]['utilization'] += bw_allocate 560 | self.graph[firstNode][secondNode][0]['crossing_paths'][str(init_source)+':'+str(final_destination)] = bw_allocate 561 | self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][0] = self.graph[firstNode][secondNode][0]['utilization'] 562 | i = i + 1 563 | j = j + 1 564 | -------------------------------------------------------------------------------- /gym-graph/gym_graph/envs/environment16.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import networkx as nx 4 | import random 5 | from gym import error, spaces, utils 6 | from random import choice 7 | import pandas as pd 8 | import pickle 9 | import json 10 | import os.path 11 | import gc 12 | import defo_process_results as defoResults 13 | import matplotlib.pyplot as plt 14 | 15 | class Env16(gym.Env): 16 | """ 17 | Here I only take X% of the demands. There are some flags 18 | that indicate if to take the X% larger demands, the X% from the 5 most loaded links 19 | or random. 20 | 21 | Environment used in the middlepoint routing problem. Here we compute the SP to reach a middlepoint. 22 | We are using bidirectional links in this environment! 23 | In this environment we make the MP between edges. 24 | self.edge_state[:][0] = link utilization 25 | self.edge_state[:][1] = link capacity 26 | self.edge_state[:][2] = bw allocated (the one that goes from src to dst) 27 | """ 28 | def __init__(self): 29 | self.graph = None # Here we store the graph as DiGraph (without repeated edges) 30 | self.source = None 31 | self.destination = None 32 | self.demand = None 33 | 34 | self.edge_state = None 35 | self.graph_topology_name = None # Here we store the name of the graph topology from the repetita dataset 36 | self.dataset_folder_name = None # Here we store the name of the repetita dataset being used: 2015Defo, 2016TopologyZoo_unary,2016TopologyZoo_inverseCapacity, etc. 37 | 38 | self.diameter = None 39 | 40 | # Nx Graph where the nodes have features. Betweenness is allways normalized. 
41 | # The other features are "raw" and are being normalized before prediction 42 | self.first = None 43 | self.firstTrueSize = None 44 | self.second = None 45 | self.between_feature = None 46 | 47 | self.percentage_demands = None # X% of the most loaded demands we use for optimization 48 | self.shufle_demands = False # If True we shuffle the list of traffic demands 49 | self.top_K_critical_demands = False # If we want to take the top X% of the 5 most loaded links 50 | self.num_critical_links = 5 51 | 52 | self.sp_middlepoints = None # For each src,dst we store the nodeId of the sp middlepoint 53 | self.shortest_paths = None # For each src,dst we store the shortest path to reach d 54 | self.sp_middlepoints_step = dict() # We store the midlepoint assignation before step() finishes 55 | 56 | # Mean and standard deviation of link betweenness 57 | self.mu_bet = None 58 | self.std_bet = None 59 | 60 | # Episode length in timesteps 61 | self.episode_length = None 62 | self.currentVal = None # Value used in hill_climbing way of choosing the next demand 63 | self.initial_maxLinkUti = None 64 | self.iter_list_elig_demn = None 65 | 66 | # Error at the end of episode to evaluate the learning process 67 | self.error_evaluation = None 68 | # Ideal target link capacity: self.sumTM/self.numEdges 69 | self.target_link_capacity = None 70 | 71 | self.TM = None # Traffic matrix where self.TM[src][dst] indicates how many packets are sent from src to dst 72 | self.sumTM = None 73 | self.routing = None # Loaded routing matrix 74 | self.paths_Matrix_from_routing = None # We store a list of paths extracted from the routing matrix for each src-dst pair 75 | 76 | self.K = None 77 | self.nodes = None # List of nodes to pick randomly from them 78 | self.ordered_edges = None 79 | self.edgesDict = dict() # Stores the position id of each edge in order 80 | self.previous_path = None 81 | 82 | self.src_dst_k_middlepoints = None # For each src, dst, we store the k middlepoints 83 | self.list_eligible_demands = None # Here we store those demands from DEFO that have one middlepoint. These demands are going to be eligible by our DRL agent. 
84 |         self.link_capacity_feature = None
85 | 
86 |         self.numNodes = None
87 |         self.numEdges = None
88 |         self.next_state = None
89 | 
90 |         # We store the edge that has maximum utilization
91 |         # (src, dst, MaxUtilization)
92 |         self.edgeMaxUti = None
93 |         # We store the edge that has minimum utilization
94 |         # (src, dst, MinUtilization)
95 |         self.edgeMinUti = None
96 |         # We store the path with the most bandwidth from the edge with maximum utilization
97 |         # (src, dst, MaxBandwidth)
98 |         self.patMaxBandwth = None
99 |         self.maxBandwidth = None
100 | 
101 |         self.episode_over = True
102 |         self.reward = 0
103 |         self.allPaths = dict() # Stores the paths for each src:dst pair
104 | 
105 |     def seed(self, seed):
106 |         random.seed(seed)
107 |         np.random.seed(seed)
108 | 
109 |     def add_features_to_edges(self):
110 |         incId = 1
111 |         for node in self.graph:
112 |             for adj in self.graph[node]:
113 |                 if not 'betweenness' in self.graph[node][adj][0]:
114 |                     self.graph[node][adj][0]['betweenness'] = 0
115 |                 if not 'edgeId' in self.graph[node][adj][0]:
116 |                     self.graph[node][adj][0]['edgeId'] = incId
117 |                 if not 'numsp' in self.graph[node][adj][0]:
118 |                     self.graph[node][adj][0]['numsp'] = 0
119 |                 if not 'utilization' in self.graph[node][adj][0]:
120 |                     self.graph[node][adj][0]['utilization'] = 0
121 |                 if not 'capacity' in self.graph[node][adj][0]:
122 |                     self.graph[node][adj][0]['capacity'] = 0
123 |                 if not 'weight' in self.graph[node][adj][0]:
124 |                     self.graph[node][adj][0]['weight'] = 0
125 |                 if not 'kshortp' in self.graph[node][adj][0]:
126 |                     self.graph[node][adj][0]['kshortp'] = 0
127 |                 if not 'crossing_paths' in self.graph[node][adj][0]: # We store all the src,dst from the paths crossing each edge
128 |                     self.graph[node][adj][0]['crossing_paths'] = dict()
129 |                 incId = incId + 1
130 | 
131 |     def num_shortest_path(self, topology):
132 |         self.diameter = nx.diameter(self.graph)
133 |         # Iterate over all node1,node2 pairs from the graph
134 |         for n1 in range (0,self.numNodes):
135 |             for n2 in range (0,self.numNodes):
136 |                 if (n1 != n2):
137 |                     # Check if we already added this src:dst entry
138 |                     if str(n1)+':'+str(n2) not in self.allPaths:
139 |                         self.allPaths[str(n1)+':'+str(n2)] = []
140 |                     # First we compute the shortest paths taking into account the diameter
141 |                     [self.allPaths[str(n1)+':'+str(n2)].append(p) for p in nx.all_simple_paths(self.graph, source=n1, target=n2, cutoff=self.diameter*2)]
142 | 
143 |                     # We take all the paths from n1 to n2 and we order them according to the path length
144 |                     # sorted() orders the paths from fewest to most hops; paths with the
145 |                     # same number of hops are ordered by node index
146 |                     self.allPaths[str(n1)+':'+str(n2)] = sorted(self.allPaths[str(n1)+':'+str(n2)], key=lambda item: (len(item), item))
147 |                     path = 0
148 |                     while path < self.K and path < len(self.allPaths[str(n1)+':'+str(n2)]):
149 |                         currentPath = self.allPaths[str(n1)+':'+str(n2)][path]
150 |                         i = 0
151 |                         j = 1
152 | 
153 |                         # Iterate over pairs of nodes and increase the shortest-path counter of each traversed link
154 |                         while (j < len(currentPath)):
155 |                             self.graph.get_edge_data(currentPath[i], currentPath[j])[0]['numsp'] = \
156 |                                 self.graph.get_edge_data(currentPath[i], currentPath[j])[0]['numsp'] + 1
157 |                             i = i + 1
158 |                             j = j + 1
159 | 
160 |                         path = path + 1
161 | 
162 |                     # Remove paths not needed
163 |                     del self.allPaths[str(n1)+':'+str(n2)][path:len(self.allPaths[str(n1)+':'+str(n2)])]
164 |                     gc.collect()
165 | 
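    # num_shortest_path() above enumerates simple paths up to twice the graph
    # diameter and keeps the K shortest, breaking hop-count ties by node index.
    # The same ordering on a tiny standalone graph (illustrative sketch only):
    #
    #     import networkx as nx
    #
    #     g = nx.cycle_graph(4)  # ring 0-1-2-3-0
    #     paths = sorted(nx.all_simple_paths(g, 0, 2, cutoff=nx.diameter(g) * 2),
    #                    key=lambda p: (len(p), p))
    #     # paths == [[0, 1, 2], [0, 3, 2]]  (both 2 hops, ordered by index)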
166 |     def decrease_links_utilization_sp(self, src, dst, init_source, final_destination):
167 |         # In this function we deallocate the bandwidth by segments. This function is used when we want
168 |         # to deallocate from a src to a middlepoint and then from the middlepoint to a dst using the SP
169 | 
170 |         # We obtain the demand from the original source, destination pair
171 |         bw_allocated = self.TM[init_source][final_destination]
172 |         currentPath = self.shortest_paths[src,dst]
173 | 
174 |         i = 0
175 |         j = 1
176 |         while (j < len(currentPath)):
177 |             firstNode = currentPath[i]
178 |             secondNode = currentPath[j]
179 | 
180 |             self.graph[firstNode][secondNode][0]['utilization'] -= bw_allocated
181 |             if str(init_source)+':'+str(final_destination) in self.graph[firstNode][secondNode][0]['crossing_paths']:
182 |                 del self.graph[firstNode][secondNode][0]['crossing_paths'][str(init_source)+':'+str(final_destination)]
183 |             self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][0] = self.graph[firstNode][secondNode][0]['utilization']
184 |             i = i + 1
185 |             j = j + 1
186 | 
187 |     def _get_top_k_critical_flows(self, list_ids):
188 |         self.list_eligible_demands.clear()
189 |         for linkId in list_ids:
190 |             i = linkId[1]
191 |             j = linkId[2]
192 |             for demand, value in self.graph[i][j][0]['crossing_paths'].items():
193 |                 src, dst = int(demand.split(':')[0]), int(demand.split(':')[1])
194 |                 if (src, dst, self.TM[src,dst]) not in self.list_eligible_demands:
195 |                     self.list_eligible_demands.append((src, dst, self.TM[src,dst]))
196 | 
197 |         self.list_eligible_demands = sorted(self.list_eligible_demands, key=lambda tup: tup[2], reverse=True)
198 |         if len(self.list_eligible_demands)>int(np.ceil(self.numNodes*(self.numNodes-1)*self.percentage_demands)):
199 |             self.list_eligible_demands = self.list_eligible_demands[:int(np.ceil(self.numNodes*(self.numNodes-1)*self.percentage_demands))]
200 | 
201 |     def _generate_tm(self, tm_id):
202 |         # Build the dataset file paths for the given tm_id and load the traffic matrix
203 |         graph_file = self.dataset_folder_name+"/"+self.graph_topology_name+".graph"
204 |         # This 'results_file' file is ignored!
205 | results_file = self.dataset_folder_name+"/res_"+self.graph_topology_name+"_"+str(tm_id) 206 | tm_file = self.dataset_folder_name+"/TM/"+self.graph_topology_name+'.'+str(tm_id)+".demands" 207 | 208 | self.defoDatasetAPI = defoResults.Defo_results(graph_file,results_file) 209 | self.links_bw = self.defoDatasetAPI.links_bw 210 | self.MP_matrix = self.defoDatasetAPI.MP_matrix 211 | self.TM = self.defoDatasetAPI._get_traffic_matrix(tm_file) 212 | 213 | self.iter_list_elig_demn = 0 214 | self.list_eligible_demands.clear() 215 | min_links_bw = 1000000.0 216 | for src in range (0,self.numNodes): 217 | for dst in range (0,self.numNodes): 218 | if src!=dst: 219 | self.list_eligible_demands.append((src, dst, self.TM[src,dst])) 220 | # If we have a link between src and dst 221 | if src in self.graph and dst in self.graph[src]: 222 | # Store the link with minimum bw 223 | if self.links_bw[src][dst]=len(sorted_dict): 253 | path = 0 254 | srcPath = int(sorted_dict[path][0].split(':')[0]) 255 | dstPath = int(sorted_dict[path][0].split(':')[1]) 256 | self.patMaxBandwth = (srcPath, dstPath, self.TM[srcPath][dstPath]) 257 | 258 | def _obtain_path_from_set_rand(self): 259 | len_demans = len(self.list_eligible_demands)-1 260 | path = random.randint(0, len_demans) 261 | srcPath = int(self.list_eligible_demands[path][0]) 262 | dstPath = int(self.list_eligible_demands[path][1]) 263 | self.patMaxBandwth = (srcPath, dstPath, int(self.list_eligible_demands[path][2])) 264 | 265 | def _obtain_demand(self): 266 | src = self.list_eligible_demands[self.iter_list_elig_demn][0] 267 | dst = self.list_eligible_demands[self.iter_list_elig_demn][1] 268 | bw = self.list_eligible_demands[self.iter_list_elig_demn][2] 269 | self.patMaxBandwth = (src, dst, int(bw)) 270 | self.iter_list_elig_demn += 1 271 | 272 | def get_value(self, source, destination, action): 273 | # We get the K-middlepoints between source-destination 274 | middlePointList = self.src_dst_k_middlepoints[str(source) +':'+ str(destination)] 275 | middlePoint = middlePointList[action] 276 | 277 | # First we allocate until the middlepoint 278 | self.allocate_to_destination_sp(source, middlePoint, source, destination) 279 | # If we allocated to a middlepoint that is not the final destination 280 | if middlePoint!=destination: 281 | # Then we allocate from the middlepoint to the destination 282 | self.allocate_to_destination_sp(middlePoint, destination, source, destination) 283 | # We store that the pair source,destination has a middlepoint 284 | self.sp_middlepoints[str(source)+':'+str(destination)] = middlePoint 285 | 286 | currentValue = -1000000 287 | # Get the maximum loaded link and it's value after allocating to the corresponding middlepoint 288 | for i in self.graph: 289 | for j in self.graph[i]: 290 | position = self.edgesDict[str(i)+':'+str(j)] 291 | link_capacity = self.links_bw[i][j] 292 | if self.edge_state[position][0]/link_capacity>currentValue: 293 | currentValue = self.edge_state[position][0]/link_capacity 294 | 295 | # Dissolve allocation step so that later we can try another action 296 | # Remove bandwidth allocated until the middlepoint and then from the middlepoint on 297 | if str(source)+':'+str(destination) in self.sp_middlepoints: 298 | middlepoint = self.sp_middlepoints[str(source)+':'+str(destination)] 299 | self.decrease_links_utilization_sp(source, middlepoint, source, destination) 300 | self.decrease_links_utilization_sp(middlepoint, destination, source, destination) 301 | del self.sp_middlepoints[str(source)+':'+str(destination)] 302 
| else: # Remove the bandwidth allocated from the src to the destination 303 | self.decrease_links_utilization_sp(source, destination, source, destination) 304 | 305 | return -currentValue 306 | 307 | def _obtain_demand_hill_climbing(self): 308 | dem_iter = 0 309 | nextVal = -1000000 310 | self.next_state = None 311 | # Iterate for each demand possible 312 | for source in range(self.numNodes): 313 | for dest in range(self.numNodes): 314 | if source!=dest: 315 | for action in range(len(self.src_dst_k_middlepoints[str(source)+':'+str(dest)])): 316 | middlepoint = -1 317 | # First we need to desallocate the current demand before we explore all it's possible actions 318 | # Check if there is a middlepoint to desallocate from src-middlepoint-dst 319 | if str(source)+':'+str(dest) in self.sp_middlepoints: 320 | middlepoint = self.sp_middlepoints[str(source)+':'+str(dest)] 321 | self.decrease_links_utilization_sp(source, middlepoint, source, dest) 322 | self.decrease_links_utilization_sp(middlepoint, dest, source, dest) 323 | del self.sp_middlepoints[str(source)+':'+str(dest)] 324 | else: # Remove the bandwidth allocated from the src to the destination 325 | self.decrease_links_utilization_sp(source, dest, source, dest) 326 | 327 | evalState = self.get_value(source, dest, action) 328 | if evalState > nextVal: 329 | nextVal = evalState 330 | self.next_state = (action, source, dest) 331 | 332 | # Allocate back the demand whose actions we explored 333 | # If the current demand had a middlepoint, we allocate src-middlepoint-dst 334 | if middlepoint>=0: 335 | # First we allocate until the middlepoint 336 | self.allocate_to_destination_sp(source, middlepoint, source, dest) 337 | # Then we allocate from the middlepoint to the destination 338 | self.allocate_to_destination_sp(middlepoint, dest, source, dest) 339 | # We store that the pair source,destination has a middlepoint 340 | self.sp_middlepoints[str(source)+':'+str(dest)] = middlepoint 341 | else: 342 | # Then we allocate from the middlepoint to the destination 343 | self.allocate_to_destination_sp(source, dest, source, dest) 344 | self.patMaxBandwth = (self.next_state[1], self.next_state[2], self.TM[self.next_state[1]][self.next_state[2]]) 345 | 346 | def compute_middlepoint_set_random(self): 347 | # We choose the K-middlepoints for each src-dst randomly 348 | self.src_dst_k_middlepoints = dict() 349 | # Iterate over all node1,node2 pairs from the graph 350 | for n1 in range (0,self.numNodes): 351 | for n2 in range (0,self.numNodes): 352 | if (n1 != n2): 353 | num_middlepoints = 0 354 | self.src_dst_k_middlepoints[str(n1)+':'+str(n2)] = list() 355 | # We add the destination as a candidate middlepoint (in case we have direct connection) 356 | self.src_dst_k_middlepoints[str(n1)+':'+str(n2)].append(n2) 357 | num_middlepoints += 1 358 | while num_middlepointsself.numNodes: 550 | self.K = self.numNodes 551 | 552 | self.edge_state = np.zeros((self.numEdges, 3)) 553 | self.betweenness_centrality = np.zeros(self.numEdges) # Used in the fully connected 554 | self.shortest_paths = np.zeros((self.numNodes,self.numNodes),dtype="object") 555 | 556 | position = 0 557 | for i in self.graph: 558 | for j in self.graph[i]: 559 | self.edgesDict[str(i)+':'+str(j)] = position 560 | self.graph[i][j][0]['capacity'] = self.defoDatasetAPI.links_bw[i][j] 561 | self.graph[i][j][0]['weight'] = self.defoDatasetAPI.links_weight[i][j] 562 | if self.graph[i][j][0]['capacity']>self.maxCapacity: 563 | self.maxCapacity = self.graph[i][j][0]['capacity'] 564 | 
self.edge_state[position][1] = self.graph[i][j][0]['capacity'] 565 | self.betweenness_centrality[position] = btwns[i,j] 566 | self.graph[i][j][0]['utilization'] = 0.0 567 | self.graph[i][j][0]['crossing_paths'].clear() 568 | position += 1 569 | 570 | self._first_second() 571 | self.firstTrueSize = len(self.first) 572 | 573 | self.link_capacity_feature = np.divide(self.edge_state[:,1], self.maxCapacity) 574 | 575 | # We create the list of nodes ids to pick randomly from them 576 | self.nodes = list(range(0,self.numNodes)) 577 | 578 | self.compute_middlepoint_set_remove_rep_actions_no_loop() 579 | 580 | def step(self, action, demand, source, destination): 581 | # Action is the middlepoint. Careful because it can also be action==destination if src,dst are connected directly by an edge 582 | self.episode_over = False 583 | self.reward = 0 584 | 585 | # We get the K-middlepoints between source-destination 586 | middlePointList = self.src_dst_k_middlepoints[str(source) +':'+ str(destination)] 587 | middlePoint = middlePointList[action] 588 | 589 | # First we allocate until the middlepoint 590 | self.allocate_to_destination_sp(source, middlePoint, source, destination) 591 | # If we allocated to a middlepoint that is not the final destination 592 | if middlePoint!=destination: 593 | # Then we allocate from the middlepoint to the destination 594 | self.allocate_to_destination_sp(middlePoint, destination, source, destination) 595 | # We store that the pair source,destination has a middlepoint 596 | self.sp_middlepoints[str(source)+':'+str(destination)] = middlePoint 597 | 598 | self.sp_middlepoints_step = self.sp_middlepoints 599 | 600 | # Find new maximum and minimum utilization link 601 | old_Utilization = self.edgeMaxUti[2] 602 | self.edgeMaxUti = (0, 0, 0) 603 | for i in self.graph: 604 | for j in self.graph[i]: 605 | position = self.edgesDict[str(i)+':'+str(j)] 606 | self.edge_state[position][0] = self.graph[i][j][0]['utilization'] 607 | link_capacity = self.links_bw[i][j] 608 | norm_edge_state_capacity = self.edge_state[position][0]/link_capacity 609 | if norm_edge_state_capacity>self.edgeMaxUti[2]: 610 | self.edgeMaxUti = (i, j, norm_edge_state_capacity) 611 | 612 | self.currentVal = -self.edgeMaxUti[2] 613 | 614 | self.reward = np.around((old_Utilization-self.edgeMaxUti[2])*10,2) 615 | 616 | # If we didn't iterate over all demands 617 | if self.iter_list_elig_demnself.edgeMaxUti[2]: 668 | self.edgeMaxUti = (i, j, norm_edge_state_capacity) 669 | 670 | if self.top_K_critical_demands: 671 | list_link_uti_id = sorted(list_link_uti_id, key=lambda tup: tup[0], reverse=True)[:self.num_critical_links] 672 | self._get_top_k_critical_flows(list_link_uti_id) 673 | 674 | self.currentVal = -self.edgeMaxUti[2] 675 | self.initial_maxLinkUti = -self.edgeMaxUti[2] 676 | # From the link with more utilization, we obtain a random path of the 5 with more bandwidth 677 | #self._obtain_path_more_bandwidth_rand_link() 678 | #self._obtain_path_from_set_rand() 679 | #self._obtain_demand_hill_climbing() 680 | self._obtain_demand() 681 | 682 | # Remove bandwidth allocated for the path with more bandwidth from the link with more utilization 683 | self.decrease_links_utilization_sp(self.patMaxBandwth[0], self.patMaxBandwth[1], self.patMaxBandwth[0], self.patMaxBandwth[1]) 684 | 685 | # We desmark the bw_allocated 686 | self.edge_state[:,2] = 0 687 | 688 | return self.TM[self.patMaxBandwth[0]][self.patMaxBandwth[1]], self.patMaxBandwth[0], self.patMaxBandwth[1] 689 | 690 | def allocate_to_destination_sp(self, src, dst, 
init_source, final_destination): 691 | # In this function we allocated the bandwidth by segments. This funcion is used when we want 692 | # to allocate from a src to a middlepoint and then from middlepoint to a dst using the sp 693 | bw_allocate = self.TM[init_source][final_destination] 694 | currentPath = self.shortest_paths[src,dst] 695 | 696 | i = 0 697 | j = 1 698 | 699 | while (j < len(currentPath)): 700 | firstNode = currentPath[i] 701 | secondNode = currentPath[j] 702 | 703 | self.graph[firstNode][secondNode][0]['utilization'] += bw_allocate 704 | self.graph[firstNode][secondNode][0]['crossing_paths'][str(init_source)+':'+str(final_destination)] = bw_allocate 705 | self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][0] = self.graph[firstNode][secondNode][0]['utilization'] 706 | i = i + 1 707 | j = j + 1 708 | 709 | def mark_action_sp(self, src, dst, init_source, final_destination): 710 | # In this function we mark the action in the corresponding edges of the SP between src,dst 711 | bw_allocate = self.TM[init_source][final_destination] 712 | currentPath = self.shortest_paths[src,dst] 713 | 714 | i = 0 715 | j = 1 716 | 717 | while (j < len(currentPath)): 718 | firstNode = currentPath[i] 719 | secondNode = currentPath[j] 720 | 721 | self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][2] = bw_allocate/self.edge_state[self.edgesDict[str(firstNode)+':'+str(secondNode)]][1] 722 | i = i + 1 723 | j = j + 1 -------------------------------------------------------------------------------- /gym-graph/gym_graph/envs/environment20.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import networkx as nx 4 | import random 5 | from gym import error, spaces, utils 6 | from random import choice 7 | import pandas as pd 8 | import pickle 9 | import os.path 10 | import json 11 | import gc 12 | import defo_process_results as defoResults 13 | 14 | class Env20(gym.Env): 15 | """ 16 | Similar to environment15.py but this one is used for the SAP (instead of hill climbing) 17 | 18 | Environment used in the middlepoint routing problem. 19 | We are using bidirectional links in this environment! 20 | In this environment we make the MP between nodes and concatenate the edge features 21 | to the node features in the message function. 22 | self.edge_state[:][0] = link utilization 23 | self.edge_state[:][1] = link capacity 24 | self.edge_state[:][2] = bw allocated (the one that goes from src to dst) 25 | """ 26 | def __init__(self): 27 | self.graph = None # Here we store the graph as DiGraph (without repeated edges) 28 | self.source = None 29 | self.destination = None 30 | self.demand = None 31 | 32 | self.edge_state = None 33 | self.graph_topology_name = None # Here we store the name of the graph topology from the repetita dataset 34 | self.dataset_folder_name = None # Here we store the name of the repetita dataset being used: 2015Defo, 2016TopologyZoo_unary,2016TopologyZoo_inverseCapacity, etc. 35 | 36 | self.diameter = None 37 | 38 | # Nx Graph where the nodes have features. Betweenness is allways normalized. 
39 | # The other features are "raw" and are being normalized before prediction 40 | self.between_feature = None 41 | 42 | self.nodeId = None 43 | self.sp_middlepoints = None # For each src,dst we store the nodeId of the sp middlepoint 44 | self.shortest_paths = None # For each src,dst we store the shortest path to reach d 45 | 46 | # Mean and standard deviation of link betweenness 47 | self.mu_bet = None 48 | self.std_bet = None 49 | 50 | # Episode length in timesteps 51 | self.episode_length = None 52 | self.list_eligible_demands = None # Here we store those demands from DEFO that have one middlepoint. These demands are going to be eligible by our DRL agent. 53 | self.iter_list_elig_demn = None 54 | 55 | # Error at the end of episode to evaluate the learning process 56 | self.error_evaluation = None 57 | # Ideal target link capacity: self.sumTM/self.numEdges 58 | self.target_link_capacity = None 59 | 60 | self.TM = None # Traffic matrix where self.TM[src][dst] indicates how many packets are sent from src to dst 61 | self.meanTM = None 62 | self.stdTM = None 63 | self.sumTM = None 64 | self.routing = None # Loaded routing matrix 65 | self.paths_Matrix_from_routing = None # We store a list of paths extracted from the routing matrix for each src-dst pair 66 | 67 | self.K = None 68 | self.nodes = None # List of nodes to pick randomly from them 69 | self.ordered_edges = None 70 | self.edgesDict = dict() # Stores the position id of each edge in order 71 | self.previous_path = None 72 | 73 | self.src_dst_k_middlepoints = None # For each src, dst, we store the k middlepoints 74 | self.node_to_index_dic = None # For each node from the real graph we store it's index 75 | self.index_to_node_lst = None # We store a list of nodes in an ordered fashion 76 | 77 | self.numNodes = None 78 | self.numEdges = None 79 | self.numSteps = 0 # As our problem can go forever, we limit it to 10 steps 80 | 81 | self.sameLink = False # Indicates if we are working with the same link 82 | 83 | # We store the edge that has maximum utilization 84 | # (src, dst, MaxUtilization) 85 | self.edgeMaxUti = None 86 | # We store the edge that has minimum utilization 87 | # (src, dst, MaxUtilization) 88 | self.edgeMinUti = None 89 | # We store the path with more bandwidth from the edge with maximum utilization 90 | # (src, dst, MaxBandwidth) 91 | self.patMaxBandwth = None 92 | self.maxBandwidth = None 93 | 94 | self.episode_over = True 95 | self.reward = 0 96 | self.allPaths = dict() # Stores the paths for each src:dst pair 97 | 98 | def seed(self, seed): 99 | random.seed(seed) 100 | np.random.seed(seed) 101 | 102 | def add_features_to_edges(self): 103 | incId = 1 104 | for node in self.graph: 105 | for adj in self.graph[node]: 106 | if not 'betweenness' in self.graph[node][adj][0]: 107 | self.graph[node][adj][0]['betweenness'] = 0 108 | if not 'edgeId' in self.graph[node][adj][0]: 109 | self.graph[node][adj][0]['edgeId'] = incId 110 | if not 'numsp' in self.graph[node][adj][0]: 111 | self.graph[node][adj][0]['numsp'] = 0 112 | if not 'utilization' in self.graph[node][adj][0]: 113 | self.graph[node][adj][0]['utilization'] = 0 114 | if not 'capacity' in self.graph[node][adj][0]: 115 | self.graph[node][adj][0]['capacity'] = 0 116 | if not 'weight' in self.graph[node][adj][0]: 117 | self.graph[node][adj][0]['weight'] = 0 118 | if not 'kshortp' in self.graph[node][adj][0]: 119 | self.graph[node][adj][0]['kshortp'] = 0 120 | if not 'crossing_paths' in self.graph[node][adj][0]: # We store all the src,dst from the paths crossing each edge 
121 | self.graph[node][adj][0]['crossing_paths'] = dict() 122 | incId = incId + 1 123 | 124 | def _generate_tm(self, tm_id): 125 | # Sample a file randomly to initialize the tm 126 | graph_file = self.dataset_folder_name+"/"+self.graph_topology_name+".graph" 127 | # This 'results_file' file is ignored! 128 | results_file = self.dataset_folder_name+"/res_"+self.graph_topology_name+"_"+str(tm_id) 129 | tm_file = self.dataset_folder_name+"/TM/"+self.graph_topology_name+'.'+str(tm_id)+".demands" 130 | 131 | self.defoDatasetAPI = defoResults.Defo_results(graph_file,results_file) 132 | self.links_bw = self.defoDatasetAPI.links_bw 133 | self.TM = self.defoDatasetAPI._get_traffic_matrix(tm_file) 134 | 135 | self.iter_list_elig_demn = 0 136 | self.list_eligible_demands.clear() 137 | min_links_bw = 1000000.0 138 | for src in range (0,self.numNodes): 139 | for dst in range (0,self.numNodes): 140 | if src!=dst: 141 | self.list_eligible_demands.append((src, dst, self.TM[src,dst])) 142 | # If we have a link between src and dst 143 | if src in self.graph and dst in self.graph[src]: 144 | # Store the link with minimum bw 145 | if self.links_bw[src][dst]maxUti: 275 | maxUti = self.edge_state[position][0]/link_capacity 276 | self.edgeMaxUti = (i, j, maxUti) 277 | if self.edge_state[position][0]/link_capacity max_util: 98 | max_util = self._graph.get_edge_data(*i)['utilization'] 99 | return max_util 100 | 101 | def mark_action(self, action): 102 | """ 103 | mark action on links the path have 104 | """ 105 | marked = copy.deepcopy(self._graph_state) 106 | if action == -1: 107 | return marked 108 | demand = self._demand_list[self._demand_idx] 109 | temp = self._shortest_path[demand[0]][action] 110 | for i in range(len(temp) - 1): 111 | marked[self.edges_dict[(temp[i], temp[i+1])]][2] = demand[2] 112 | temp = self._shortest_path[action][demand[1]] 113 | for i in range(len(temp) - 1): 114 | marked[self.edges_dict[(temp[i], temp[i + 1])]][2] = demand[2] 115 | return marked 116 | 117 | def seed(self, seed): 118 | random.seed(seed) 119 | np.random.seed(seed) 120 | 121 | def step(self, action): 122 | if action != 0: 123 | demand = self._demand_list[self._demand_idx] 124 | action = self.action_space[(demand[0], demand[1])][action] 125 | temp = self._shortest_path[demand[0]][action] 126 | for i in range(len(temp)-1): 127 | self._graph[temp[i]][temp[i+1]]['bwAlloc'] += demand[2] 128 | self._graph[temp[i]][temp[i+1]]['utilization'] = self._graph[temp[i]][temp[i + 1]]['bwAlloc'] \ 129 | / self._graph[temp[i]][temp[i + 1]]['capacity'] 130 | self._graph_state[self.edges_dict[(temp[i], temp[i+1])]][1] = self._graph[temp[i]][temp[i + 1]]['utilization'] 131 | 132 | temp = self._shortest_path[action][demand[1]] 133 | for i in range(len(temp)-1): 134 | self._graph[temp[i]][temp[i+1]]['bwAlloc'] += demand[2] 135 | self._graph[temp[i]][temp[i+1]]['utilization'] = self._graph[temp[i]][temp[i+1]]['bwAlloc'] \ 136 | / self._graph[temp[i]][temp[i+1]]['capacity'] 137 | self._graph_state[self.edges_dict[(temp[i], temp[i+1])]][1] = self._graph[temp[i]][temp[i+1]]['utilization'] 138 | 139 | temp = self._demand_routing[demand] 140 | for i in range(len(temp) - 1): 141 | self._graph[temp[i]][temp[i+1]]['bwAlloc'] -= demand[2] 142 | self._graph[temp[i]][temp[i+1]]['utilization'] = self._graph[temp[i]][temp[i+1]]['bwAlloc'] \ 143 | / self._graph[temp[i]][temp[i + 1]]['capacity'] 144 | self._graph_state[self.edges_dict[(temp[i], temp[i+1])]][1] = self._graph[temp[i]][temp[i+1]]['utilization'] 145 | self._demand_routing[demand] = 
self._shortest_path[demand[0]][action][0:-1] + self._shortest_path[action][demand[1]] 146 | 147 | max_util = self._max_link_util() 148 | reward = self.max_util - max_util 149 | self.max_util = max_util 150 | 151 | self._demand_idx = self._demand_idx + 1 152 | if self._demand_idx == len(self._demand_list): 153 | self._done = True 154 | demand = None 155 | else: 156 | self._done = False 157 | demand = self._demand_list[self._demand_idx] 158 | 159 | return copy.deepcopy(self._graph_state), self._done, demand, reward 160 | 161 | def reset(self, topology, demand_list=None): 162 | self._graph = generate_graph(topology) 163 | self._demand_list = demand_list 164 | self._demand_idx = 0 165 | self._num_edges = len(self._graph.edges()) 166 | self._ordered_edges = sorted([edge for edge in self._graph.edges()]) 167 | self.edges_dict = dict() 168 | self._graph_state = np.zeros((self._num_edges, 3)) 169 | self.max_util = 0 170 | self._done = False 171 | 172 | if self._demand_list == None: 173 | self._demand_list = self._generate_traffic() 174 | self._demand_list = sorted(self._demand_list, key=lambda x: x[2], reverse=True) 175 | 176 | idx = 0 177 | for n1, n2 in self._ordered_edges: 178 | self.edges_dict[(n1, n2)] = idx 179 | self.edges_dict[(n2, n1)] = idx 180 | self._graph_state[idx][0] = self._graph.get_edge_data(n1, n2)['capacity'] #/ self._graph.get_edge_data(n1, n2)['capacity'] 181 | self._graph_state[idx][1] = self._graph.get_edge_data(n1, n2)['utilization'] 182 | idx = idx + 1 183 | 184 | self.neighbor_edges = dict() 185 | for n1, n2 in self._ordered_edges: 186 | self.neighbor_edges[(n1, n2)] = list() 187 | for m, n in list(self._graph.edges(n1)) + list(self._graph.edges(n2)): 188 | if (n1 != m or n2 != n) and (n1 != n or n2 != m): 189 | self.neighbor_edges[(n1, n2)].append((m, n)) 190 | 191 | self._shortest_path = dict(nx.all_pairs_shortest_path(self._graph)) 192 | self.action_space = dict() 193 | for i in self._graph.nodes(): 194 | for j in self._graph.nodes(): 195 | self.action_space[(i, j)] = [-1] 196 | for k in self._graph.nodes(): 197 | if k == i or k == j: 198 | continue 199 | if j not in self._shortest_path[i][k] or i not in self._shortest_path[k][j]: 200 | self.action_space[(i, j)].append(k) 201 | 202 | self._demand_routing = dict() 203 | for i in self._demand_list: 204 | temp = self._shortest_path[i[0]][i[1]] 205 | for j in range(len(temp) - 1): 206 | self._graph[temp[j]][temp[j+1]]['bwAlloc'] += i[2] 207 | self._graph[temp[j]][temp[j+1]]['utilization'] = self._graph[temp[j]][temp[j+1]]['bwAlloc'] \ 208 | / self._graph[temp[j]][temp[j+1]]['capacity'] 209 | self._graph_state[self.edges_dict[(temp[j], temp[j+1])]][1] \ 210 | = self._graph[temp[j]][temp[j+1]]['utilization'] 211 | self._demand_routing[i] = self._shortest_path[i[0]][i[1]] 212 | self.max_util = self._max_link_util() 213 | 214 | return copy.deepcopy(self._graph_state), self._demand_list[self._demand_idx] 215 | 216 | def render(self, mode='human'): 217 | if mode == 'human': 218 | pos = nx.spring_layout(self._graph) 219 | edge_labels = nx.get_edge_attributes(self._graph, 'capacity') 220 | nx.draw(self._graph, pos, with_labels=True) 221 | nx.draw_networkx_edge_labels(self._graph, pos, edge_labels=edge_labels) 222 | plt.show() 223 | plt.clf() 224 | -------------------------------------------------------------------------------- /gym_env/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='gym_env', 4 | version='0.0.1', 5 | 
install_requires=['gym', 'networkx'] # And any other dependencies foo needs 6 | ) -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from actor_critic import PPOAC 3 | import gym 4 | import gym_graph 5 | import random 6 | import numpy as np 7 | import os 8 | import gc 9 | import time 10 | 11 | if __name__ == '__main__': 12 | 13 | if not os.path.exists("./Logs"): 14 | os.makedirs("./Logs") 15 | 16 | SEED = 9 17 | os.environ['PYTHONHASHSEED'] = str(SEED) 18 | np.random.seed(SEED) 19 | random.seed(SEED) 20 | torch.manual_seed(1) 21 | torch.cuda.manual_seed(1) 22 | torch.cuda.manual_seed_all(1) 23 | torch.backends.cudnn.benchmark = False 24 | torch.backends.cudnn.deterministic = True 25 | experiment_letter = "_B_NEW" 26 | take_critic_demands = True # True if we want to take the demands from the most critical links, True if we want to take the largest 27 | percentage_demands = 15 # Percentage of demands that will be used in the optimization 28 | str_perctg_demands = str(percentage_demands) 29 | percentage_demands /= 100 30 | 31 | max_iters = 150 32 | EVALUATION_EPISODES = 20 # As the demand selection is deterministic, it doesn't make sense to evaluate multiple times over the same TM 33 | 34 | num_samples_top1 = int(np.ceil(percentage_demands * 380)) * 5 35 | num_samples_top2 = int(np.ceil(percentage_demands * 506)) * 4 36 | num_samples_top3 = int(np.ceil(percentage_demands * 272)) * 6 37 | 38 | num_samples_top = [num_samples_top1, num_samples_top2, num_samples_top3] 39 | 40 | differentiation_str = "Enero_3top_" + str_perctg_demands + experiment_letter 41 | model_dir = "./models" + differentiation_str 42 | 43 | if not os.path.exists(model_dir): 44 | os.makedirs(model_dir) 45 | 46 | fileLogs = open("./Logs/exp" + differentiation_str + "Logs.txt", "w") 47 | 48 | ENV_NAME = 'GraphEnv-v16' 49 | 50 | training_tm_ids = set(range(100)) 51 | 52 | hyper_parameter = { 53 | 'feature_size': 20, 54 | 't': 5, 55 | 'readout_units': 20, 56 | 'episode': 20, 57 | 'lr': 0.0002, 58 | 'lr_decay_rate': 0.96, 59 | 'lr_decay_step': 60, 60 | 'mini_batch': 55, 61 | 'gae_gamma': 0.99, 62 | 'gae_lambda': 0.95, 63 | 'clip_value': 0.5, 64 | 'entropy_beta': 0.01, 65 | 'entropy_step': 60, 66 | 'l2_regular': 0.0001, 67 | 'buffer_size': num_samples_top1 + num_samples_top2 + num_samples_top3, 68 | 'update_times': 8 69 | } 70 | 71 | dataset_root_folder = "../Enero_datasets/dataset_sing_top/data/results_my_3_tops_unif_05-1/" 72 | dataset_folder_name1 = "NEW_BtAsiaPac" 73 | dataset_folder_name2 = "NEW_Garr199905" 74 | dataset_folder_name3 = "NEW_Goodnet" 75 | 76 | dataset_folder_name1 = dataset_root_folder + dataset_folder_name1 77 | dataset_folder_name2 = dataset_root_folder + dataset_folder_name2 78 | dataset_folder_name3 = dataset_root_folder + dataset_folder_name3 79 | 80 | env_training1 = gym.make(ENV_NAME) 81 | env_training1.seed(SEED) 82 | env_training1.generate_environment(dataset_folder_name1 + "/TRAIN", "BtAsiaPac", 0, 100, percentage_demands) 83 | env_training1.top_K_critical_demands = take_critic_demands 84 | 85 | env_training2 = gym.make(ENV_NAME) 86 | env_training2.seed(SEED) 87 | env_training2.generate_environment(dataset_folder_name2 + "/TRAIN", "Garr199905", 0, 100, percentage_demands) 88 | env_training2.top_K_critical_demands = take_critic_demands 89 | 90 | env_training3 = gym.make(ENV_NAME) 91 | env_training3.seed(SEED) 92 | 
env_training3.generate_environment(dataset_folder_name3 + "/TRAIN", "Goodnet", 0, 100, percentage_demands) 93 | env_training3.top_K_critical_demands = take_critic_demands 94 | 95 | env_training = [env_training1, env_training2, env_training3] 96 | 97 | env_eval1 = gym.make(ENV_NAME) 98 | env_eval1.seed(SEED) 99 | env_eval1.generate_environment(dataset_folder_name1 + "/EVALUATE", "BtAsiaPac", 0, 100, percentage_demands) 100 | env_eval1.top_K_critical_demands = take_critic_demands 101 | 102 | env_eval2 = gym.make(ENV_NAME) 103 | env_eval2.seed(SEED) 104 | env_eval2.generate_environment(dataset_folder_name2 + "/EVALUATE", "Garr199905", 0, 100, percentage_demands) 105 | env_eval2.top_K_critical_demands = take_critic_demands 106 | 107 | env_eval3 = gym.make(ENV_NAME) 108 | env_eval3.seed(SEED) 109 | env_eval3.generate_environment(dataset_folder_name3 + "/EVALUATE", "Goodnet", 0, 100, percentage_demands) 110 | env_eval3.top_K_critical_demands = take_critic_demands 111 | 112 | env_eval = [env_eval1, env_eval2, env_eval3] 113 | 114 | counter_store_model = 0 115 | max_reward = -1000 116 | AC_policy = PPOAC(hyper_parameter) 117 | for iters in range(100): 118 | 119 | if iters * hyper_parameter['episode'] >= hyper_parameter['entropy_step']: 120 | AC_policy.entropy_beta = hyper_parameter['entropy_beta'] / 10 121 | for e in range(hyper_parameter['episode']): 122 | 123 | print(f"Episode {iters*hyper_parameter['episode']+e}") 124 | 125 | critic_features = [] 126 | tensors = [] 127 | actions = [] 128 | values = [] 129 | masks = [] 130 | rewards = [] 131 | actions_probs = [] 132 | 133 | total_num_samples = 0 134 | 135 | timer_a = time.time() 136 | AC_policy.actor.train() 137 | AC_policy.critic.train() 138 | 139 | for topo in range(len(env_training)): 140 | print(f"topo {topo+1}") 141 | number_samples_reached = False 142 | total_num_samples += num_samples_top[topo] 143 | tm_id = random.sample(training_tm_ids, 1)[0] 144 | while not number_samples_reached: 145 | demand, src, dst = env_training[topo].reset(tm_id=tm_id) 146 | while True: 147 | action_dist, tensor = AC_policy.predict(env_training[topo], src, dst) 148 | 149 | critic_feature = AC_policy.critic_get_graph_features(env_training[topo]) 150 | value = AC_policy.critic(critic_feature)[0] 151 | 152 | action = np.random.choice(len(action_dist), p=action_dist.cpu().detach().numpy()) 153 | action_one_hot = torch.nn.functional.one_hot(torch.tensor(action), num_classes=len(action_dist)) 154 | reward, done, _, demand, src, dst, _, _, _ = env_training[topo].step(action, demand, src, dst) 155 | mask = not done 156 | 157 | tensors.append(tensor) 158 | critic_features.append(critic_feature) 159 | actions.append(action_one_hot) 160 | values.append(value.cpu().detach()) 161 | masks.append(mask) 162 | rewards.append(reward) 163 | actions_probs.append(action_dist) 164 | 165 | if len(tensors) == total_num_samples: 166 | number_samples_reached = True 167 | break 168 | 169 | if done: 170 | break 171 | 172 | critic_feature = AC_policy.critic_get_graph_features(env_training[-1]) 173 | value = AC_policy.critic(critic_feature)[0] 174 | values.append(value.cpu().detach()) 175 | timer_b = time.time() 176 | print("collect_data", timer_b - timer_a, "sec") 177 | 178 | timer_a = time.time() 179 | returns, advantages = AC_policy.compute_gae(values, masks, rewards) 180 | actor_loss, critic_loss = AC_policy.update(actions, actions_probs, tensors, critic_features, returns, 181 | advantages) 182 | if AC_policy.scheduler.get_last_lr()[0] > 0.0001: 183 | AC_policy.scheduler.step() 184 | 
timer_b = time.time() 185 | print("update", timer_b - timer_a, "sec") 186 | 187 | fileLogs.write("a," + str(actor_loss.cpu().detach().numpy()) + ",\n") 188 | fileLogs.write("c," + str(critic_loss.cpu().detach().numpy()) + ",\n") 189 | fileLogs.flush() 190 | 191 | rewards_test = np.zeros(EVALUATION_EPISODES * 3) 192 | error_links = np.zeros(EVALUATION_EPISODES * 3) 193 | max_link_utis = np.zeros(EVALUATION_EPISODES * 3) 194 | min_link_utis = np.zeros(EVALUATION_EPISODES * 3) 195 | uti_stds = np.zeros(EVALUATION_EPISODES * 3) 196 | 197 | AC_policy.actor.eval() 198 | AC_policy.critic.eval() 199 | 200 | timer_a = time.time() 201 | for topo in range(len(env_eval)): 202 | for tm_id in range(EVALUATION_EPISODES): 203 | demand, src, dst = env_eval[topo].reset(tm_id=tm_id) 204 | total_reward = 0 205 | posi = EVALUATION_EPISODES * topo + tm_id 206 | while True: 207 | action_dist, _ = AC_policy.predict(env_eval[topo], src, dst) 208 | action = torch.argmax(action_dist) 209 | 210 | reward, done, error_eval_links, demand, src, dst, max_link_uti, min_link_uti, uti_std = \ 211 | env_eval[topo].step(action, demand, src, dst) 212 | 213 | total_reward += reward 214 | if done: 215 | break 216 | rewards_test[posi] = total_reward 217 | error_links[posi] = error_eval_links 218 | max_link_utis[posi] = max_link_uti[2] 219 | min_link_utis[posi] = min_link_uti 220 | uti_stds[posi] = uti_std 221 | 222 | timer_b = time.time() 223 | print("eval", timer_b - timer_a, "sec") 224 | eval_mean_reward = np.mean(rewards_test) 225 | fileLogs.write(";," + str(np.mean(uti_stds)) + ",\n") 226 | fileLogs.write("+," + str(np.mean(error_links)) + ",\n") 227 | fileLogs.write("<," + str(np.amax(max_link_utis)) + ",\n") 228 | fileLogs.write(">," + str(np.amax(min_link_utis)) + ",\n") 229 | fileLogs.write("ENTR," + str(AC_policy.entropy_beta) + ",\n") 230 | fileLogs.write("REW," + str(eval_mean_reward) + ",\n") 231 | fileLogs.write("lr," + str(AC_policy.scheduler.get_last_lr()[0]) + ",\n") 232 | 233 | if eval_mean_reward > max_reward: 234 | max_reward = eval_mean_reward 235 | fileLogs.write("MAX REWD: " + str(max_reward) + " REWD_ID: " + str(counter_store_model) + ",\n") 236 | torch.save(AC_policy.actor.state_dict(), model_dir + '/' + f'actor_{counter_store_model}.pt') 237 | torch.save(AC_policy.critic.state_dict(), model_dir + '/' + f'critic_{counter_store_model}.pt') 238 | counter_store_model += 1 239 | 240 | fileLogs.flush() 241 | 242 | gc.collect() 243 | fileLogs.close() 244 | torch.save(AC_policy.actor.state_dict(), model_dir + '/' + f'actor_final.pt') 245 | torch.save(AC_policy.critic.state_dict(), model_dir + '/' + f'critic_final.pt') 246 | 247 | 248 | 249 | 250 | -------------------------------------------------------------------------------- /modelsEnero_3top_15_B_NEW/actor_60.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/modelsEnero_3top_15_B_NEW/actor_60.pt -------------------------------------------------------------------------------- /modelsEnero_3top_15_B_NEW/actor_final.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/modelsEnero_3top_15_B_NEW/actor_final.pt -------------------------------------------------------------------------------- /modelsEnero_3top_15_B_NEW/critic_60.pt: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/modelsEnero_3top_15_B_NEW/critic_60.pt
--------------------------------------------------------------------------------
/modelsEnero_3top_15_B_NEW/critic_final.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/modelsEnero_3top_15_B_NEW/critic_final.pt
--------------------------------------------------------------------------------
/parse_PPO.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import numpy as np
3 | import os
4 | import matplotlib.pyplot as plt
5 | from operator import add, sub
6 | from scipy.signal import savgol_filter
7 | 
8 | def smooth(scalars, weight):  # Weight between 0 and 1
9 |     last = scalars[0]  # First value in the plot (first timestep)
10 |     smoothed = list()
11 |     for point in scalars:
12 |         smoothed_val = last * weight + (1 - weight) * point  # Calculate smoothed value
13 |         smoothed.append(smoothed_val)  # Save it
14 |         last = smoothed_val  # Anchor the last smoothed value
15 | 
16 |     return smoothed
17 | 
18 | def read_max_load_link(standard_out_file):
19 |     pre_optim_max_load_link, post_optim_max_load_link = 0, 0
20 |     with open(standard_out_file) as fd:
21 |         while True:
22 |             line = fd.readline()
23 |             if line.startswith("pre-optimization"):
24 |                 camps = line.split(" ")
25 |                 pre_optim_max_load_link = float(camps[-1].split('\n')[0])
26 |             elif line.startswith("post-optimization"):
27 |                 camps = line.split(" ")
28 |                 post_optim_max_load_link = float(camps[-1].split('\n')[0])
29 |                 break
30 |     return (pre_optim_max_load_link, post_optim_max_load_link)
31 | 
32 | if __name__ == "__main__":
33 |     # python parse_PPO.py -d ./Logs/expSP_3top_15_B_NEWLogs.txt
34 |     parser = argparse.ArgumentParser(description='Parse file and create plots')
35 | 
36 |     parser.add_argument('-d', help='data file', type=str, required=True, nargs='+')
37 |     args = parser.parse_args()
38 | 
39 |     aux = args.d[0].split(".")
40 |     aux = aux[1].split("exp")
41 |     differentiation_str = str(aux[1].split("Logs")[0])
42 | 
43 |     actor_loss = []
44 |     critic_loss = []
45 |     avg_std = []
46 |     max_link_uti = []
47 |     min_link_uti = []
48 |     defo_max_uti = []
49 |     error_links = []
50 |     avg_rewards = []
51 |     learning_rate = []
52 |     cumulative_rewards = []
53 | 
54 |     if not os.path.exists("./Images"):
55 |         os.makedirs("./Images")
56 | 
57 |     if not os.path.exists("./Images/TRAINING/"+differentiation_str):
58 |         os.makedirs("./Images/TRAINING/"+differentiation_str)
59 | 
60 |     path_to_dir = "./Images/TRAINING/"+differentiation_str+"/"
61 | 
62 |     model_id = 0
63 |     # Load best model
64 |     with open(args.d[0]) as fp:
65 |         for line in reversed(list(fp)):
66 |             arrayLine = line.split(":")
67 |             if arrayLine[0]=='MAX REWD':
68 |                 model_id = int(arrayLine[2].split(",")[0])
69 |                 break
70 | 
71 |     print("Model with maximum reward: ", model_id)
72 | 
73 |     with open(args.d[0]) as fp:
74 |         for line in fp:
75 |             arrayLine = line.split(",")
76 |             if arrayLine[0]=="<":
77 |                 max_link_uti.append(float(arrayLine[1]))
78 |             elif arrayLine[0]==">":
79 |                 min_link_uti.append(float(arrayLine[1]))
80 |             elif arrayLine[0]=="a":
81 |                 actor_loss.append(float(arrayLine[1]))
82 |             elif arrayLine[0]=="lr":
83 |                 learning_rate.append(float(arrayLine[1]))
84 |             elif arrayLine[0]==";":
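# Key legend for the one-character log sigils parsed here, matching the
# fileLogs.write calls in main.py: "a" = actor loss, "c" = critic loss,
# ";" = average std of link utilization, "+" = link errors, "<" = max link
# utilization, ">" = min link utilization, "REW" = mean evaluation reward,
# "lr" = learning rate, "ENTR" = entropy beta (not plotted here), and
# "MAX REWD:" marks a new best model checkpoint.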
85 |                 avg_std.append(float(arrayLine[1]))
86 |             elif arrayLine[0]=="+":
87 |                 error_links.append(float(arrayLine[1]))
88 |             elif arrayLine[0]=="REW":
89 |                 if float(arrayLine[1])<-3000:
90 |                     avg_rewards.append(-3000)
91 |                 else:
92 |                     avg_rewards.append(float(arrayLine[1]))
93 |             elif arrayLine[0]=="c":
94 |                 critic_loss.append(float(arrayLine[1]))
95 | 
96 |     plt.plot(actor_loss)
97 |     plt.xlabel("Training Episode")
98 |     plt.ylabel("ACTOR Loss")
99 |     plt.savefig(path_to_dir+"ACTORLoss" + differentiation_str)
100 |     plt.close()
101 | 
102 |     plt.plot(critic_loss)
103 |     plt.xlabel("Training Episode")
104 |     plt.ylabel("CRITIC Loss (MSE)")
105 |     plt.yscale("log")
106 |     plt.savefig(path_to_dir+"CRITICLoss" + differentiation_str)
107 |     plt.close()
108 | 
109 |     plt.plot(max_link_uti, label="DRL Max Link Uti")
110 |     plt.plot(defo_max_uti, label="DEFO Max Link Uti", c="tab:red")
111 | 
112 |     print("DRL MAX reward: ", np.amax(avg_rewards))
113 |     plt.xlabel("Episodes")
114 |     lgd = plt.legend(loc="lower left", bbox_to_anchor=(0.07, -0.22), ncol=2, fancybox=True, shadow=True)
115 |     plt.title("GNN+AC Testing score")
116 |     plt.ylabel("Maximum link utilization")
117 |     #plt.yscale('log')
118 |     plt.savefig(path_to_dir+"MaxLinkUti" + differentiation_str, bbox_extra_artists=(lgd,), bbox_inches='tight')
119 |     plt.close()
120 | 
121 |     plt.plot(min_link_uti)
122 |     plt.xlabel("Episodes")
123 |     plt.title("GNN+AC Testing score")
124 |     plt.ylabel("Minimum link utilization")
125 |     plt.savefig(path_to_dir+"MinLinkUti" + differentiation_str)
126 |     plt.close()
127 | 
128 |     plt.plot(avg_rewards)
129 |     plt.xlabel("Episodes")
130 |     plt.title("GNN+AC Testing score")
131 |     plt.ylabel("Average reward")
132 |     plt.savefig(path_to_dir+"AvgReward" + differentiation_str)
133 |     plt.close()
134 | 
135 |     plt.plot(learning_rate)
136 |     plt.xlabel("Episodes")
137 |     plt.title("GNN+AC Testing score")
138 |     plt.ylabel("Learning rate")
139 |     plt.savefig(path_to_dir+"Lr_" + differentiation_str)
140 |     plt.close()
141 | 
142 |     plt.plot(error_links)
143 |     plt.xlabel("Episodes")
144 |     plt.title("GNN+AC Testing score")
145 |     plt.ylabel("Error link (sum_total_TM/num_links)")
146 |     plt.savefig(path_to_dir+"ErrorLinks" + differentiation_str)
147 |     plt.close()
148 | 
149 |     plt.plot(avg_std)
150 |     plt.xlabel("Episodes")
151 |     plt.title("GNN+AC Testing score")
152 |     plt.ylabel("Avg std of link utilization")
153 |     plt.savefig(path_to_dir+"AvgStdUti" + differentiation_str)
154 |     plt.close()
155 | 
--------------------------------------------------------------------------------
/requitrment.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url https://download.pytorch.org/whl/cu116
2 | torch==1.13.1
3 | networkx==2.5
4 | gym==0.17.3
5 | pandas
6 | matplotlib==3.4.1
7 | seaborn
8 | pickle5
9 | numpy<1.24
--------------------------------------------------------------------------------
/runs/1/events.out.tfevents.1650360660.barry.182599.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/runs/1/events.out.tfevents.1650360660.barry.182599.0
--------------------------------------------------------------------------------
/runs/1/events.out.tfevents.1650363931.barry.184203.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/runs/1/events.out.tfevents.1650363931.barry.184203.0
--------------------------------------------------------------------------------
/runs/2/events.out.tfevents.1650452288.barry.199430.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Barry0310/DRL-GNN-implement/9a3d19ee170825d127be97c63b168f2f6aca672d/runs/2/events.out.tfevents.1650452288.barry.199430.0
--------------------------------------------------------------------------------
/script_eval_on_single_topology.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gym
3 | import os
4 | import json
5 | import gym_graph
6 | import random
7 | import argparse
8 | import time as tt
9 | import torch
10 | import pickle
11 | import sys
12 | from actor_critic import PPOAC
13 | sys.setrecursionlimit(2000)
14 | 
15 | # This script is used to evaluate a DRL agent on a single instance of a topology and a TM
16 | # from the repetita dataset. The eval_on_single_topology.py script calls this script for each TM
17 | 
18 | # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
19 | 
20 | ENV_MIDDROUT_AGENT_SP = 'GraphEnv-v16'
21 | ENV_SIMM_ANEAL_AGENT = 'GraphEnv-v15'
22 | ENV_SAP_AGENT = 'GraphEnv-v20'
23 | SEED = 9
24 | 
25 | percentage_demands = 15  # Percentage of demands that will be used in the optimization
26 | str_perctg_demands = str(percentage_demands)
27 | percentage_demands /= 100
28 | 
29 | os.environ['PYTHONHASHSEED']=str(SEED)
30 | np.random.seed(SEED)
31 | torch.manual_seed(1)
32 | 
33 | # Indicates how many time-steps an episode has
34 | EPISODE_LENGTH_MIDDROUT = 100
35 | NUM_ACTIONS = 100  # Put a very large number if we want to take all possible actions for each topology
36 | 
37 | MAX_NUM_EDGES = 100
38 | 
39 | def play_middRout_games_sp(tm_id, env_middRout_sp, agent, timesteps):
40 |     demand, source, destination = env_middRout_sp.reset(tm_id)
41 |     rewardAddTest = 0
42 | 
43 |     initMaxUti = env_middRout_sp.edgeMaxUti[2]
44 |     OSPF_init = initMaxUti
45 |     best_routing = env_middRout_sp.sp_middlepoints_step.copy()
46 | 
47 |     list_of_demands_to_change = env_middRout_sp.list_eligible_demands
48 |     timesteps.append((0, initMaxUti))
49 | 
50 |     start = tt.time()
51 |     time_start_DRL = start
52 |     while 1:
53 |         action_dist, tensor = agent.predict(env_middRout_sp, source, destination)
54 |         action = torch.argmax(action_dist)
55 | 
56 |         reward, done, error_eval_links, demand, source, destination, maxLinkUti, minLinkUti, utiStd = env_middRout_sp.step(action, demand, source, destination)
57 |         rewardAddTest += reward
58 |         if maxLinkUti[2]energy:
110 |                     energy = env.edge_state[position][0]/link_capacity
111 |                 position = position + 1
112 | 
113 |         # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
114 |         if str(source)+':'+str(destination) in env.sp_middlepoints:
115 |             middlepoint = env.sp_middlepoints[str(source)+':'+str(destination)]
116 |             env.decrease_links_utilization_sp(source, middlepoint, source, destination)
117 |             env.decrease_links_utilization_sp(middlepoint, destination, source, destination)
118 |             del env.sp_middlepoints[str(source)+':'+str(destination)]
119 |         else:  # Remove the bandwidth allocated from the src to the destination
120 |             env.decrease_links_utilization_sp(source, destination, source, destination)
121 | 
122 |         # Allocate back the demand whose actions we explored
123 |         # If the current demand had a middlepoint, we allocate src-middlepoint-dst
124 |         if originalMiddlepoint>=0:
125 |             # First we allocate until the middlepoint
126 |             env.allocate_to_destination_sp(source, originalMiddlepoint, source, destination)
127 |             # Then we allocate from the middlepoint to the destination
128 |             env.allocate_to_destination_sp(originalMiddlepoint, destination, source, destination)
129 |             # We store that the pair source,destination has a middlepoint
130 |             env.sp_middlepoints[str(source)+':'+str(destination)] = originalMiddlepoint
131 |         else:
132 |             # Otherwise we allocate directly from the source to the destination
133 |             env.allocate_to_destination_sp(source, destination, source, destination)
134 | 
135 |         return energy, action, source, destination
136 | 
137 | 
138 | def play_sp_simulated_annealing_games(tm_id):
139 |     env_sim_anneal = gym.make(ENV_SIMM_ANEAL_AGENT)
140 |     env_sim_anneal.seed(SEED)
141 |     env_sim_anneal.generate_environment(general_dataset_folder, graph_topology_name, EPISODE_LENGTH_MIDDROUT, NUM_ACTIONS, percentage_demands)
142 | 
143 |     init_energy = env_sim_anneal.reset_sp(tm_id)
144 |     sim_agent = SIMULATED_ANNEALING_SP(env_sim_anneal)
145 | 
146 |     Tmax = 1
147 |     Tmin = 0.000001
148 |     cooling_ratio = 0.000001  # best value is 0.0001 but very slow
149 |     T = Tmax
150 |     L = 4  # Number of trials per temperature value. With L=3 I get even better results
151 |     energy = init_energy
152 |     itera = 0
153 | 
154 |     start = tt.time()
155 |     while T>Tmin:
156 |         for _ in range(L):
157 |             next_energy, action, source, destination = sim_agent.next_state(env_sim_anneal)
158 |             delta_energy = (energy-next_energy)
159 |             itera += 1
160 |             # If we decreased the maximum link utilization we take the action
161 |             if delta_energy>0:
162 |                 # We de-allocate the chosen path so that we can apply the chosen action afterwards
163 |                 # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
164 |                 if str(source)+':'+str(destination) in env_sim_anneal.sp_middlepoints:
165 |                     middlepoint = env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
166 |                     originalMiddlepoint = env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
167 |                     env_sim_anneal.decrease_links_utilization_sp(source, middlepoint, source, destination)
168 |                     env_sim_anneal.decrease_links_utilization_sp(middlepoint, destination, source, destination)
169 |                     del env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
170 |                 else:  # Remove the bandwidth allocated from the src to the destination
171 |                     env_sim_anneal.decrease_links_utilization_sp(source, destination, source, destination)
172 |                 energy = env_sim_anneal.step_sp(action, source, destination)
173 |             # If not, accept the action with some probability
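# (For reference, the elif below is the Metropolis acceptance rule: a worsening
#  move with delta_energy < 0 is accepted with probability exp(delta_energy / T),
#  so e.g. a 0.1 increase in max utilization is accepted roughly 90% of the time
#  at T = 1, but essentially never once T approaches Tmin.)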
174 |             elif np.exp(delta_energy/T)>random.uniform(0, 1):
175 |                 # We de-allocate the chosen path so that we can apply the chosen action afterwards
176 |                 # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
177 |                 if str(source)+':'+str(destination) in env_sim_anneal.sp_middlepoints:
178 |                     middlepoint = env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
179 |                     originalMiddlepoint = env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
180 |                     env_sim_anneal.decrease_links_utilization_sp(source, middlepoint, source, destination)
181 |                     env_sim_anneal.decrease_links_utilization_sp(middlepoint, destination, source, destination)
182 |                     del env_sim_anneal.sp_middlepoints[str(source)+':'+str(destination)]
183 |                 else:  # Remove the bandwidth allocated from the src to the destination
184 |                     env_sim_anneal.decrease_links_utilization_sp(source, destination, source, destination)
185 |                 energy = env_sim_anneal.step_sp(action, source, destination)
186 |         T -= cooling_ratio
187 |     end = tt.time()
188 |     return energy, end-start
189 | 
190 | class HILL_CLIMBING:
191 |     def __init__(self, env):
192 |         self.num_actions = env.K
193 | 
194 |     def get_value_sp(self, env, source, destination, action):
195 |         # We get the K-middlepoints between source-destination
196 |         middlePointList = list(env.src_dst_k_middlepoints[str(source) +':'+ str(destination)])
197 |         middlePoint = middlePointList[action]
198 | 
199 |         # First we allocate until the middlepoint
200 |         env.allocate_to_destination_sp(source, middlePoint, source, destination)
201 |         # If we allocated to a middlepoint that is not the final destination
202 |         if middlePoint!=destination:
203 |             # Then we allocate from the middlepoint to the destination
204 |             env.allocate_to_destination_sp(middlePoint, destination, source, destination)
205 |             # We store that the pair source,destination has a middlepoint
206 |             env.sp_middlepoints[str(source)+':'+str(destination)] = middlePoint
207 | 
208 |         currentValue = -1000000
209 |         position = 0
210 |         # Get the maximum loaded link and its value after allocating to the corresponding middlepoint
211 |         for i in env.graph:
212 |             for j in env.graph[i]:
213 |                 link_capacity = env.links_bw[i][j]
214 |                 if env.edge_state[position][0]/link_capacity>currentValue:
215 |                     currentValue = env.edge_state[position][0]/link_capacity
216 |                 position = position + 1
217 | 
218 |         # Undo the allocation so that we can try another action later
219 |         # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
220 |         if str(source)+':'+str(destination) in env.sp_middlepoints:
221 |             middlepoint = env.sp_middlepoints[str(source)+':'+str(destination)]
222 |             env.decrease_links_utilization_sp(source, middlepoint, source, destination)
223 |             env.decrease_links_utilization_sp(middlepoint, destination, source, destination)
224 |             del env.sp_middlepoints[str(source)+':'+str(destination)]
225 |         else:  # Remove the bandwidth allocated from the src to the destination
226 |             env.decrease_links_utilization_sp(source, destination, source, destination)
227 | 
228 |         return -currentValue
229 | 
230 |     def explore_neighbourhood_sp(self, env):
231 |         dem_iter = 0
232 |         nextVal = -1000000
233 |         next_state = None
234 | 
235 |         # Iterate over each possible demand
236 |         for source in range(env.numNodes):
237 |             for dest in range(env.numNodes):
238 |                 if source!=dest:
239 |                     for action in range(len(env.src_dst_k_middlepoints[str(source)+':'+str(dest)])):
240 |                         middlepoint = -1
241 |                         # First we need to de-allocate the current demand before we explore all its possible actions
242 |                         # Check if there is a middlepoint to de-allocate from src-middlepoint-dst
243 |                         if str(source)+':'+str(dest) in env.sp_middlepoints:
244 |                             middlepoint = env.sp_middlepoints[str(source)+':'+str(dest)]
245 |                             env.decrease_links_utilization_sp(source, middlepoint, source, dest)
246 |                             env.decrease_links_utilization_sp(middlepoint, dest, source, dest)
247 |                             del env.sp_middlepoints[str(source)+':'+str(dest)]
248 |                         # Else, there is no middlepoint and we de-allocate the entire src,dst
249 |                         else:
250 |                             # Remove the bandwidth allocated from the src to the destination
251 |                             env.decrease_links_utilization_sp(source, dest, source, dest)
252 | 
253 |                         evalState = self.get_value_sp(env, source, dest, action)
254 |                         if evalState > nextVal:
255 |                             nextVal = evalState
256 |                             next_state = (action, source, dest)
257 | 
258 |                         # Allocate back the demand whose actions we explored
259 |                         # If the current demand had a middlepoint, we allocate src-middlepoint-dst
260 |                         if middlepoint>=0:
261 |                             # First we allocate until the middlepoint
262 |                             env.allocate_to_destination_sp(source, middlepoint, source, dest)
263 |                             # Then we allocate from the middlepoint to the destination
264 |                             env.allocate_to_destination_sp(middlepoint, dest, source, dest)
265 |                             # We store that the pair source,destination has a middlepoint
266 |                             env.sp_middlepoints[str(source)+':'+str(dest)] = middlepoint
267 |                         else:
268 |                             # Otherwise we allocate directly from the source to the destination
269 |                             env.allocate_to_destination_sp(source, dest, source, dest)
270 |         return nextVal, next_state
271 | 
272 |     def explore_neighbourhood_DRL_sp(self, env):
273 |         dem_iter = 0
274 |         nextVal = -1000000
275 |         next_state = None
276 | 
277 |         # We iterate over the top critical demands
278 |         for elem in env.list_eligible_demands:
279 |             source = elem[0]
280 |             dest = elem[1]
281 |             for action in range(len(env.src_dst_k_middlepoints[str(source)+':'+str(dest)])):
282 |                 middlepoint = -1
283 |                 # First we need to de-allocate the current demand before we explore all its possible actions
284 |                 # Check if there is a middlepoint to de-allocate from src-middlepoint-dst
285 |                 if str(source)+':'+str(dest) in env.sp_middlepoints:
286 |                     middlepoint = env.sp_middlepoints[str(source)+':'+str(dest)]
287 |                     env.decrease_links_utilization_sp(source, middlepoint, source, dest)
288 |                     env.decrease_links_utilization_sp(middlepoint, dest, source, dest)
289 |                     del env.sp_middlepoints[str(source)+':'+str(dest)]
290 |                 # Else, there is no middlepoint and we de-allocate the entire src,dst
291 |                 else:
292 |                     # Remove the bandwidth allocated from the src to the destination
293 |                     env.decrease_links_utilization_sp(source, dest, source, dest)
294 | 
295 |                 evalState = self.get_value_sp(env, source, dest, action)
296 |                 if evalState > nextVal:
297 |                     nextVal = evalState
298 |                     next_state = (action, source, dest)
299 | 
300 |                 # Allocate back the demand whose actions we explored
301 |                 # If the current demand had a middlepoint, we allocate src-middlepoint-dst
302 |                 if middlepoint>=0:
303 |                     # First we allocate until the middlepoint
304 |                     env.allocate_to_destination_sp(source, middlepoint, source, dest)
305 |                     # Then we allocate from the middlepoint to the destination
306 |                     env.allocate_to_destination_sp(middlepoint, dest, source, dest)
307 |                     # We store that the pair source,destination has a middlepoint
308 |                     env.sp_middlepoints[str(source)+':'+str(dest)] = middlepoint
309 |                 else:
310 |                     # Otherwise we allocate directly from the source to the destination
311 |                     env.allocate_to_destination_sp(source, dest, source, dest)
312 |         return nextVal, next_state
313 | 
314 | def play_sp_hill_climbing_games(tm_id):
315 |     # Here we use sp in hill climbing to select the middlepoint and to evaluate
316 |     env_hill_climb = gym.make(ENV_SIMM_ANEAL_AGENT)
317 |     env_hill_climb.seed(SEED)
318 |     env_hill_climb.generate_environment(general_dataset_folder, graph_topology_name, EPISODE_LENGTH_MIDDROUT, NUM_ACTIONS, percentage_demands)
319 | 
320 |     currentVal = env_hill_climb.reset_hill_sp(tm_id)
321 |     hill_climb_agent = HILL_CLIMBING(env_hill_climb)
322 |     start = tt.time()
323 |     while 1:
324 |         nextVal, next_state = hill_climb_agent.explore_neighbourhood_sp(env_hill_climb)
325 |         # If the difference between the two values is very small but non-zero, we break (for precision reasons)
326 |         if nextVal<=currentVal or (abs((-1)*nextVal-(-1)*currentVal)<1e-4):
327 |             break
328 | 
329 |         # Before we apply the new action, we need to remove the current allocation of the chosen demand
330 |         action = next_state[0]
331 |         source = next_state[1]
332 |         dest = next_state[2]
333 | 
334 |         # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
335 |         if str(source)+':'+str(dest) in env_hill_climb.sp_middlepoints:
336 |             middlepoint = env_hill_climb.sp_middlepoints[str(source)+':'+str(dest)]
337 |             env_hill_climb.decrease_links_utilization_sp(source, middlepoint, source, dest)
338 |             env_hill_climb.decrease_links_utilization_sp(middlepoint, dest, source, dest)
339 |             del env_hill_climb.sp_middlepoints[str(source)+':'+str(dest)]
340 |         # If there is no middlepoint assigned to the src,dst pair
341 |         else:
342 |             # Remove the bandwidth allocated from the src to the destination using sp
343 |             env_hill_climb.decrease_links_utilization_sp(source, dest, source, dest)
344 | 
345 |         # We apply the new chosen action to the selected demand
346 |         currentVal = env_hill_climb.step_hill_sp(action, source, dest)
347 |     end = tt.time()
348 |     return currentVal*(-1), end-start
349 | 
350 | def play_DRL_GNN_sp_hill_climbing_games(tm_id, best_routing, list_of_demands_to_change, timesteps, time_start_DRL):
351 |     # Here we use sp in hill climbing to select the middlepoint and to evaluate
352 |     env_hill_climb = gym.make(ENV_SIMM_ANEAL_AGENT)
353 |     env_hill_climb.seed(SEED)
354 |     env_hill_climb.generate_environment(general_dataset_folder, graph_topology_name, EPISODE_LENGTH_MIDDROUT, NUM_ACTIONS, percentage_demands)
355 | 
356 |     currentVal = env_hill_climb.reset_DRL_hill_sp(tm_id, best_routing, list_of_demands_to_change)
357 |     hill_climb_agent = HILL_CLIMBING(env_hill_climb)
358 |     start = tt.time()
359 |     while 1:
360 |         nextVal, next_state = hill_climb_agent.explore_neighbourhood_DRL_sp(env_hill_climb)
361 |         # If the difference between the two values is very small but non-zero, we break (for precision reasons)
362 |         if nextVal<=currentVal or (abs((-1)*nextVal-(-1)*currentVal)<1e-4):
363 |             break
364 | 
365 |         # Before we apply the new action, we need to remove the current allocation of the chosen demand
366 |         action = next_state[0]
367 |         source = next_state[1]
368 |         dest = next_state[2]
369 | 
370 |         # Remove bandwidth allocated until the middlepoint and then from the middlepoint on
371 |         if str(source)+':'+str(dest) in env_hill_climb.sp_middlepoints:
372 |             middlepoint = env_hill_climb.sp_middlepoints[str(source)+':'+str(dest)]
373 |             env_hill_climb.decrease_links_utilization_sp(source, middlepoint, source, dest)
374 |             env_hill_climb.decrease_links_utilization_sp(middlepoint, dest, source, dest)
375 |             del env_hill_climb.sp_middlepoints[str(source)+':'+str(dest)]
376 |         # If there is no middlepoint assigned to the src,dst pair
377 |         else:
378 |             # Remove the bandwidth allocated from the src to the destination using sp
379 |             env_hill_climb.decrease_links_utilization_sp(source, dest, source, dest)
380 | 
381 |         # We apply the new chosen action to the selected demand
382 |         currentVal = env_hill_climb.step_hill_sp(action, source, dest)
383 |         timer = tt.time()
384 |         timesteps.append((timer-time_start_DRL, currentVal*(-1)))
385 |     end = tt.time()
386 |     return currentVal*(-1), end-start
387 | 
388 | class SAPAgent:
389 |     def __init__(self, env):
390 |         self.K = env.K
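# (SAP here presumably stands for Shortest Available Path: act() below walks the
#  precomputed candidate paths for the (n1, n2) pair in order and returns the
#  index of the first path whose links can all absorb the demand without
#  exceeding a utilization of 1.0, or -1 if no candidate path fits.)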
391 | 
392 |     def act(self, env, demand, n1, n2):
393 |         pathList = env.allPaths[str(n1) +':'+ str(n2)]
394 |         path = 0
395 |         allocated = 0  # Indicates 1 if we allocated the demand, 0 otherwise
396 |         while allocated==0 and path < len(pathList) and path 1:
406 |                     can_allocate = 0
407 |                     break
408 |                 i = i + 1
409 |                 j = j + 1
410 | 
411 |             if can_allocate==1:
412 |                 return path
413 |             path = path + 1
414 | 
415 |         return -1
416 | 
417 | def play_sap_games(tm_id):
418 |     env_sap = gym.make(ENV_SAP_AGENT)
419 |     env_sap.seed(SEED)
420 |     env_sap.generate_environment(general_dataset_folder, graph_topology_name, EPISODE_LENGTH_MIDDROUT, NUM_ACTIONS)
421 | 
422 |     demand, source, destination = env_sap.reset(tm_id)
423 |     sap_Agent = SAPAgent(env_sap)
424 | 
425 |     rewardAddTest = 0
426 |     start = tt.time()
427 |     while 1:
428 |         action = sap_Agent.act(env_sap, demand, source, destination)
429 | 
430 |         done, error_eval_links, demand, source, destination, maxLinkUti, minLinkUti, utiStd = env_sap.step(action, demand, source, destination)
431 |         if done:
432 |             break
433 |     end = tt.time()
434 |     return maxLinkUti[2], end-start
435 | 
436 | def play_middRout_games(tm_id, env_middRout, agent):
437 |     demand, source, destination = env_middRout.reset(tm_id)
438 |     rewardAddTest = 0
439 |     while 1:
440 |         # Change to agent.pred_action_node_distrib_sp to choose the middlepoint using only the SPs
441 |         action_dist, tensor = agent.pred_action_node_distrib_sp(env_middRout, source, destination)
442 |         action = np.argmax(action_dist)
443 | 
444 |         reward, done, error_eval_links, demand, source, destination, maxLinkUti, minLinkUti, utiStd = env_middRout.step(action, demand, source, destination)
445 |         rewardAddTest += reward
446 |         if done:
447 |             break
448 |     return rewardAddTest, maxLinkUti[2], minLinkUti, utiStd
449 | 
450 | 
451 | if __name__ == "__main__":
452 | 
453 |     hyper_parameter = {
454 |         'feature_size': 20,
455 |         't': 5,
456 |         'readout_units': 20,
457 |         'episode': 20,
458 |         'lr': 0.0002,
459 |         'lr_decay_rate': 0.96,
460 |         'lr_decay_step': 60,
461 |         'mini_batch': 55,
462 |         'gae_gamma': 0.99,
463 |         'gae_lambda': 0.95,
464 |         'clip_value': 0.5,
465 |         'entropy_beta': 0.01,
466 |         'entropy_step': 60,
467 |         'l2_regular': 0.0001,
468 |         'buffer_size': 0,
469 |         'update_times': 8
470 |     }
471 | 
472 |     # Parse command-line arguments
473 |     parser = argparse.ArgumentParser(description='Evaluate a DRL agent on a single topology and TM')
474 | 
475 |     parser.add_argument('-t', help='DEFO demands TM file id', type=str, required=True, nargs='+')
476 |     parser.add_argument('-g', help='graph topology name', type=str, required=True, nargs='+')
477 |     parser.add_argument('-m', help='model id whose weights to load', type=str, required=True, nargs='+')
478 |     parser.add_argument('-o', help='Where to store the pckl file', type=str, required=True, nargs='+')
479 |     parser.add_argument('-d', help='differentiation string', type=str, required=True, nargs='+')
480 |     parser.add_argument('-f', help='general dataset folder name', type=str, required=True, nargs='+')
481 |     parser.add_argument('-f2', help='specific dataset folder name', type=str, required=True, nargs='+')
482 |     args = parser.parse_args()
483 | 
484 |     drl_eval_res_folder = args.o[0]
485 |     tm_id = int(args.t[0])
486 |     model_id = args.m[0]
487 |     differentiation_str = args.d[0]
488 |     graph_topology_name = args.g[0]
489 |     general_dataset_folder = args.f[0]
490 |     specific_dataset_folder = args.f2[0]
491 | 
492 |     timesteps = list()
493 |     results = np.zeros(17)
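# Layout of the results vector, as filled in further below (unassigned slots stay 0):
# [3] DRL + hill climbing max uti, [4] simulated annealing max uti,
# [6] number of edges (used to order the figures), [7] hill climbing max uti,
# [8] SAP max uti, [9] DRL max uti, [11] initial OSPF max uti,
# [12]-[15] optimization times of SA, SAP, DRL and hill climbing respectively,
# [16] combined DRL + hill climbing time.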
494 | 
495 |     ########### The following lines of code are used to evaluate a DRL SP-based agent
496 |     env_DRL_SP = gym.make(ENV_MIDDROUT_AGENT_SP)
497 |     env_DRL_SP.seed(SEED)
498 |     env_DRL_SP.generate_environment(general_dataset_folder, graph_topology_name, EPISODE_LENGTH_MIDDROUT, NUM_ACTIONS, percentage_demands)
499 |     # Set to True if we want to take the top X% of the 5 most loaded links
500 |     env_DRL_SP.top_K_critical_demands = True
501 | 
502 |     DRL_SP_Agent = PPOAC(hyper_parameter)
503 |     model_dir = "./models" + differentiation_str
504 |     DRL_SP_Agent.actor.load_state_dict(torch.load(model_dir + f"/actor_{model_id}.pt"))
505 |     DRL_SP_Agent.actor.eval()
506 |     # The actor weights are restored from the chosen checkpoint
507 |     print("Restored DRL_SP model ", f"/actor_{model_id}.pt")
508 | 
509 |     ################################################
510 | 
511 |     # We can also use simulated annealing but it is going to take a while
512 |     max_link_uti_sim_annealing, optim_cost_SA = 1, 1  # play_sp_simulated_annealing_games(tm_id)
513 | 
514 |     max_link_uti_sp_hill_climb, optim_cost_HILL = play_sp_hill_climbing_games(tm_id)
515 | 
516 |     max_link_uti_SAP, optim_cost_SAP = 1, 1  # play_sap_games(tm_id)
517 | 
518 |     max_link_uti_DRL_SP, optim_cost_DRL_GNN, OSPF_init, best_routing, list_of_demands_to_change, time_start_DRL = play_middRout_games_sp(tm_id, env_DRL_SP, DRL_SP_Agent, timesteps)
519 | 
520 |     max_link_uti_DRL_SP_HILL, optim_cost_DRL_HILL = play_DRL_GNN_sp_hill_climbing_games(tm_id, best_routing, list_of_demands_to_change, timesteps, time_start_DRL)
521 | 
522 |     new_timesteps = list()
523 |     for elem in timesteps:
524 |         new_timesteps.append((elem[0], elem[1], time_start_DRL, max_link_uti_DRL_SP))
525 | 
526 |     print("MAX UTI before and after optimizing: ", OSPF_init, max_link_uti_DRL_SP_HILL, tm_id)
527 | 
528 |     results[3] = max_link_uti_DRL_SP_HILL
529 |     results[4] = max_link_uti_sim_annealing
530 |     results[6] = len(env_DRL_SP.defoDatasetAPI.Gbase.edges())  # We store the number of edges to order the figures
531 |     results[7] = max_link_uti_sp_hill_climb
532 |     results[8] = max_link_uti_SAP
533 |     results[9] = max_link_uti_DRL_SP
534 |     results[11] = OSPF_init
535 |     results[12] = optim_cost_SA
536 |     results[13] = optim_cost_SAP
537 |     results[14] = optim_cost_DRL_GNN
538 |     results[15] = optim_cost_HILL
539 |     results[16] = optim_cost_DRL_GNN+optim_cost_DRL_HILL
540 | 
541 |     path_to_pckl_rewards = drl_eval_res_folder + differentiation_str + '/' + graph_topology_name + '/'
542 |     if not os.path.exists(path_to_pckl_rewards):
543 |         os.makedirs(path_to_pckl_rewards)
544 | 
545 |     with open(path_to_pckl_rewards + graph_topology_name + '.' + str(tm_id) + ".pckl", 'wb') as f:
546 |         pickle.dump(results, f, pickle.HIGHEST_PROTOCOL)
547 | 
548 |     with open(path_to_pckl_rewards + graph_topology_name + '.' + str(tm_id) + ".timesteps", 'w') as fp:
549 |         json.dump(new_timesteps, fp)
--------------------------------------------------------------------------------