├── README.md
├── ql.py
└── routing.py
/README.md:
--------------------------------------------------------------------------------
# Routing Elephants Reinforcement Learning

The Internet has become a necessity for everyone, and traffic grows every day. Routing that traffic intelligently is important to avoid network congestion and to maintain high quality of service (speed and bandwidth), which is why I developed this routing algorithm based on Reinforcement Learning that seeks to keep the network decongested.

The algorithm finds the shortest route for mouse flows (green) and the least congested route for elephant flows (red). The throughput of each link between network switches is known, as is whether a flow (a set of network packets with the same IP and protocol) is an elephant or a mouse.

![routing](https://media.giphy.com/media/Ula6PiO5S7jzlNvL2l/giphy.gif)

## Reward Per Episode

As the router gains experience over the episodes, accuracy and reward begin to rise. An accuracy of around 90% is achievable given the simplicity of the setup; a future project is expected to take many more variables into account, perhaps a DQN agent and a much larger network topology.

![Figure 2022-03-15 015334](https://user-images.githubusercontent.com/60159274/158323359-e3d7f5fa-42e8-4d07-90bf-0a29d8112843.png)

## Usage

Install the prerequisites (`numpy`, `networkx`, `matplotlib`) and run `python routing.py`.

--------------------------------------------------------------------------------
/ql.py:
--------------------------------------------------------------------------------
import numpy as np

def init_q(s, a, type="zeros"):
    """
    @param s the number of states
    @param a the number of actions
    @param type "random", "ones", "zeros" or "inf" for the initialization
    """
    if type == "ones":
        return np.ones((s, a))
    elif type == "random":
        return np.random.random((s, a))
    elif type == "zeros":
        return np.zeros((s, a))
    elif type == "inf":
        return np.inf * np.ones((s, a))

def epsilon_greedy(Q, epsilon, n_actions, s, train=False):
    """
    @param Q Q-value table of shape (states, actions)
    @param epsilon probability of exploiting the current Q-values (note: in this
           implementation epsilon is the probability of the greedy action, not of
           a random one)
    @param n_actions the number of actions
    @param s the current state
    @param train if True the greedy action is always selected
    """
    if train or np.random.rand() < epsilon:
        action = np.argmax(Q[s, :])              # exploit
    else:
        action = np.random.randint(0, n_actions)  # explore
    return action


class QL_agent:
    def __init__(self, alpha, gamma, epsilon, n_states, n_actions):
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.n_actions = n_actions
        self.n_states = n_states
        self.Q = init_q(n_states, n_actions, type="random")

    def take_action(self, s, first_state):
        # first_state=True -> epsilon-greedy selection; otherwise purely greedy
        if first_state:
            action = epsilon_greedy(self.Q, self.epsilon, self.n_actions, s, False)
        else:
            action = np.argmax(self.Q[s, :])
        return action

    def updateQ(self, reward, s, a, a_, s_, end_state):
        Q = self.Q
        alpha = self.alpha
        gamma = self.gamma
        if end_state:
            # Terminal state: no bootstrapped next-state value
            Q[s, a] += alpha * (reward - Q[s, a])
        else:
            # a_ is chosen greedily by the caller, so gamma * Q[s_, a_] is the
            # standard Q-learning target max_a' Q(s', a')
            Q[s, a] += alpha * (reward + (gamma * Q[s_, a_]) - Q[s, a])
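
# Minimal usage sketch of QL_agent on a toy 5-state chain (illustrative only, not
# part of the routing pipeline; the environment, sizes and hyperparameters below
# are assumptions for demonstration). Action 1 moves one state toward the goal
# (state 4), action 0 stays put.
if __name__ == "__main__":
    agent = QL_agent(alpha=0.5, gamma=0.9, epsilon=0.9, n_states=5, n_actions=2)
    for episode in range(200):
        s = 0
        for _ in range(20):
            a = agent.take_action(s, True)          # epsilon-greedy action
            s_ = min(s + 1, 4) if a == 1 else s     # toy transition
            reward = 10 if s_ == 4 else -1          # goal reward vs. step cost
            done = (s_ == 4)
            a_ = np.argmax(agent.Q[s_, :])          # greedy next action for the target
            agent.updateQ(reward, s, a, a_, s_, done)
            s = s_
            if done:
                break
    print("Greedy policy per state:", np.argmax(agent.Q, axis=1))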
--------------------------------------------------------------------------------
/routing.py:
--------------------------------------------------------------------------------
import networkx as nx
import numpy as np
import itertools
import matplotlib.pyplot as plt
import ql
import time

# Network topology: 10 switches; actions are 0-based node indices
# (action a means "forward to node a + 1").
G1 = nx.DiGraph()
list_nodes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
acciones = list_nodes
# Possible actions (0-based indices of the neighbouring nodes) for each node
Actionsx1 = [2, 3]
Actionsx2 = [2, 5]
Actionsx3 = [0, 1, 3, 4, 5]
Actionsx4 = [0, 2, 4, 6, 7]
Actionsx5 = [2, 3, 5, 7, 8]
Actionsx6 = [1, 2, 4, 8, 9]
Actionsx7 = [3, 7]
Actionsx8 = [3, 4, 6, 8]
Actionsx9 = [4, 5, 7, 9]
Actionsx10 = [5, 8]

G1.add_nodes_from(list_nodes)

# Link weights (throughput); links with weight above 79 are treated as congested
# for elephant flows (see step()).
#weights = [50,90,50,90,50,50,50,50,50,50,50,50,50,50,50,50,50,50]
weights = [50, 90, 50, 90, 50, 90, 50, 50, 50, 90, 50, 50, 50, 50, 90, 50, 50, 50]
list_arcs1 = [(1, 3, weights[0]), (3, 1, weights[0]), (1, 4, weights[1]), (4, 1, weights[1]),
              (2, 3, weights[2]), (3, 2, weights[2]), (2, 6, weights[3]), (6, 2, weights[3]),
              (3, 4, weights[4]), (4, 3, weights[4]), (3, 5, weights[5]), (5, 3, weights[5]),
              (3, 6, weights[6]), (6, 3, weights[6]), (4, 5, weights[7]), (5, 4, weights[7]),
              (5, 6, weights[8]), (6, 5, weights[8]), (4, 7, weights[9]), (7, 4, weights[9]),
              (4, 8, weights[10]), (8, 4, weights[10]), (5, 8, weights[11]), (8, 5, weights[11]),
              (5, 9, weights[12]), (9, 5, weights[12]), (6, 9, weights[13]), (9, 6, weights[13]),
              (6, 10, weights[14]), (10, 6, weights[14]), (7, 8, weights[15]), (8, 7, weights[15]),
              (8, 9, weights[16]), (9, 8, weights[16]), (9, 10, weights[17]), (10, 9, weights[17])]
G1.add_weighted_edges_from(list_arcs1)

# Fixed positions for drawing the topology
G1.nodes[1]['pos'] = (0, -2)
G1.nodes[2]['pos'] = (0, 2)
G1.nodes[3]['pos'] = (2.5, 0)
G1.nodes[4]['pos'] = (5, -5)
G1.nodes[5]['pos'] = (6, 0)
G1.nodes[6]['pos'] = (5, 5)
G1.nodes[7]['pos'] = (10, -9)
G1.nodes[8]['pos'] = (10, -3)
G1.nodes[9]['pos'] = (10, 3)
G1.nodes[10]['pos'] = (10, 9)

node_pos = nx.get_node_attributes(G1, 'pos')
nx.draw_networkx(G1, node_pos, node_size=450)
arc_weight = nx.get_edge_attributes(G1, 'weight')
nx.draw_networkx_edge_labels(G1, node_pos, edge_labels=arc_weight)


# State table: every combination of (current node, destination node, flow type)
l = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [7, 8, 9, 10], ['E', 'R']]
bandera = list(itertools.product(*l))
print(bandera)
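
# State-encoding sketch (illustrative, derived from how `bandera` is built above):
# each state is (current node, destination node, flow type), where 'E' marks an
# elephant flow and 'R' a mouse ("raton") flow, for 10 * 4 * 2 = 80 states.
# itertools.product varies the last factor fastest, so for example:
#   bandera[0] == (1, 7, 'E')
#   bandera[1] == (1, 7, 'R')
#   bandera[9] == (2, 7, 'R')   # index = (node - 1) * 8 + (destination - 7) * 2 + (0 for 'E', 1 for 'R')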

def pesoEnlace(est, a):
    # Returns the weight of the link from the current node of state `est`
    # to the node selected by action `a` (node a + 1).
    origen = bandera[est][0]
    destino = a + 1
    for x in range(0, len(list_arcs1)):
        if list_arcs1[x][0] == origen and list_arcs1[x][1] == destino:
            peso = list_arcs1[x][2]
            return peso

def randomWeight():
    # Random link weights (currently unused)
    pesos = np.random.randint(20, 70, size=16)
    return pesos

def reset():
    # Random initial state index for a training episode
    aleatorio = np.random.randint(0, 80, size=1)
    return aleatorio[0]

def resetTest():
    aleatorio = np.random.randint(0, 16, size=1)
    return aleatorio[0]

def render(col, cond):
    # Draw the topology, colouring the visited nodes green for mouse flows ('R')
    # and red for elephant flows ('E').
    color_map = []
    for node in G1:
        if node in col and cond == 'R':
            color_map.append('green')
        elif node in col and cond == 'E':
            color_map.append('red')
        else:
            color_map.append('gray')
    nx.draw_networkx(G1, node_pos, node_size=450, node_color=color_map)
    nx.draw_networkx_edge_labels(G1, node_pos, edge_labels=arc_weight)
    plt.show()


def ActionsXorigen(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, origen):
    # Returns the list of possible actions for the given origin node
    if origen == 1:
        return a1
    elif origen == 2:
        return a2
    elif origen == 3:
        return a3
    elif origen == 4:
        return a4
    elif origen == 5:
        return a5
    elif origen == 6:
        return a6
    elif origen == 7:
        return a7
    elif origen == 8:
        return a8
    elif origen == 9:
        return a9
    else:
        return a10

def step(s, a, posiblesAcciones, G1, saltos, _s):
    info = {}
    imposibles = 0
    for x in range(0, len(posiblesAcciones)):
        if a == posiblesAcciones[x]:
            imposibles = 1
    if imposibles == 0:
        # The chosen node is not a neighbour (or the agent stays put)
        reward = -70
        s_ = s
        done = False
    else:
        if bandera[s][0] == bandera[s][1]:
            # The current node is already the destination
            reward = 100
            s_ = s
            done = True
        else:
            done = False
            suma = a + 1
            for x in range(0, len(bandera)):
                if suma == bandera[x][0] and bandera[s][1] == bandera[x][1] and bandera[s][2] == bandera[x][2]:
                    s_ = x
                    break
            if s_ == _s:
                # Bouncing straight back to the previous node
                reward = -130
            else:
                reward = -10 * saltos
            # Small bonus for routing these two source/destination pairs through node 3
            if (bandera[s][0] == 1 and bandera[s][1] == 10 and a == 2) or (bandera[s][0] == 2 and bandera[s][1] == 7 and a == 2):
                reward = reward + 3
            if bandera[s][2] == 'E':
                if pesoEnlace(s, a) > 79:
                    # Elephant flow over a congested link
                    reward = reward - 130
    _s = s
    return _s, s_, reward, done, info
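
# Illustrative walk-through of step() under the weights defined above: starting in
# state bandera[0] == (1, 7, 'E'), action 2 proposes forwarding to node 3, which is
# a neighbour of node 1, so the new state is (3, 7, 'E') and the reward is
# -10 * saltos (the 1-3 link has weight 50, so no elephant congestion penalty).
# Choosing a non-neighbour instead would return reward -70 and leave the state
# unchanged, and bouncing straight back to the previous node costs -130.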

if __name__ == "__main__":
    t = time.time()
    alpha = 0.4
    gamma = 0.999
    epsilon = 0.976
    episodes = 400000
    max_steps = 2500
    n_tests = 16
    n_states, n_actions = 80, 10
    agente = ql.QL_agent(alpha, gamma, epsilon, n_states, n_actions)

    episode_rewards = []

    # Training
    for episode in range(episodes):
        print("Episode: {0}".format(episode))
        s = reset()
        _s = s
        episode_reward = 0
        steps = 0
        done = False
        while steps < max_steps:
            steps += 1
            a = agente.take_action(s, True)
            o = bandera[s][0]  # origin node
            acc = ActionsXorigen(Actionsx1, Actionsx2, Actionsx3, Actionsx4, Actionsx5,
                                 Actionsx6, Actionsx7, Actionsx8, Actionsx9, Actionsx10, o)
            _s, s_, reward, done, info = step(s, a, acc, G1, steps, _s)
            episode_reward += reward
            a_ = np.argmax(agente.Q[s_, :])
            agente.updateQ(reward, s, a, a_, s_, done)
            s, a = s_, a_
            if done:
                end_ep = time.time()
                episode_rewards.append(episode_reward)
                break
    print(bandera)
    print(acciones)

    # Test the learned policy for every state whose origin is node 1 or node 2
    # (the first 16 state indices)
    for test in range(n_tests):
        print("Test #{0}".format(test))
        s = test
        _s = s
        done = False
        st = 0
        steps = 0
        color = []
        while True:
            time.sleep(1)
            o = bandera[s][0]  # origin node
            acc = ActionsXorigen(Actionsx1, Actionsx2, Actionsx3, Actionsx4, Actionsx5,
                                 Actionsx6, Actionsx7, Actionsx8, Actionsx9, Actionsx10, o)
            steps += 1
            if st == 0:
                first_state = False
            else:
                first_state = True
            print("Current state: {0}".format(bandera[s]))
            color.append(bandera[s][0])
            a = agente.take_action(s, first_state)
            print("Chose action {0} for state {1}".format(a, s))
            first_state = True
            st = st + 1
            _s, s, reward, done, info = step(s, a, acc, G1, steps, _s)
            print(acc, reward, done)
            if done:
                render(color, bandera[s][2])
                print("Reached goal!")
                color.clear()
                break
            time.sleep(6)

    plt.xlabel("Episodes")
    plt.ylabel("Reward")
    plt.title("Reward per episode")
    plt.plot(episode_rewards, 'b', label="episode reward")
    plt.legend()
    plt.show()
    """
    print(bandera)
    print(acciones)
    """
--------------------------------------------------------------------------------