├── README.md
├── ql.py
└── routing.py
/README.md:
--------------------------------------------------------------------------------
# Routing Elephants Reinforcement Learning

The Internet has become a necessity for everyone, and traffic grows every day. Routing that traffic intelligently is important to avoid network congestion and to maintain high quality of service (speed and bandwidth), which is why I developed this routing algorithm based on Reinforcement Learning that seeks to keep the network decongested.

The algorithm finds the shortest route for mouse flows (green) and the least congested route for elephant flows (red). The throughput of each link between network switches is known, as is whether a flow (a set of network packets with the same IP and protocol) is an elephant or a mouse.

![routing](https://media.giphy.com/media/Ula6PiO5S7jzlNvL2l/giphy.gif)

## Reward Per Episode

As the router gains experience over the episodes, accuracy and reward begin to rise. An accuracy of around 90% is achievable given the simplicity of the setup; a future project is expected to take many more variables into account, perhaps a DQN agent and a much larger network topology.

![Figure 2022-03-15 015334](https://user-images.githubusercontent.com/60159274/158323359-e3d7f5fa-42e8-4d07-90bf-0a29d8112843.png)

## Usage

Install the prerequisites (`numpy`, `networkx`, `matplotlib`) and run `python routing.py`.

--------------------------------------------------------------------------------
/ql.py:
--------------------------------------------------------------------------------
import numpy as np

def init_q(s, a, type="zeros"):
    """
    @param s the number of states
    @param a the number of actions
    @param type "random", "ones", "zeros" or "inf" for the initialization
    """
    if type == "ones":
        return np.ones((s, a))
    elif type == "random":
        return np.random.random((s, a))
    elif type == "zeros":
        return np.zeros((s, a))
    elif type == "inf":
        return np.inf * np.ones((s, a))

def epsilon_greedy(Q, epsilon, n_actions, s, train=False):
    """
    @param Q Q-value table of shape (states, actions)
    @param epsilon probability of exploiting the current Q-values (note: in this
           implementation epsilon is the probability of the greedy action, not of
           a random one)
    @param n_actions the number of actions
    @param s the current state
    @param train if True the greedy action is always selected
    """
    if train or np.random.rand() < epsilon:
        action = np.argmax(Q[s, :])              # exploit
    else:
        action = np.random.randint(0, n_actions)  # explore
    return action


class QL_agent:
    def __init__(self, alpha, gamma, epsilon, n_states, n_actions):
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.n_actions = n_actions
        self.n_states = n_states
        self.Q = init_q(n_states, n_actions, type="random")

    def take_action(self, s, first_state):
        # first_state=True -> epsilon-greedy selection; otherwise purely greedy
        if first_state:
            action = epsilon_greedy(self.Q, self.epsilon, self.n_actions, s, False)
        else:
            action = np.argmax(self.Q[s, :])
        return action

    def updateQ(self, reward, s, a, a_, s_, end_state):
        Q = self.Q
        alpha = self.alpha
        gamma = self.gamma
        if end_state:
            # Terminal state: no bootstrapped next-state value
            Q[s, a] += alpha * (reward - Q[s, a])
        else:
            # a_ is chosen greedily by the caller, so gamma * Q[s_, a_] is the
            # standard Q-learning target max_a' Q(s', a')
            Q[s, a] += alpha * (reward + (gamma * Q[s_, a_]) - Q[s, a])
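
# Minimal usage sketch of QL_agent on a toy 5-state chain (illustrative only, not
# part of the routing pipeline; the environment, sizes and hyperparameters below
# are assumptions for demonstration). Action 1 moves one state toward the goal
# (state 4), action 0 stays put.
if __name__ == "__main__":
    agent = QL_agent(alpha=0.5, gamma=0.9, epsilon=0.9, n_states=5, n_actions=2)
    for episode in range(200):
        s = 0
        for _ in range(20):
            a = agent.take_action(s, True)          # epsilon-greedy action
            s_ = min(s + 1, 4) if a == 1 else s     # toy transition
            reward = 10 if s_ == 4 else -1          # goal reward vs. step cost
            done = (s_ == 4)
            a_ = np.argmax(agent.Q[s_, :])          # greedy next action for the target
            agent.updateQ(reward, s, a, a_, s_, done)
            s = s_
            if done:
                break
    print("Greedy policy per state:", np.argmax(agent.Q, axis=1))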
--------------------------------------------------------------------------------
/routing.py:
--------------------------------------------------------------------------------
import networkx as nx
import numpy as np
import itertools
import matplotlib.pyplot as plt
import ql
import time

# Network topology: 10 switches; actions are 0-based node indices
# (action a means "forward to node a + 1").
G1 = nx.DiGraph()
list_nodes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
acciones = list_nodes
# Possible actions (0-based indices of the neighbouring nodes) for each node
Actionsx1 = [2, 3]
Actionsx2 = [2, 5]
Actionsx3 = [0, 1, 3, 4, 5]
Actionsx4 = [0, 2, 4, 6, 7]
Actionsx5 = [2, 3, 5, 7, 8]
Actionsx6 = [1, 2, 4, 8, 9]
Actionsx7 = [3, 7]
Actionsx8 = [3, 4, 6, 8]
Actionsx9 = [4, 5, 7, 9]
Actionsx10 = [5, 8]

G1.add_nodes_from(list_nodes)

# Link weights (throughput); links with weight above 79 are treated as congested
# for elephant flows (see step()).
#weights = [50,90,50,90,50,50,50,50,50,50,50,50,50,50,50,50,50,50]
weights = [50, 90, 50, 90, 50, 90, 50, 50, 50, 90, 50, 50, 50, 50, 90, 50, 50, 50]
list_arcs1 = [(1, 3, weights[0]), (3, 1, weights[0]), (1, 4, weights[1]), (4, 1, weights[1]),
              (2, 3, weights[2]), (3, 2, weights[2]), (2, 6, weights[3]), (6, 2, weights[3]),
              (3, 4, weights[4]), (4, 3, weights[4]), (3, 5, weights[5]), (5, 3, weights[5]),
              (3, 6, weights[6]), (6, 3, weights[6]), (4, 5, weights[7]), (5, 4, weights[7]),
              (5, 6, weights[8]), (6, 5, weights[8]), (4, 7, weights[9]), (7, 4, weights[9]),
              (4, 8, weights[10]), (8, 4, weights[10]), (5, 8, weights[11]), (8, 5, weights[11]),
              (5, 9, weights[12]), (9, 5, weights[12]), (6, 9, weights[13]), (9, 6, weights[13]),
              (6, 10, weights[14]), (10, 6, weights[14]), (7, 8, weights[15]), (8, 7, weights[15]),
              (8, 9, weights[16]), (9, 8, weights[16]), (9, 10, weights[17]), (10, 9, weights[17])]
G1.add_weighted_edges_from(list_arcs1)

# Fixed positions for drawing the topology
G1.nodes[1]['pos'] = (0, -2)
G1.nodes[2]['pos'] = (0, 2)
G1.nodes[3]['pos'] = (2.5, 0)
G1.nodes[4]['pos'] = (5, -5)
G1.nodes[5]['pos'] = (6, 0)
G1.nodes[6]['pos'] = (5, 5)
G1.nodes[7]['pos'] = (10, -9)
G1.nodes[8]['pos'] = (10, -3)
G1.nodes[9]['pos'] = (10, 3)
G1.nodes[10]['pos'] = (10, 9)

node_pos = nx.get_node_attributes(G1, 'pos')
nx.draw_networkx(G1, node_pos, node_size=450)
arc_weight = nx.get_edge_attributes(G1, 'weight')
nx.draw_networkx_edge_labels(G1, node_pos, edge_labels=arc_weight)


# State table: every combination of (current node, destination node, flow type)
l = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [7, 8, 9, 10], ['E', 'R']]
bandera = list(itertools.product(*l))
print(bandera)
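
# State-encoding sketch (illustrative, derived from how `bandera` is built above):
# each state is (current node, destination node, flow type), where 'E' marks an
# elephant flow and 'R' a mouse ("raton") flow, for 10 * 4 * 2 = 80 states.
# itertools.product varies the last factor fastest, so for example:
#   bandera[0] == (1, 7, 'E')
#   bandera[1] == (1, 7, 'R')
#   bandera[9] == (2, 7, 'R')   # index = (node - 1) * 8 + (destination - 7) * 2 + (0 for 'E', 1 for 'R')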

def pesoEnlace(est, a):
    # Returns the weight of the link from the current node of state `est`
    # to the node selected by action `a` (node a + 1).
    origen = bandera[est][0]
    destino = a + 1
    for x in range(0, len(list_arcs1)):
        if list_arcs1[x][0] == origen and list_arcs1[x][1] == destino:
            peso = list_arcs1[x][2]
            return peso

def randomWeight():
    # Random link weights (currently unused)
    pesos = np.random.randint(20, 70, size=16)
    return pesos

def reset():
    # Random initial state index for a training episode
    aleatorio = np.random.randint(0, 80, size=1)
    return aleatorio[0]

def resetTest():
    aleatorio = np.random.randint(0, 16, size=1)
    return aleatorio[0]

def render(col, cond):
    # Draw the topology, colouring the visited nodes green for mouse flows ('R')
    # and red for elephant flows ('E').
    color_map = []
    for node in G1:
        if node in col and cond == 'R':
            color_map.append('green')
        elif node in col and cond == 'E':
            color_map.append('red')
        else:
            color_map.append('gray')
    nx.draw_networkx(G1, node_pos, node_size=450, node_color=color_map)
    nx.draw_networkx_edge_labels(G1, node_pos, edge_labels=arc_weight)
    plt.show()


def ActionsXorigen(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, origen):
    # Returns the list of possible actions for the given origin node
    if origen == 1:
        return a1
    elif origen == 2:
        return a2
    elif origen == 3:
        return a3
    elif origen == 4:
        return a4
    elif origen == 5:
        return a5
    elif origen == 6:
        return a6
    elif origen == 7:
        return a7
    elif origen == 8:
        return a8
    elif origen == 9:
        return a9
    else:
        return a10

def step(s, a, posiblesAcciones, G1, saltos, _s):
    info = {}
    imposibles = 0
    for x in range(0, len(posiblesAcciones)):
        if a == posiblesAcciones[x]:
            imposibles = 1
    if imposibles == 0:
        # The chosen node is not a neighbour (or the agent stays put)
        reward = -70
        s_ = s
        done = False
    else:
        if bandera[s][0] == bandera[s][1]:
            # The current node is already the destination
            reward = 100
            s_ = s
            done = True
        else:
            done = False
            suma = a + 1
            for x in range(0, len(bandera)):
                if suma == bandera[x][0] and bandera[s][1] == bandera[x][1] and bandera[s][2] == bandera[x][2]:
                    s_ = x
                    break
            if s_ == _s:
                # Bouncing straight back to the previous node
                reward = -130
            else:
                reward = -10 * saltos
            # Small bonus for routing these two source/destination pairs through node 3
            if (bandera[s][0] == 1 and bandera[s][1] == 10 and a == 2) or (bandera[s][0] == 2 and bandera[s][1] == 7 and a == 2):
                reward = reward + 3
            if bandera[s][2] == 'E':
                if pesoEnlace(s, a) > 79:
                    # Elephant flow over a congested link
                    reward = reward - 130
    _s = s
    return _s, s_, reward, done, info
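
# Illustrative walk-through of step() under the weights defined above: starting in
# state bandera[0] == (1, 7, 'E'), action 2 proposes forwarding to node 3, which is
# a neighbour of node 1, so the new state is (3, 7, 'E') and the reward is
# -10 * saltos (the 1-3 link has weight 50, so no elephant congestion penalty).
# Choosing a non-neighbour instead would return reward -70 and leave the state
# unchanged, and bouncing straight back to the previous node costs -130.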

if __name__ == "__main__":
    t = time.time()
    alpha = 0.4
    gamma = 0.999
    epsilon = 0.976
    episodes = 400000
    max_steps = 2500
    n_tests = 16
    n_states, n_actions = 80, 10
    agente = ql.QL_agent(alpha, gamma, epsilon, n_states, n_actions)

    episode_rewards = []

    # Training
    for episode in range(episodes):
        print("Episode: {0}".format(episode))
        s = reset()
        _s = s
        episode_reward = 0
        steps = 0
        done = False
        while steps < max_steps:
            steps += 1
            a = agente.take_action(s, True)
            o = bandera[s][0]  # origin node
            acc = ActionsXorigen(Actionsx1, Actionsx2, Actionsx3, Actionsx4, Actionsx5,
                                 Actionsx6, Actionsx7, Actionsx8, Actionsx9, Actionsx10, o)
            _s, s_, reward, done, info = step(s, a, acc, G1, steps, _s)
            episode_reward += reward
            a_ = np.argmax(agente.Q[s_, :])
            agente.updateQ(reward, s, a, a_, s_, done)
            s, a = s_, a_
            if done:
                end_ep = time.time()
                episode_rewards.append(episode_reward)
                break
    print(bandera)
    print(acciones)

    # Test the learned policy for every state whose origin is node 1 or node 2
    # (the first 16 state indices)
    for test in range(n_tests):
        print("Test #{0}".format(test))
        s = test
        _s = s
        done = False
        st = 0
        steps = 0
        color = []
        while True:
            time.sleep(1)
            o = bandera[s][0]  # origin node
            acc = ActionsXorigen(Actionsx1, Actionsx2, Actionsx3, Actionsx4, Actionsx5,
                                 Actionsx6, Actionsx7, Actionsx8, Actionsx9, Actionsx10, o)
            steps += 1
            if st == 0:
                first_state = False
            else:
                first_state = True
            print("Current state: {0}".format(bandera[s]))
            color.append(bandera[s][0])
            a = agente.take_action(s, first_state)
            print("Chose action {0} for state {1}".format(a, s))
            first_state = True
            st = st + 1
            _s, s, reward, done, info = step(s, a, acc, G1, steps, _s)
            print(acc, reward, done)
            if done:
                render(color, bandera[s][2])
                print("Reached goal!")
                color.clear()
                break
            time.sleep(6)

    plt.xlabel("Episodes")
    plt.ylabel("Reward")
    plt.title("Reward per episode")
    plt.plot(episode_rewards, 'b', label="episode reward")
    plt.legend()
    plt.show()
    """
    print(bandera)
    print(acciones)
    """
--------------------------------------------------------------------------------