├── .gitignore
├── README.md
├── dynetwork.py
├── mobility.py
├── our_agent.py
├── our_env.py
├── packet.py
├── simulation.py
└── update_edges.py

/.gitignore:
--------------------------------------------------------------------------------
.idea/
.git/
__pycache__/
plots/
q-learning/
*.npy
rewardplots/
Result/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Reinforcement-Learning-Routing-Algorithm-in-Robot-Network

## Introduction

We implement a simulation of a mobile robot network routing protocol based on multi-agent reinforcement learning.

A mobile robot network is a kind of mobile ad-hoc network that connects mobile robots together. This project simulates packet routing behavior in such a network. The network randomly generates packets to perform routing tasks driven by a multi-agent reinforcement learning routing algorithm. The antenna communication range, mobility model, moving speed, node number, packet number, cache queue length, etc. can be set manually.

## Dependencies

- Python 3.7
- NetworkX
- Matplotlib
- OpenAI Gym
- numpy
- pymobility

## Code Structure

- our_agent: the routing algorithm based on multi-agent reinforcement learning.
- our_env: the simulation environment.
- mobility: defines the mobility patterns of the robot nodes.
- dynetwork: dynamically drives the network to route packets.
- packet: defines packet parameters.
- simulation: sets up the learning and testing stages; collects results.
--------------------------------------------------------------------------------
/dynetwork.py:
--------------------------------------------------------------------------------
import time
import random
import numpy as np
import packet
import copy

'''
Class created to store the network and its attributes, as well as generate packets
File contains functions:
    randomGeneratePackets: initializes packets in the network at the beginning, assigning a predefined
        number of packets to send in the network.
    GeneratePacket: generates additional packets as previous packets are delivered, to keep the network
        working at the specified load; randomly chooses a start node that has the capacity to send a
        packet, and randomly chooses a destination.
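    Packets created with wait > 0 are parked in the purgatory list as (index, wait) pairs and are
        re-injected once their wait counter reaches 0; the counter is decremented on each attempt.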
'''
# The 'wait' parameter delays a packet's (re)injection: a positive wait parks the packet in the
# purgatory list (see GeneratePacket below and purgatory() in our_env.py).


class DynamicNetwork(object):
    def __init__(self, network, max_initializations=1000, packets=None, rejections=0, deliveries=0):

        '''shared attributes'''
        self._network = copy.deepcopy(network)
        self._num_nodes = None
        self._max_initializations = max_initializations

        '''q_learning attributes'''
        self._packets = packets
        self._rejections = rejections
        self._deliveries = deliveries  # record the number of packets that are successfully delivered
        self.delayed_queue = []
        self._stripped_list = []
        self._delivery_times = []  # record the delivery time of all packets
        self._initializations = 0
        self._max_queue_length = 0
        self._purgatory = []
        self._avg_q_len_arr = []
        self._num_empty_node = []
        self._num_capacity_node = []
        self._num_working_node = []

        '''shortest path attributes'''
        self.sp_packets = packets
        self.sp_rejections = rejections
        self.sp_deliveries = deliveries
        self.sp_delayed_queue = []
        self.sp_stripped_list = []
        self.sp_delivery_times = []
        self.sp_initializations = 0
        self.sp_max_queue_length = 0
        self.sp_purgatory = []
        self.sp_avg_q_len_arr = []
        self.sp_num_capacity_node = []
        self.sp_num_working_node = []
        self.sp_num_empty_node = []

    ''' Function used to generate packets
        handles both the first initialization and later additional injections '''
    def randomGeneratePackets(self, num_packets_to_generate, sp):
        tempList = {}
        self.num_nodes = len(list(self._network.nodes()))
        notfull = list(range(self.num_nodes))
        for index in range(num_packets_to_generate):
            curPack, notfull = self.GeneratePacket(index=index, wait=0, midSim=False, notfull=copy.deepcopy(notfull))
            '''put curPack into startNode's queue'''
            self._network.nodes[curPack.get_startPos()]['sending_queue'].append(curPack.get_index())
            if sp:
                self._network.nodes[curPack.get_startPos()]['sp_sending_queue'].append(curPack.get_index())
            tempList[index] = curPack
        '''create Packets Object'''
        packetsObj = packet.Packets(tempList)

        '''Assign Packets Object to the network'''
        self._packets = copy.deepcopy(packetsObj)
        if sp:
            self.sp_packets = copy.deepcopy(packetsObj)
        del packetsObj
        del tempList

    """ called by randomGeneratePackets for the initial batch, and again (via purgatory)
        to generate additional packets after previous packets are delivered """
    def GeneratePacket(self, index, sp=False, wait=0, midSim=True, notfull=None):
        """checks to see if we have exceeded the maximum number of packets allotted in the simulation"""
        if sp:
            initializations = self.sp_initializations
            sending_queue = 'sp_sending_queue'
            receiving_queue = 'sp_receiving_queue'
            packets = self.sp_packets
            purgatory = self.sp_purgatory
        else:
            initializations = self._initializations
            sending_queue = 'sending_queue'
            receiving_queue = 'receiving_queue'
            packets = self._packets
            purgatory = self._purgatory

        if initializations >= self._max_initializations:
            pass
        elif wait <= 0:
            """ creates a list of not-full nodes to check during new packet creation """
            if midSim:
                notfull = list(range(self.num_nodes))
            startNode = random.choice(notfull)
            endNode = random.randint(0, self._network.number_of_nodes() - 1)
            """ searches through the notfull list until an available node is located for initial packet assignment """
            while (len(self._network.nodes[startNode][sending_queue]) + len(self._network.nodes[startNode][receiving_queue])
                   >= self._network.nodes[startNode]['max_receive_capacity']):
                notfull.remove(startNode)
                try:
                    startNode = random.choice(notfull)
                except IndexError:
                    print("Error: All Nodes are Full")
                    return

            """ assigns the packet a delivery destination different from its starting point """
            while (startNode == endNode):
                endNode = random.randint(0, self.num_nodes - 1)
            curPack = packet.Packet(startNode, endNode, startNode, index, 0)
            if midSim:
                """ appends the newly generated packet to the start node's queue """
                packets.packetList[index] = curPack
                if sp:
                    self.sp_initializations += 1
                else:
                    self._initializations += 1
                self._network.nodes[curPack.get_startPos()][receiving_queue].append((curPack.get_index(), 0))
                try:
                    purgatory.remove((index, wait))
                except ValueError:
                    pass
                return
            return curPack, notfull
        else:
            purgatory.append((index, wait - 1))
--------------------------------------------------------------------------------
/mobility.py:
--------------------------------------------------------------------------------
from pymobility.models.mobility import random_waypoint
from pymobility.models.mobility import gauss_markov
import copy

'''This file adds mobility to the nodes of the network'''


class Mobility(object):
    def __init__(self, mobility_model, nnodes, min_speed, max_speed):
        self.trajectory = {}  # record the trajectory of all nodes
        self.step = 0  # record steps
        self.node_number = nnodes
        if mobility_model == 'random_waypoint':  # instance of the mobility model, actually an iterator (generator)
            self.mb = random_waypoint(nnodes, dimensions=(1, 1), velocity=(min_speed, max_speed), wt_max=1.0)
        elif mobility_model == 'gauss_markov':
            self.mb = gauss_markov(nnodes, dimensions=(1, 1), velocity_mean=(min_speed + max_speed) / 2)
        else:
            print('Undefined mobility model')

    '''get the next position of all nodes, returned as a dictionary'''
    def get_next_way_point(self):
        positions = next(self.mb)
        node_positions = {}
        for i in range(self.node_number):
            node_positions[i] = positions[i].copy()

        # record the trajectory of the nodes
        for i in range(self.node_number):
            self.trajectory.setdefault(i, []).append(positions[i].copy())

        return node_positions

    '''assign the current positions generated by the mobility model to the nodes in the network'''
    def assign_position_to_nodes(self, dyNetwork, positions):
        for nodeIndex in range(dyNetwork._network.number_of_nodes()):
            dyNetwork._network.nodes[nodeIndex]['pos'] = positions[nodeIndex].copy()

    '''Print the trajectory of all nodes'''
    def print_trajectory(self):
        print("Each node's trajectory is:")
        print(self.trajectory)
--------------------------------------------------------------------------------
/our_agent.py:
--------------------------------------------------------------------------------
import sys

import numpy as np
import random

'''
The agent file defines a learning agent and its hyperparameters.
The Q-table is an independent data structure; it is not stored in each node.
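Q-table layout: q[(current_node, destination)][action] -> value estimate, where the action is the
neighbor chosen as the next hop; e.g. q[(2, 7)][5] scores forwarding a packet that currently sits
at node 2 and is destined for node 7 to neighbor 5.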
File contains functions:
    generate_q_table: initialize the Q-table
    act: returns which node to send the packet to next
    learn: update the Q-table after receiving the corresponding reward
'''
class QAgent(object):

    def __init__(self, dynetwork):
        """
        learning rate: the amount of information that we wish to update our equation with, should be within (0,1]
        epsilon: probability that packets move randomly, instead of referencing the routing policy
        discount: degree to which we wish to maximize future rewards, value between (0,1)
        decay_rate: decays epsilon
        update_epsilon: utilized in our_env.router; only allows epsilon to decay once per time step
        self.q: stores q-values

        """
        self.config = {
            "learning_rate": 0.3,
            "epsilon": 0.3,
            "discount": 0.9,
            "decay_rate": 0.999,
            "update_epsilon": False,
        }
        self.number_of_nodes = dynetwork._network.number_of_nodes()
        self.q = self.generate_q_table(dynetwork._network)

    ''' Use this function to initialize the q-table. The table's size is fixed because the node set
        never changes, even though the topology is mobile. '''
    def generate_q_table(self, network):
        print("Begin to generate_q_table")
        q_table = {}
        num_nodes = network.number_of_nodes()
        for currpos in range(num_nodes):
            nlist = list(range(num_nodes))
            for dest in range(num_nodes):
                q_table[(currpos, dest)] = {}
                for action in nlist:
                    if currpos != dest:
                        ''' Initialize the Q-values to 0 everywhere except at the destination '''
                        q_table[(currpos, dest)][action] = 0
                        ''' Alternative (TODO): initialize using shortest-path distances '''
                    else:
                        # A positive value here makes max Q(dest, *) = 10, so the update rule
                        # back-propagates a high target to neighbors that deliver the packet.
                        q_table[(currpos, dest)][action] = 10
        print("End of generate_q_table")
        return q_table

    '''Returns the best action for a given state; the action is the node number of the next hop.
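       With probability epsilon a random neighbor is chosen (exploration); otherwise the neighbor
       with the highest Q-value is chosen (exploitation). Returns None when the current node has
       no available neighbors.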
    '''
    def act(self, state, neighbor):
        ''' We either explore randomly or refer to the Q-table, with probability epsilon '''
        if random.uniform(0, 1) < self.config['epsilon']:
            """ checks if the packet's current node has any available neighbors """
            if not bool(neighbor):  # in Python, empty {}, [], () are all falsy
                return None
            else:
                next_step = random.choice(neighbor)  # explore the action space
        else:
            temp_neighbor_dict = {n: self.q[state][n] for n in self.q[state] if n in neighbor}  # dict comprehension filtered to available neighbors
            """ checks if the packet's current node has any available neighbors """
            if not bool(temp_neighbor_dict):
                return None
            else:
                next_step = max(temp_neighbor_dict, key=temp_neighbor_dict.get)
        if self.config['update_epsilon']:
            self.config['epsilon'] = self.config["decay_rate"] * self.config['epsilon']
            self.config['update_epsilon'] = False
        return next_step

    """updates the q-table given the current state, reward, and action, where a state is a (node, destination) pair and an action is a step to one of the node's neighbors """
    def learn(self, current_event, reward, action):
        if (action == None) or (reward == None):
            pass
        else:
            n = current_event[0]
            dest = current_event[1]
            max_q = max(self.q[(action, dest)].values())

            """ Q-learning update rule """
            self.q[(n, dest)][action] = self.q[(n, dest)][action] + (self.config["learning_rate"]) * (reward + self.config["discount"] * max_q - self.q[(n, dest)][action])


"""Class Multi_QAgent inherits from QAgent to perform multi-agent reinforcement learning"""
class Multi_QAgent(QAgent):

    def __init__(self, dynetwork):
        QAgent.__init__(self, dynetwork)
        self.config = {
            "learning_rate": 0.3,
            "epsilon": 0.3,
            "discount": 0.9,
            "decay_rate": 0.999,
            "update_epsilon": False,
            "delta_win": 0.0025,
            "delta_lose": 0.01
        }
        (self.policy, self.mean_policy) = self.generate_strategy_table(dynetwork._network)
        self.counter = self.generate_counter()
        (self.old_neighbors, self.new_neighbors) = self.generate_neighbor_table()

    """Initialize the counter"""
    def generate_counter(self):
        print("Begin to generate counter")
        counter = {}
        for currpos in range(self.number_of_nodes):
            for dest in range(self.number_of_nodes):
                if currpos != dest:
                    counter[(currpos, dest)] = 0
        print("End of generate counter")
        return counter

    """Initialize the strategy table"""
    def generate_strategy_table(self, network):
        print("Begin to generate_strategy_table")
        strategy_table = {}
        average_strategy_table = {}
        num_nodes = network.number_of_nodes()
        for currpos in range(num_nodes):
            nlist = list(range(num_nodes))
            for dest in range(num_nodes):
                strategy_table[(currpos, dest)] = {}
                average_strategy_table[(currpos, dest)] = {}
                for action in nlist:
                    if (currpos != dest) and (currpos != action):
                        '''Initialize each entry to 1/|A| in the strategy table and the average strategy table, except at the destination'''
                        strategy_table[(currpos, dest)][action] = 1 / (network.number_of_nodes() - 1)
                        average_strategy_table[(currpos, dest)][action] = 1 / (network.number_of_nodes() - 1)
        print("End of generate_strategy_table")
        return strategy_table, average_strategy_table

    """Initialize the neighbor history records"""
    def generate_neighbor_table(self):
        print("Begin to generate old and new neighbor table")
        new_neighbor_table = dict.fromkeys(range(self.number_of_nodes), set())
        old_neighbor_table = dict.fromkeys(range(self.number_of_nodes), set())
        print("new_neighbor_table:", new_neighbor_table)
        print("old_neighbor_table:", old_neighbor_table)
        return old_neighbor_table, new_neighbor_table

    """Returns the best action for a given state; the action is the node number of the next hop and depends on exploration vs. exploitation, the strategy table, and the Q-table"""
    def act(self, state, neighbor):
        # TODO: exploration and exploitation
        if random.uniform(0, 1) < self.neighbors_variation(state, neighbor) * self.config['epsilon'] + 0.3:
            if not bool(neighbor):
                return None
            else:
                next_step = random.choice(neighbor)
                return next_step
        else:
            # TODO: policy_value_list may all be 0
            temp_neighbor_strategy_dict = {n: self.policy[state][n] for n in self.policy[state] if n in neighbor}
            policy_key_list = list(temp_neighbor_strategy_dict.keys())
            policy_value_list = list(temp_neighbor_strategy_dict.values())
            if not bool(temp_neighbor_strategy_dict):
                return None
            else:
                try:
                    if np.sum(policy_value_list) == 1.0:
                        next_step = np.random.choice(a=policy_key_list, p=policy_value_list)
                        return next_step
                    else:
                        if not any(policy_value_list):
                            next_step = random.choice(neighbor)
                            return next_step
                        else:
                            policy_value_list_new = np.divide(policy_value_list, sum(policy_value_list))
                            next_step = np.random.choice(policy_key_list, p=policy_value_list_new)
                            return next_step
                except ValueError:
                    print("Value Error, " + str(policy_value_list))
                    sys.exit()

    """update the q-table, policy, and mean policy"""
    def learn(self, current_event, reward, action):
        if (action == None) or (reward == None):
            pass
        else:
            cur_pos = current_event[0]
            dest = current_event[1]
            max_q = max(self.q[(action, dest)].values())

            """update the q-table"""
            self.q[(cur_pos, dest)][action] = self.q[(cur_pos, dest)][action] + (self.config["learning_rate"]) * (reward + self.config["discount"] * max_q - self.q[(cur_pos, dest)][action])

            """update the mean policy"""
            self.update_mean_pi(current_event)

            """update the policy"""
            self.update_pi(current_event)
        return

    """calculate delta"""
    def delta(self, state):
        sum_policy = 0.0
        sum_mean_policy = 0.0
        for i in self.policy[(state[0], state[1])].keys():
            sum_policy += (self.policy[state[0], state[1]][i] * self.q[state[0], state[1]][i])
            sum_mean_policy += (self.mean_policy[state[0], state[1]][i] * self.q[state[0], state[1]][i])
        if (sum_policy > sum_mean_policy):
            return self.config["delta_win"]
        else:
            return self.config["delta_lose"]

    """update the policy table"""
    def update_pi(self, state):
        maxQValueIndex = max(self.q[(state[0], state[1])], key=self.q[(state[0], state[1])].get)
        for i in self.policy[(state[0], state[1])].keys():
            d_plus = self.delta(state)
            d_minus = ((-1.0) * d_plus) / ((self.number_of_nodes - 1) - 1.0)
            if (i == maxQValueIndex):
                self.policy[(state[0], state[1])][i] = min(1.0, self.policy[(state[0], state[1])][i] + d_plus)
            else:
                self.policy[(state[0], state[1])][i] = max(0.0, self.policy[(state[0], state[1])][i] + d_minus)
        return

    """update the mean-policy table"""
    def update_mean_pi(self, state):
        self.counter[(state[0], state[1])] += 1
        for i in self.policy[(state[0], state[1])].keys():
            # incremental average: mean += (1/C) * (pi - mean)
            self.mean_policy[(state[0], state[1])][i] += (1.0 / self.counter[(state[0], state[1])]) * (self.policy[(state[0], state[1])][i] - self.mean_policy[(state[0], state[1])][i])
        return

    """calculate the variation of the set of neighbor nodes"""
    def neighbors_variation(self, state, new_neighbors):
        cur_node = state[0]
        self.old_neighbors[cur_node] = self.new_neighbors[cur_node]
        self.new_neighbors[cur_node] = set(new_neighbors)
        union = len(self.new_neighbors[cur_node].union(self.old_neighbors[cur_node]))
        if union == 0:
            return 1.0
        inter = len(self.new_neighbors[cur_node].intersection(self.old_neighbors[cur_node]))
        return (union - inter) / union
--------------------------------------------------------------------------------
/our_env.py:
--------------------------------------------------------------------------------
import dynetwork
import packet
import update_edges as UE
from our_agent import QAgent
import mobility

import gym
from gym import error
from gym.utils import closer
import networkx as nx

import copy
import numpy as np
import math
import os
import random
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.image as mgimg
from matplotlib import animation
from random import randint


""" This class contains our gym environment, which holds all of the necessary components for agents to take actions and receive rewards.
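    Q-learning routing and shortest-path routing run side by side on the same topology (the
    sp_-prefixed attributes and queues), so the two schemes can be compared under identical
    network conditions.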
    file contains functions:

    change_network: edge deletion/re-establishment, edge weight changes
    purgatory: queue used to generate additional packets as previous packets are delivered
    step: obtain rewards for updating the Q-table after an action ***
    is_capacity: check if the next node is full and unable to receive packets
    send_packet: attempt to send a packet to the next node
    reset: reset the environment after each episode
    resetForTest: reset the environment for each trial (test for different network loads)
    get_state: obtain a packet's position info
    update_queues: update each node's packet holding queue
    update_time: update packet delivery time ***
    calc_avg_delivery: helper function to calculate delivery time ***
    router: used to route all packets in ONE time step
    updateWhole: helper function to update the network environment and packet status
"""

class dynetworkEnv(gym.Env):

    '''Initialization of the network'''
    def __init__(self):
        self.nnodes = 20  # with too few nodes, the node queues fill up during packet generation
        self.radius = 0.2  # the antenna communication range; the whole map size is 1*1
        self.nedges = 3  # ABANDONED: number of edges to attach from a new node to existing nodes
        self.minSpeed = 0.005
        self.maxSpeed = 0.005  # define the min and max speed of a node
        self.mobility_model = 'gauss_markov'
        self.mb = None

        self.max_queue = 150
        self.max_transmit = 10
        self.npackets = 2000
        self.max_initializations = 5000
        self.max_edge_weight = 10
        self.min_edge_removal = 0
        self.max_edge_removal = 10
        self.edge_change_type = 'sinusoidal'  # not in use
        self.network_type = 'geometric_graph'  # use: random_geometric_graph(n, radius, dim=2, pos=None, p=2, seed=None)
        self.router_type = 'dijkstra'
        self.initial_dynetwork = None
        self.dynetwork = None
        self.print_edge_weights = True

        '''For Q-Learning'''
        '''current packet, i.e. the first item in the dynetwork's packet list'''
        self.packet = -1  # the packet that is being processed
        self.curr_queue = []  # the queue that is being processed
        self.remaining = []  # a queue containing packets that could not be sent forward
        self.nodes_traversed = 0

        '''For Shortest Path'''
        self.sp_packet = -1
        self.sp_curr_queue = []
        self.sp_remaining = []
        self.sp_nodes_traversed = 0
        self.preds = None

        """Initiate the mobility model here"""
        self.mb = mobility.Mobility(self.mobility_model, self.nnodes, self.minSpeed, self.maxSpeed)
        init_pos = self.mb.get_next_way_point()  # get a dict of node positions
        print("min speed is :", self.minSpeed, " max speed is :", self.maxSpeed)

        '''Initialize a dynetwork object using NetworkX and dynetwork.py'''
        """use random_geometric_graph(n, radius, dim=2, pos=None, p=2, seed=None)"""
        if self.network_type == 'geometric_graph':
            network = nx.random_geometric_graph(self.nnodes, self.radius, pos=init_pos)
        else:
            network = nx.gnm_random_graph(self.nnodes, self.nedges)
        print("The number of network nodes is ", self.nnodes)

        '''node attributes'''
        nx.set_node_attributes(network, copy.deepcopy(self.max_transmit), 'max_send_capacity')
        nx.set_node_attributes(network, copy.deepcopy(self.max_queue), 'max_receive_capacity')

        '''Q-Learning specific, set attributes'''
        receiving_queue_dict, sending_queue_dict = {}, {}
        for i in range(self.nnodes):
            temp = {'receiving_queue': []}
            temp2 = {'sending_queue': []}
            receiving_queue_dict.update({i: temp})  # update or add
            sending_queue_dict.update({i: temp2})
        del temp, temp2
        nx.set_node_attributes(network, receiving_queue_dict)  # set attributes keyed by node number
        nx.set_node_attributes(network, sending_queue_dict)
        nx.set_node_attributes(network, 0, 'max_queue_len')  # set the same attribute on all nodes
        nx.set_node_attributes(network, 0, 'avg_q_len_array')
        nx.set_node_attributes(network, 0, 'growth')

        '''Shortest Path specific, set attributes'''
        sp_receiving_queue_dict, sp_sending_queue_dict = {}, {}
        for i in range(self.nnodes):
            temp = {'sp_receiving_queue': []}
            temp2 = {'sp_sending_queue': []}
            sp_receiving_queue_dict.update({i: temp})
            sp_sending_queue_dict.update({i: temp2})
        del temp, temp2
        nx.set_node_attributes(network, sp_receiving_queue_dict)
        nx.set_node_attributes(network, sp_sending_queue_dict)
        nx.set_node_attributes(network, 0, 'sp_max_queue_len')
        nx.set_node_attributes(network, 0, 'sp_avg_q_len_array')

        '''Edge attributes'''
        nx.set_edge_attributes(network, 0, 'edge_delay')
        nx.set_edge_attributes(network, 0, 'sine_state')
        for s_edge, e_edge in network.edges:
            network[s_edge][e_edge]['edge_delay'] = random.randint(2, self.max_edge_weight)
            network[s_edge][e_edge]['initial_weight'] = network[s_edge][e_edge]['edge_delay']
            network[s_edge][e_edge]['sine_state'] = random.uniform(0, math.pi)

        '''make a copy so that we can preserve the initial state of the network'''
        self.initial_dynetwork = dynetwork.DynamicNetwork(copy.deepcopy(network), self.max_initializations)

        '''Saves the graph into a .gexf file'''
        script_dir = os.path.dirname(__file__)  # directory of this script
        results_dir = os.path.join(script_dir, 'q-learning/')  # join path components
        if not os.path.isdir(results_dir):
            os.makedirs(results_dir)

        # TODO: write_gexf of the geometric graph
        # nx.write_gexf(network, results_dir + "graph.gexf")

        self.dynetwork = copy.deepcopy(self.initial_dynetwork)
        '''use the dynetwork class method randomGeneratePackets to populate the network with packets'''
        self.dynetwork.randomGeneratePackets(copy.deepcopy(self.npackets), False)

        # TODO: positions to plot nodes
        self._positions = nx.spring_layout(self.dynetwork._network)  # position nodes; returns a dictionary of positions keyed by node

        '''Test here'''
        '''
        for nodeIndex in range(self.dynetwork._network.number_of_nodes()):
            node = self.dynetwork._network.nodes[nodeIndex]
            print("Node " + str(nodeIndex) + "'s init position is " + str(node['pos']))
        print("First edges:")
        print(self.dynetwork._network.edges.data())

        a1 = self.mb.get_next_way_point()
        self.mb.assign_position_to_nodes(self.dynetwork, a1)
        for nodeIndex in range(self.dynetwork._network.number_of_nodes()):
            node = self.dynetwork._network.nodes[nodeIndex]
            print("Node " + str(nodeIndex) + "'s second position is " + str(node['pos']))
        UE.calculate_nodes_connection(self.dynetwork, self.radius)
        print("Second edges:")
        print(self.dynetwork._network.edges.data())

        a2 = self.mb.get_next_way_point()
        self.mb.assign_position_to_nodes(self.dynetwork, a2)
        UE.calculate_nodes_connection(self.dynetwork, self.radius)
        for nodeIndex in range(self.dynetwork._network.number_of_nodes()):
            node = self.dynetwork._network.nodes[nodeIndex]
            print("Node " + str(nodeIndex) + "'s third position is " + str(node['pos']))
        UE.calculate_nodes_connection(self.dynetwork, self.radius)
        print("Third edges:")
        print(self.dynetwork._network.edges.data())
        '''

    '''helper function to update the learning environment in each time step'''
    # NOTE: only 'reward1' and 'reward2' are implemented in send_packet; any other
    # rewardfun name leaves the per-hop reward at 0 (terminal rewards still apply).
    def updateWhole(self, agent, q=True, sp=False, rewardfun='reward5', savesteps=False):

        # TODO: change network
        self.change_network()

        if q:
            self.purgatory(False)
            self.update_queues(False)
            self.update_time(False)
            self.router(agent, rewardfun, savesteps)

        if sp:
            self.purgatory(True)
            self.update_queues(True)
            self.update_time(True)
            self.sp_router(self.router_type, 'delay', savesteps)

    '''Used to update the edges in the network, depending on node mobility and communication range'''
    def change_network(self):
        '''
        previous edge update methods
        UE.Delete(self.dynetwork, self.min_edge_removal, self.max_edge_removal)
        UE.Restore(self.dynetwork)
        if self.edge_change_type == 'none':
            pass
        elif self.edge_change_type == 'sinusoidal':
            UE.Sinusoidal(self.dynetwork)
        else:
            UE.Random_Walk(self.dynetwork)
        '''
        temp_position = self.mb.get_next_way_point()
        self.mb.assign_position_to_nodes(self.dynetwork, temp_position)
        UE.calculate_nodes_connection(self.dynetwork, self.radius)

    '''Method for emptying 'purgatory', which holds the indices of packets that have
       been delivered so they may be reused'''
    def purgatory(self, sp=False):
        if sp:
            temp_purgatory = copy.deepcopy(self.dynetwork.sp_purgatory)
            self.dynetwork.sp_purgatory = []
        else:
            temp_purgatory = copy.deepcopy(self.dynetwork._purgatory)
            self.dynetwork._purgatory = []
        for (index, wait) in temp_purgatory:
            self.dynetwork.GeneratePacket(index, sp, wait)  # the second element of each pair is the remaining wait counter

    '''Takes packets which are now ready to be sent and puts them in the sending queue of the node'''
    def update_queues(self, sp=False):
        if sp:
            sending_queue = 'sp_sending_queue'
            receiving_queue = 'sp_receiving_queue'
        else:
            sending_queue = 'sending_queue'
            receiving_queue = 'receiving_queue'

        for nodeIdx in self.dynetwork._network.nodes:
            node = self.dynetwork._network.nodes[nodeIdx]
            if not sp:
                node['growth'] = len(node[receiving_queue])  # 'growth' records how many packets sit in the receiving queue at the start of this step
            queue = copy.deepcopy(node[receiving_queue])
            for elt in queue:
                '''increment the packet delivery time step'''
                pkt = elt[0]
                if elt[1] == 0:  # elt[1] == 0 means this packet is ready to be sent; move it from the receiving queue to the sending queue
                    node[sending_queue].append(pkt)
                    node[receiving_queue].remove(elt)
                else:  # if the packet is not ready to be sent, update the corresponding item in the receiving queue
                    idx = node[receiving_queue].index(elt)
                    node[receiving_queue][idx] = (pkt, elt[1] - 1)

    ''' Update the time spent in queues for each packet: for every packet in a receiving or sending queue, add one time step '''
    def update_time(self, sp=False):
        if sp:
            sending_queue = 'sp_sending_queue'
            receiving_queue = 'sp_receiving_queue'
            packets = self.dynetwork.sp_packets
        else:
            sending_queue = 'sending_queue'
            receiving_queue = 'receiving_queue'
            packets = self.dynetwork._packets

        for nodeIdx in self.dynetwork._network.nodes:
            for elt in self.dynetwork._network.nodes[nodeIdx][receiving_queue]:
                '''increment the packet delivery time step'''
                pkt = elt[0]
                curr_time = packets.packetList[pkt].get_time()
                packets.packetList[pkt].set_time(curr_time + 1)
            for c_pkt in self.dynetwork._network.nodes[nodeIdx][sending_queue]:
                curr_time = packets.packetList[c_pkt].get_time()
                packets.packetList[c_pkt].set_time(curr_time + 1)


    ''' -----------------Q-Learning Functions---------------- '''

    ''' return the packet's position and destination'''
    def get_state(self, pktIdx):
        pkt = self.dynetwork._packets.packetList[self.packet]
        return (pkt.get_curPos(), pkt.get_endPos())

    '''loop over the sending queue of every node and record some congestion measures'''
    def router(self, agent, rewardfun='reward5', savesteps=False):
        node_queue_lengths = [0]
        num_nodes_at_capacity = 0
        num_nonEmpty_nodes = 0
        '''iterate over all nodes'''
        for nodeIdx in self.dynetwork._network.nodes:
            """ self.nodes_traversed tracks the number of nodes we have looped over, guaranteeing that each packet will have the same epsilon at each time step """
            self.nodes_traversed += 1
            if self.nodes_traversed == self.nnodes:
                agent.config['update_epsilon'] = True
                self.nodes_traversed = 0
            node = self.dynetwork._network.nodes[nodeIdx]
            '''provides a pointer to the queue of the current node'''
            self.curr_queue = node['sending_queue']
            sending_capacity = node['max_send_capacity']
            queue_size = len(self.curr_queue)

            '''Congestion Measure #1: max queue len'''
            if (queue_size > self.dynetwork._max_queue_length):
                self.dynetwork._max_queue_length = queue_size

            '''Congestion Measure #2: avg queue len pt1'''
            if (queue_size > 0):
                node_queue_lengths.append(queue_size)
                num_nonEmpty_nodes += 1  # node whose queue is not empty
                ''' Congestion Measure #3: avg percent at capacity'''
                if (queue_size > sending_capacity):
                    '''increment the number of nodes that are at capacity'''
                    num_nodes_at_capacity += 1  # fully loaded node

            '''stores packets which currently have no destination path'''
            # stores packets which cannot be sent to a neighbor; the list is reset for each node in the loop
            self.remaining = []
            sendctr = 0  # counts the packets sent this step (those that yield a reward)

            '''loop over the sending queue of the current node'''
            for i in range(queue_size):
                '''when the node cannot send any more packets, break and move to the next node'''
                if sendctr == sending_capacity:
                    self.dynetwork._rejections += len(node['sending_queue'])
                    break
                self.packet = self.curr_queue[0]  # the packet currently being processed
                pkt_state = self.get_state(copy.deepcopy(self.packet))  # get_state returns the current and destination positions
                nlist = list(self.dynetwork._network.neighbors(pkt_state[0]))  # neighbors(G, n) returns the nodes connected to node n
                action = agent.act(pkt_state, nlist)
                reward, self.remaining, self.curr_queue, action = self.step(action, pkt_state[0], rewardfun, savesteps)
                if reward != None:
                    sendctr += 1
                agent.learn(pkt_state, reward, action)

            node['sending_queue'] = self.remaining + node['sending_queue']

        '''Congestion Measure #2: avg queue length pt2'''
        if len(node_queue_lengths) > 1:
            self.dynetwork._avg_q_len_arr.append(np.average(node_queue_lengths[1:]))
        '''Congestion Measure #3: percent of nodes at capacity'''
        self.dynetwork._num_capacity_node.append(num_nodes_at_capacity)
        self.dynetwork._num_working_node.append(num_nonEmpty_nodes)
        '''Congestion Measure #4: percent of empty nodes'''
        self.dynetwork._num_empty_node.append(self.nnodes - num_nonEmpty_nodes)

    """ given a neighboring node (action), checks whether that node has available space in its queue; if it does not, the packet stays in the current queue, otherwise the packet is sent to the action node's queue """
    def step(self, action, curNode=None, rewardfun='reward5', savesteps=False):
        reward = None

        """ checks if the action is None, in which case the current node has no neighbors, and also checks whether the target node has space in its queue """

        if (action == None) or (self.is_capacity(action, False)):
            self.curr_queue.remove(self.packet)  # the sending queue removes the packet
            self.remaining.append(self.packet)  # keep the packet for the next time step
            self.dynetwork._rejections += 1
        else:
            reward = self.send_packet(action, rewardfun, savesteps)
        return reward, self.remaining, self.curr_queue, action

    '''
    Given next_step, send the packet to next_step and add the edge delay to the packet's time.
    Whether the node is full (and other considerations) is checked beforehand.
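    Returns the reward for the hop: 5 (Rmax) when the packet reaches its destination, -5 (-Rmax)
    when the receiving node has no remaining path to the destination, and otherwise the value of
    the configured reward function.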
    '''
    def send_packet(self, next_step, rewardfun='reward1', savesteps=False):
        reward = 0
        pkt = self.dynetwork._packets.packetList[self.packet]
        curr_node = pkt.get_curPos()
        dest_node = pkt.get_endPos()
        weight = self.dynetwork._network[curr_node][next_step]['edge_delay']
        pkt.set_curPos(next_step)
        if savesteps:
            pkt.add_step(next_step)
        self.dynetwork._packets.packetList[self.packet].set_time(pkt.get_time() + weight)
        if pkt.get_curPos() == dest_node:
            """ if the packet has reached its destination, a new packet is created with the same 'ID' (packet index) but a new destination, which is then redistributed to another node """
            self.dynetwork._delivery_times.append(self.dynetwork._packets.packetList[self.packet].get_time())
            self.dynetwork._deliveries += 1
            # TODO: when a packet reaches its destination
            # self.dynetwork.GeneratePacket(self.packet, False, random.randint(0, 5))  # the use of wait
            self.curr_queue.remove(self.packet)
            reward = 5  # TODO: reward of the termination (Rmax)
        else:
            self.curr_queue.remove(self.packet)
            try:
                if rewardfun == 'reward1':
                    reward = self.reward1(curr_node, next_step)
                if rewardfun == 'reward2':
                    reward = self.reward2()
            except nx.NetworkXNoPath:
                """ if the node the packet was just sent to has no available path to dest_node, we assign a reward of -5 """
                reward = -5  # TODO: reward of void area/dead end (-Rmax)
            self.dynetwork._network.nodes[next_step]['receiving_queue'].append(
                (self.packet, weight))
        return reward

    '''-----------------------------Reward Functions----------------------------'''


    '''The expected reward function, which considers edge delay, mobility, and buffer occupancy'''
    # TODO: reward function
    def reward1(self, cur_pos, next_step):
        link_delay = self.dynetwork._network[cur_pos][next_step]['edge_delay']
        buf_factor = 1 - len(self.dynetwork._network.nodes[next_step]['receiving_queue']) / self.max_queue
        curnode_last_geopos = self.mb.trajectory[cur_pos][-2]
        curnode_now_geopos = self.mb.trajectory[cur_pos][-1]
        nextstep_last_geopos = self.mb.trajectory[next_step][-2]
        nextstep_now_geopos = self.mb.trajectory[next_step][-1]
        # atan2 avoids a ZeroDivisionError when a node moves vertically
        curnode_angle = math.atan2(curnode_now_geopos[1] - curnode_last_geopos[1], curnode_now_geopos[0] - curnode_last_geopos[0])
        nextstep_angle = math.atan2(nextstep_now_geopos[1] - nextstep_last_geopos[1], nextstep_now_geopos[0] - nextstep_last_geopos[0])
        mobility_factor = (math.cos(nextstep_angle - curnode_angle) + 2) / 3  # ranges from 1/3 to 1
        pos_factor = 1 - (math.sqrt(math.pow(nextstep_now_geopos[0] - curnode_now_geopos[0], 2) + math.pow(nextstep_now_geopos[1] - curnode_now_geopos[1], 2))) / self.radius  # computed but currently unused

        """
        print("link_delay = ", link_delay, "delayfactor = ", math.exp(-link_delay))
        print("angle1 = ", curnode_angle, "angle2 = ", nextstep_angle)
        print("mobility_factor = ", mobility_factor)
        print("buf_factor = ", buf_factor)
        print("pos_factor = ", pos_factor)
        """

        w1 = 0.4
        w2 = 0.3
        w3 = 0.3

        # weighted sum of the delay, mobility, and buffer factors (w1 + w2 + w3 = 1)
        reward = w1 * math.exp(-link_delay) + w2 * mobility_factor + w3 * buf_factor
        return reward

    '''reward function that returns a constant value'''
    def reward2(self):
        return 0.5

    '''--------------------SHORTEST PATH-----------------'''

    def sp_router(self, router_type='dijkstra', weight='delay', savesteps=False):
        if str.lower(router_type) != 'dijkstra':
            if weight == 'delay':
                self.preds, _ = nx.floyd_warshall_predecessor_and_distance(self.dynetwork._network, weight='edge_delay')  # find the shortest path between any two nodes with the Floyd-Warshall algorithm
            else:
                self.preds, _ = nx.floyd_warshall_predecessor_and_distance(self.dynetwork._network)
        temp_node_queue_lens = [0]
        temp_num_nodes_at_capacity = 0
        temp_num_nonEmpty_node = 0
        self.update_queues(True)
        self.update_time(True)

        '''iterate over all nodes'''
        for node in self.dynetwork._network.nodes:
            '''provides a pointer to the queue of the current node'''
            curr_queue = self.dynetwork._network.nodes[node]['sp_sending_queue']
            sending_capacity = self.dynetwork._network.nodes[node]['max_send_capacity']
            queue_size = len(curr_queue)

            '''Congestion Measure #1: max queue length'''
            if (queue_size > self.dynetwork.sp_max_queue_length):
                self.dynetwork.sp_max_queue_length = queue_size
            '''Congestion Measure #2: average queue length'''
            if (queue_size > 0):
                temp_node_queue_lens.append(queue_size)
                temp_num_nonEmpty_node += 1

                '''Congestion Measure #3: average percentage of active nodes at capacity'''
                if (queue_size > sending_capacity):
                    temp_num_nodes_at_capacity += 1

            '''stores packets which currently have no path to their destination'''
            remaining = []
            sendctr = 0

            for i in range(queue_size):
                '''when the node cannot send any more packets, break and move on to the next node'''
                if sendctr == sending_capacity:
                    self.dynetwork.sp_rejections += (len(self.dynetwork._network.nodes[node]['sp_sending_queue']))
                    break
                remaining, curr_queue, sent = self.handle_node_packet(curr_queue, remaining, router_type, weight, savesteps)
                if sent:
                    sendctr += 1
            self.dynetwork._network.nodes[node]['sp_sending_queue'] = remaining + self.dynetwork._network.nodes[node]['sp_sending_queue']

        '''Congestion Measure #2: average queue length'''
        if len(temp_node_queue_lens) > 1:
            self.dynetwork.sp_avg_q_len_arr.append(np.average(temp_node_queue_lens[1:]))

        '''Congestion Measure #3: percentage of nodes at capacity'''
        self.dynetwork.sp_num_capacity_node.append(temp_num_nodes_at_capacity)
        self.dynetwork.sp_num_working_node.append(temp_num_nonEmpty_node)
        self.dynetwork.sp_num_empty_node.append((self.nnodes - temp_num_nonEmpty_node) / self.nnodes)

    '''helper function to move packets to their corresponding queues'''
    def handle_node_packet(self, curr_queue, remaining, router_type, weight, savesteps=False):
        pkt = curr_queue[0]
        currPos = self.dynetwork.sp_packets.packetList[pkt].get_curPos()
        destPos = self.dynetwork.sp_packets.packetList[pkt].get_endPos()
        sent = False
        try:
            if currPos == destPos:
                curr_queue.remove(pkt)
            else:
                next_step = self.get_next_step(currPos, destPos, router_type, weight)
                if self.is_capacity(next_step, True):
                    curr_queue.remove(pkt)
                    remaining.append(pkt)
                    self.dynetwork.sp_rejections += 1
                else:
                    self.sp_send_packet(pkt, currPos, next_step, savesteps)
                    curr_queue.remove(pkt)
                    sent = True
        except (nx.NetworkXNoPath, KeyError):
            curr_queue.remove(pkt)
            remaining.append(pkt)
        return remaining, curr_queue, sent

    '''return the node for the packet to route to in the next step, using a shortest path algorithm'''
    def get_next_step(self, currPos, destPos, router_type, weight):
        if str.lower(router_type) == 'dijkstra' and weight == 'delay':
            return nx.dijkstra_path(self.dynetwork._network, currPos, destPos, weight='edge_delay')[1]
        elif str.lower(router_type) == 'dijkstra':
            return nx.dijkstra_path(self.dynetwork._network, currPos, destPos)[1]
        else:
            return nx.reconstruct_path(currPos, destPos, self.preds)[1]

    '''helper function to route one packet'''
    def sp_send_packet(self, pkt, curr, next_step, savesteps=False):
        if savesteps:
            self.dynetwork.sp_packets.packetList[pkt].add_step(next_step)
        self.dynetwork.sp_packets.packetList[pkt].set_curPos(next_step)
        weight = self.dynetwork._network[curr][next_step]['edge_delay']
        curr_time = self.dynetwork.sp_packets.packetList[pkt].get_time()
        self.dynetwork.sp_packets.packetList[pkt].set_time(curr_time + weight)
        if self.dynetwork.sp_packets.packetList[pkt].get_curPos() == self.dynetwork.sp_packets.packetList[pkt].get_endPos():
            new_time = self.dynetwork.sp_packets.packetList[pkt].get_time()
            self.dynetwork.sp_delivery_times.append(new_time)
            self.dynetwork.sp_deliveries += 1
            self.dynetwork.GeneratePacket(pkt, True, randint(0, 5))
        else:
            self.dynetwork._network.nodes[next_step]['sp_receiving_queue'].append((pkt, weight))


    '''----SHARED FUNCTIONS BETWEEN Q-LEARNING AND SHORTEST PATH----'''

    """ checks to see if there is space in the target node's queue """
    def is_capacity(self, target_node, sp=False):
        if sp:
            sending_queue = 'sp_sending_queue'
            receiving_queue = 'sp_receiving_queue'
        else:
            sending_queue = 'sending_queue'
            receiving_queue = 'receiving_queue'

        total_queue_len = len(self.dynetwork._network.nodes[target_node][sending_queue]) + \
            len(self.dynetwork._network.nodes[target_node][receiving_queue])
        return total_queue_len >= self.dynetwork._network.nodes[target_node]['max_receive_capacity']

    """ this function resets the environment """
    def reset(self, curLoad, sp):
        self.dynetwork = copy.deepcopy(self.initial_dynetwork)
        if curLoad != None:
            self.npackets = curLoad
        self.dynetwork.randomGeneratePackets(self.npackets, sp)
        print('Environment reset')

    '''helper function to calculate delivery times'''
    def calc_avg_delivery(self):
        delivery_times = self.dynetwork._delivery_times
        return (sum(delivery_times) / len(delivery_times))

    ''' Save an image of the current state of the network'''
    def render(self, i=0):
        node_labels = {}
        for node in self.dynetwork._network.nodes:
            node_labels[node] = len(self.dynetwork._network.nodes[node]['sending_queue']) + len(
                self.dynetwork._network.nodes[node]['receiving_queue'])
        nx.draw(self.dynetwork._network, pos=self._positions,
                labels=node_labels, font_weight='bold')
        if self.print_edge_weights:
            edge_labels = nx.get_edge_attributes(
                self.dynetwork._network, 'edge_delay')
            nx.draw_networkx_edge_labels(
                self.dynetwork._network, pos=self._positions, edge_labels=edge_labels)
        script_dir1 = os.path.dirname(__file__)
        results_dir1 = os.path.join(script_dir1, 'network_images/')
        if not os.path.isdir(results_dir1):
            os.makedirs(results_dir1)
        plt.axis('off')
        plt.figtext(0.1, 0.1, "total injections: " + str(self.max_initializations + self.dynetwork._initializations))
        plt.savefig("network_images/dynet" + str(i) + ".png")
        plt.clf()

    '''helper function to generate animations of the routing process'''
    def routing_example(self, agent, curLoad):

        '''create directory'''
        script_dir = os.path.dirname(__file__)
        anim_dir = os.path.join(script_dir, 'animations/')
        if not os.path.isdir(anim_dir):
            os.makedirs(anim_dir)

        '''track the first packet'''
        t1 = 0
        t2 = 0
        try:
            q_first_packet = self.dynetwork._packets.packetList[(self.dynetwork._network.nodes[0]['sending_queue'])[-1]]
            sp_first_packet = self.dynetwork.sp_packets.packetList[(self.dynetwork._network.nodes[0]['sp_sending_queue'])[-1]]
            q_current_node = q_first_packet.get_startPos()
            sp_current_node = sp_first_packet.get_startPos()
            destination = q_first_packet.get_endPos()
            q_nodes = []  # record the queue length of every node at every step
            s_nodes = []
            edges = []  # record the edges at every step
            edge_labels = []
            q_traversed = []  # record the edges the packet traversed
            s_traversed = []
            q_current = [q_current_node]  # record the nodes the packet traversed
            s_current = [sp_current_node]

            '''keep routing until both Q-learning and SP finish'''
            while (q_current_node != destination) or (sp_current_node != destination):
                '''I should really use a dictionary instead of a million lists of size t...'''
                self.updateWhole(agent, q=True, sp=True, rewardfun='reward5', savesteps=True)  # note: updateWhole has no flag to disable learning
                if q_current_node != destination:
                    t1 += 1
                if sp_current_node != destination:
                    t2 += 1
                q_current_node = q_first_packet._steps[-1]
                sp_current_node = sp_first_packet._steps[-1]
                q_current.append(q_first_packet.get_curPos())
                s_current.append(sp_first_packet.get_curPos())
                e = self.dynetwork._network.edges
                edges.append(e)
                q_node_label = {}
                sp_node_label = {}
                for node in self.dynetwork._network.nodes:
                    q_node_label[node] = str(node) + ":" + str(len(self.dynetwork._network.nodes[node]['sending_queue']) + len(self.dynetwork._network.nodes[node]['receiving_queue']))
                    sp_node_label[node] = str(node) + ":" + str(len(self.dynetwork._network.nodes[node]['sp_sending_queue']) + len(self.dynetwork._network.nodes[node]['sp_receiving_queue']))
                q_nodes.append(q_node_label)
                s_nodes.append(sp_node_label)
                temp = zip(q_first_packet._steps, q_first_packet._steps[1:])
                path = []
                for (a, b) in temp:
                    if a > b:
                        path.append((b, a))
                    else:
                        path.append((a, b))
                q_traversed.append([x for x in list(path) if x in e])  # keep only edges that still exist and lie on the packet's path
                temp1 = zip(sp_first_packet._steps, sp_first_packet._steps[1:])
                path1 = []
                for (a, b) in temp1:
                    if a > b:
                        path1.append((b, a))
                    else:
                        path1.append((a, b))
                s_traversed.append([x for x in list(path1) if x in e])
                if self.print_edge_weights:
                    edge_labels.append(nx.get_edge_attributes(self.dynetwork._network, 'edge_delay'))


            print("Packet %i traversed from Node %i to Node %i." % (q_first_packet.get_index(), q_first_packet.get_startPos(), q_first_packet.get_endPos()))
            print("Q-Learning: %i time steps" % (1 + q_first_packet.get_time()))
            print(q_first_packet._steps)
            print("Shortest Path: %i time steps" % sp_first_packet.get_time())
            print(sp_first_packet._steps)

            '''animate the traversal process for both'''
            fig = plt.figure(figsize=(9.6, 7.2))
            plt.clf()
            plt.axis('off')

            def q_animate(i):
                plt.clf()
                plt.title("Q-Learning for Load of " + str(curLoad))
                plt.figtext(0, 0, "Path:" + str(q_first_packet._steps) + "\nTime: " + str(1 + q_first_packet.get_time()), fontsize='x-large')
                nx.draw_networkx_nodes(self.dynetwork._network, pos=self._positions, node_color='#CCE5FF')
                nx.draw_networkx_nodes(self.dynetwork._network, pos=self._positions, nodelist=[0, destination], node_color='#FFB266')
                nx.draw_networkx_nodes(self.dynetwork._network, pos=self._positions, nodelist=[q_current[i + 1]], node_size=500)
                nx.draw_networkx_labels(self.dynetwork._network, pos=self._positions, labels=q_nodes[i], font_weight='bold')
                nx.draw_networkx_edges(self.dynetwork._network, pos=self._positions, edgelist=edges[i])
                nx.draw_networkx_edges(self.dynetwork._network, pos=self._positions, edgelist=q_traversed[i], width=3.0)
                if self.print_edge_weights:
                    nx.draw_networkx_edge_labels(self.dynetwork._network, pos=self._positions, edge_labels=edge_labels[i])

            anim = animation.FuncAnimation(fig, q_animate, frames=t1, interval=50, repeat_delay=1000)
            Writer = animation.writers['ffmpeg']
            writer = Writer(fps=2, metadata=dict(artist='Me'), bitrate=1800)
            anim.save(anim_dir + "q_load" + str(curLoad) + ".mp4", writer=writer)

            fig = plt.figure(figsize=(9.6, 7.2))
            plt.clf()
            plt.axis('off')

            def s_animate(i):
                plt.clf()
                plt.title("Shortest Path for Load of " + str(curLoad))
                plt.figtext(0, 0, "Path:" + str(sp_first_packet._steps) + "\nTime: " + str(sp_first_packet.get_time()), fontsize='x-large')
                nx.draw_networkx_nodes(self.dynetwork._network, pos=self._positions, node_color='#CCE5FF')
                nx.draw_networkx_nodes(self.dynetwork._network, pos=self._positions, nodelist=[0, destination], node_color='#FFB266')
                nx.draw_networkx_nodes(self.dynetwork._network, pos=self._positions, nodelist=[s_current[i + 1]], node_size=500)
                nx.draw_networkx_labels(self.dynetwork._network, pos=self._positions, labels=s_nodes[i], font_weight='bold')
                nx.draw_networkx_edges(self.dynetwork._network, pos=self._positions, edgelist=edges[i])
                nx.draw_networkx_edges(self.dynetwork._network, pos=self._positions, edgelist=s_traversed[i], width=3.0)
                if self.print_edge_weights:
                    nx.draw_networkx_edge_labels(self.dynetwork._network, pos=self._positions, edge_labels=edge_labels[i])

            anim = animation.FuncAnimation(fig, s_animate, frames=t2, interval=50, repeat_delay=1000)
            Writer = animation.writers['ffmpeg']
            writer = Writer(fps=2, metadata=dict(artist='Me'), bitrate=1800)
            anim.save(anim_dir + "sp_load" + str(curLoad) + ".mp4", writer=writer)

        except:
            pass  # silently skip the animation if node 0 has no packet to track
--------------------------------------------------------------------------------
/packet.py:
--------------------------------------------------------------------------------
'''Class representing a packet, which stores the starting position, current position,
destination node, and time steps spent alive'''
# TODO: packets should at least be classified into data packets and HELLO packets, or even more types, such as ACK packets


class Packet(object):
    def __init__(self, startPos, endPos, curPos, index, weight, time=0):
        self._startPos = startPos
        self._endPos = endPos
        self._curPos = curPos
        self._index = index
        self._weight = weight
        self._time = time
        self._steps = [startPos]

    def get_startPos(self):
        return self._startPos

    def get_endPos(self):
        return self._endPos

    def get_curPos(self):
        return self._curPos

    def get_index(self):
        return self._index

    def get_weight(self):
        return self._weight

    def get_time(self):
        return self._time

    def set_startPos(self, startNode):
        self._startPos = startNode

    def set_endPos(self, endNode):
        self._endPos = endNode

    def set_curPos(self, curNode):
        self._curPos = curNode

    def set_index(self, index):
        self._index = index

    def set_weight(self, weight):
        self._weight = weight

    def set_time(self, time):
        self._time = time

    def add_step(self, step):
        (self._steps).append(step)


'''Class which stores all the packets in the network'''


class Packets(object):
    def __init__(self, packetList):
        self.packetList = packetList
        self.num_Packets = len(packetList)
--------------------------------------------------------------------------------
/simulation.py:
--------------------------------------------------------------------------------
import sys

from our_agent import Multi_QAgent
from our_env import *
import matplotlib.pyplot as plt
'''
This program generates a network, teaches a Q-learning agent
to route packets, and tests both the learned Q-routing policy
and Shortest Path routing on the network over various
network loads.
'''

'''One episode starts with the initialization of all the packets and ends with the delivery of all
env.npackets + env.max_initializations packets OR after time_steps.'''
numEpisode = 40
'''Max length of one episode'''
time_steps = 2000
'''Specify the learning method'''
learn_method = 'Multi-Q-learning'
'''Specify the reward function (listed in our_env.py)'''
rewardfunction = 'reward1'
'''Mark true to generate plots of performance while learning'''
learning_plot = True
'''Mark true to generate plots of performance for different test network loads'''
comparison_plots = True
'''Number of times to repeat each value in the network_load list'''
trials = 10
'''Mark true to run shortest path simultaneously during testing, for comparison with Q-learning'''
sp = False
'''Initialize the environment'''
env = dynetworkEnv()
'''Specify the list of network loads to test'''
network_load = np.arange(2000, 2500, 500)
print("Network load for test: ", network_load)
for i in network_load:
    if i <= 0:
        print("Error: Network load must be positive.")
        exit()
    if i >= env.nnodes * env.max_queue:
        print("Error: Network load cannot exceed nodes times max queue size.")
        exit()
env.reset(max(network_load), False)
if learn_method == "Q-learning":
    agent = QAgent(env.dynetwork)
elif learn_method == "Multi-Q-learning":
    agent = Multi_QAgent(env.dynetwork)
else:
    print("No assigned algorithm")
    sys.exit()
print("Algorithm is ", learn_method)

'''Performance Measures for Q-Learning While Learning'''
avg_deliv_learning = []
avg_q_len_learning = []
delivery_ratio = []

# In each episode, update the network time_steps times. In each time step, update the whole network, which means:
# 1. update edges  2. generate packets  3. update queues  4. update packet time in queues  5. route all nodes.
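# That sequence maps directly onto dynetworkEnv.updateWhole in our_env.py:
#   env.change_network()      -> 1. update edges from node mobility
#   env.purgatory(False)      -> 2. re-inject delivered packet indices
#   env.update_queues(False)  -> 3. move ready packets into sending queues
#   env.update_time(False)    -> 4. add one time step to every queued packet
#   env.router(agent, ...)    -> 5. route the packets at every node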
'''----------------------LEARNING PROCESS--------------------------'''
for i_episode in range(numEpisode):
    print("---------- Episode:", i_episode + 1, " ----------")
    step = []
    deliveries = []
    '''iterate over the time steps, trying to finish routing within time_steps'''
    for t in range(time_steps):
        '''key function that obtains actions and updates the Q-table'''
        env.updateWhole(agent, rewardfun=rewardfunction)

        '''store attributes for performance measures'''
        step.append(t)
        deliveries.append(copy.deepcopy(env.dynetwork._deliveries))

        if env.dynetwork._deliveries >= (env.npackets + env.dynetwork._max_initializations):
            print("done!")
            break

    '''Save all performance measures'''
    avg_deliv_learning.append(env.calc_avg_delivery())
    avg_q_len_learning.append(np.average(env.dynetwork._avg_q_len_arr))
    delivery_ratio.append(env.dynetwork._deliveries / max(network_load))
    print("end to end delay: ", env.calc_avg_delivery())
    print("delivery ratio: ", env.dynetwork._deliveries / max(network_load))
    print("average queue length: ", np.average(env.dynetwork._avg_q_len_arr))

    env.reset(max(network_load), False)  # use the max network load to learn

script_dir = os.path.dirname(__file__)
results_dir = os.path.join(script_dir, 'plots/')
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)
learn_results_dir = os.path.join(script_dir, 'plots/learnRes/')
if not os.path.isdir(learn_results_dir):
    os.makedirs(learn_results_dir)

'''Produce plots of performance while learning'''
print("**********Learning result per episode**********")
if learning_plot:
    print("Average Delivery Time")
    print(avg_deliv_learning)
    plt.clf()
    plt.title("Average Delivery Time Per Episode")
    plt.plot(list(range(1, numEpisode + 1)), avg_deliv_learning)
    plt.xlabel('Episode')
    plt.ylabel('Delay')
    plt.savefig(learn_results_dir + "delay.png")
    np.save("avg_deliv_learning", avg_deliv_learning)
    plt.clf()

    print("Average Queue Length")
    print(avg_q_len_learning)
    plt.clf()
    plt.title("Average Number of Packets a Node Holds Per Episode")
    plt.plot(list(range(1, numEpisode + 1)), avg_q_len_learning)
    plt.xlabel('Episode')
    plt.ylabel('Average Number of Packets Held by a Node')
    plt.savefig(learn_results_dir + "avg_q_len_learning.png")
    np.save("avg_q_len_learning", avg_q_len_learning)
    plt.clf()

    print("Delivery ratio")
    print(delivery_ratio)
    plt.clf()
    plt.title("Delivery Ratio Per Episode")
    plt.plot(list(range(1, numEpisode + 1)), delivery_ratio)
    plt.xlabel("Episode")
    plt.ylabel("Delivery Ratio")
    plt.savefig(learn_results_dir + "delivery_ratio.png")
    np.save("delivery_ratio", delivery_ratio)
    plt.clf()
print("**********End of learning result**********")
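
# Optional offline analysis (an illustration, not part of the original script): the arrays
# saved with np.save above land in the working directory with a .npy suffix and can be
# reloaded later without rerunning the simulation, e.g.:
#   import numpy as np
#   avg_deliv_learning = np.load("avg_deliv_learning.npy")
#   delivery_ratio = np.load("delivery_ratio.npy")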
'''--------------------------TESTING PROCESS--------------------------'''
'''Performance Measures for Q-Learning'''
avg_deliv = []
avg_q_len = []
delivery_ratio = []

for i in range(len(network_load)):
    curLoad = network_load[i]

    print("---------- Testing Load of ", curLoad, " ----------")
    for currTrial in range(trials):
        env.reset(curLoad, True)

        step = []
        deliveries = []

        '''iterate over the time steps, trying to finish routing within time_steps'''
        for t in range(time_steps):

            total = env.npackets + env.dynetwork._max_initializations
            q_done = (env.dynetwork._deliveries >= total)
            if sp:
                s_done = (env.dynetwork.sp_deliveries >= total)
            else:
                s_done = True
            env.updateWhole(agent, q=not q_done, sp=not s_done, rewardfun=rewardfunction, savesteps=False)

            if q_done and s_done:
                print("Finished trial ", currTrial)
                break

        '''STATS MEASURES'''
        avg_deliv.append(env.calc_avg_delivery())
        avg_q_len.append(np.average(env.dynetwork._avg_q_len_arr))
        delivery_ratio.append(env.dynetwork._deliveries / curLoad)

'''Print test results'''
print("**********Test result**********")
print("Delay")
print(avg_deliv)
print("Average queue length")
print(avg_q_len)
print("Delivery ratio")
print(delivery_ratio)
print("**********End of test result**********")
--------------------------------------------------------------------------------
/update_edges.py:
--------------------------------------------------------------------------------
import numpy as np
import random
import math

''' Functions to handle edges in our network. '''

''' Randomly delete some number of edges, between min_edge_removal and max_edge_removal '''
def Delete(dyNetwork, min_edge_removal, max_edge_removal):
    edges = list(dyNetwork._network.edges())  # list() so random.sample also works on newer Python/NetworkX
    deletion_number = random.randint(min_edge_removal, min(max_edge_removal, len(edges) - 1))
    strip = random.sample(edges, k=deletion_number)
    temp = []
    for s_edge, e_edge in strip:
        # keep the attribute dict so a deleted edge can later be restored with its data
        temp.append((s_edge, e_edge, dyNetwork._network[s_edge][e_edge]))
    strip = temp
    dyNetwork._network.remove_edges_from(strip)
    dyNetwork._stripped_list.extend(strip)


''' Randomly restore some of the edges we have deleted '''
def Restore(dyNetwork):
    restore_number = random.randint(0, len(dyNetwork._stripped_list))
    # random.choices samples with replacement, so duplicates are possible;
    # add_edges_from silently tolerates them
    restore = random.choices(dyNetwork._stripped_list, k=restore_number)
    dyNetwork._network.add_edges_from(restore)


''' Randomly change edge weights '''
# edge weight here actually represents edge delay
def Random_Walk(dyNetwork):
    for s_edge, e_edge in dyNetwork._network.edges():
        try:
            changed = random.randint(-2, 2) + dyNetwork._network[s_edge][e_edge]['edge_delay']
            dyNetwork._network[s_edge][e_edge]['edge_delay'] = max(changed, 1)
        except KeyError:
            # an edge without an 'edge_delay' attribute; report it rather than crash
            print(s_edge, e_edge)


''' Change edge weights so that the changes are roughly sinusoidal across the simulation '''
# The sine_state step of pi/6 is a tunable hyperparameter; see the sketch below for its effect.
def Sinusoidal(dyNetwork):
    for s_edge, e_edge in dyNetwork._network.edges():
        dyNetwork._network[s_edge][e_edge]['edge_delay'] = max(1, int(dyNetwork._network[s_edge][e_edge]['initial_weight'] * (1 + 0.5 * math.sin(dyNetwork._network[s_edge][e_edge]['sine_state']))))
        dyNetwork._network[s_edge][e_edge]['sine_state'] += math.pi / 6
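
# Illustrative sketch (an addition, not part of the original file): with a step of pi/6,
# sine_state completes a full 2*pi cycle every 12 calls to Sinusoidal(), so each edge's
# delay oscillates between roughly 0.5x and 1.5x its initial_weight with a period of
# 12 simulation steps. A standalone check, assuming sine_state starts at 0:
#
#   import math
#   w0 = 10  # hypothetical initial_weight
#   delays = [max(1, int(w0 * (1 + 0.5 * math.sin(t * math.pi / 6)))) for t in range(12)]
#   # -> [10, 12, 14, 15, 14, 12, 10, 7, 5, 5, 5, 7], then the pattern repeats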
''' Not in use. If it were used, the edge weight would be the average of the number of packets
in the sending queue at each endpoint of the edge. '''
def Average(dyNetwork):
    for node1, node2 in dyNetwork._network.edges(data=False):
        tot_queue1 = dyNetwork._network.nodes[node1]['sending_queue']
        tot_queue2 = dyNetwork._network.nodes[node2]['sending_queue']
        # average the queue lengths (the original called the nonexistent np.avg on the raw queues)
        avg = np.average([len(tot_queue1), len(tot_queue2)])
        dyNetwork._network[node1][node2]['edge_delay'] = avg
        del tot_queue1, tot_queue2


''' Decide whether two nodes are connected, based on their positions and the communication radius '''
def calculate_nodes_connection(dyNetwork, radius):
    for i in range(dyNetwork._network.number_of_nodes()):
        for j in range(i, dyNetwork._network.number_of_nodes()):
            if i != j:
                pos1 = dyNetwork._network.nodes[i]['pos']
                pos2 = dyNetwork._network.nodes[j]['pos']
                if math.sqrt(math.pow(pos1[0] - pos2[0], 2) + math.pow(pos1[1] - pos2[1], 2)) <= radius:
                    dyNetwork._network.add_edge(i, j)

                    # TODO: the edge delay is set to 1 temporarily
                    dyNetwork._network[i][j]['edge_delay'] = 1
                else:
                    if dyNetwork._network.has_edge(i, j):
                        dyNetwork._network.remove_edge(i, j)
--------------------------------------------------------------------------------
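
A minimal usage sketch of calculate_nodes_connection (illustrative only; the wrapper class
and coordinates below are assumptions, not files or values from the repo):

    import networkx as nx
    import update_edges

    class Wrapper:  # stands in for DynamicNetwork, which also exposes ._network
        def __init__(self, g):
            self._network = g

    g = nx.Graph()
    g.add_node(0, pos=(0.0, 0.0))
    g.add_node(1, pos=(3.0, 4.0))   # distance 5 from node 0
    g.add_node(2, pos=(9.0, 9.0))   # out of range of both other nodes
    net = Wrapper(g)
    update_edges.calculate_nodes_connection(net, radius=6)
    print(list(net._network.edges()))  # -> [(0, 1)]; each new edge gets edge_delay = 1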