├── .idea
│   ├── .gitignore
│   ├── externalDependencies.xml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── uav_swarm_learning_preliminary.iml
│   └── vcs.xml
├── Agent
│   ├── Agent.py
│   └── __init__.py
├── Config.py
├── Dockerfile
├── Drone.py
├── LICENSE
├── Model
│   ├── Model.py
│   └── __init__.py
├── Program.py
├── README.md
├── Tests
│   ├── ProgramTests.py
│   └── __init__.py
└── data.json

/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 | 
--------------------------------------------------------------------------------
/.idea/externalDependencies.xml:
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
/.idea/uav_swarm_learning_preliminary.iml:
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
/Agent/Agent.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import Config
3 | 
4 | 
5 | class Agent:
6 | 
7 |     def __init__(self, name, number, autonomy_time, speed, minimum_image_size, position, environment_matrix):
8 |         self.__status = 'start'
9 |         self.__model = None
10 |         if not Config.GLOBAL_MODEL:
11 |             import numpy as np
12 |             input_matrix = np.dstack((environment_matrix, environment_matrix, environment_matrix))
13 |             from Model.Model import create_model
14 |             self.__model = create_model(input_matrix.shape)
15 |         self.__number = number  # maybe it is better to create random color: tuple(np.random.randint(256, size=3))
16 |         self.__name = name
17 |         self.__autonomy_time = autonomy_time
18 |         self.__time_move_region_horizontal = float(minimum_image_size[0]) / float(speed)
19 |         self.__time_move_region_vertical = float(minimum_image_size[1]) / float(speed)
20 |         self.__position = position
21 |         self.__movements = []
22 |         self.__valid_actions = []
23 |         self.__memory = []
24 |         self.__reward = 0
25 |         self.__cycle_count = 0
26 |         self.__actions_taken = 0
27 |         self.__valid_taken_actions = 0
28 | 
29 |     # Getters and setters
30 |     def get_name(self):
31 |         return self.__name
32 | 
33 |     def get_status(self):
34 |         return self.__status
35 | 
36 |     def get_time_move_region_lateral(self):
37 |         return self.__time_move_region_horizontal
38 | 
39 |     def get_time_move_region_vertical(self):
40 |         return self.__time_move_region_vertical
41 | 
42 |     def get_autonomy_time(self):
43 |         return self.__autonomy_time
44 | 
45 |     def get_number(self):
46 |         return self.__number
47 | 
48 |     def get_reward(self):
49 |         return self.__reward
50 | 
51 |     def get_position(self):
52 |         return self.__position
53 | 
54 |     def set_position(self, new_position):
55 |         self.__position = new_position
56 | 
57 |     def set_movements(self, movement_list):
58 |         self.__movements = movement_list
59 | 
   |     def get_movements(self):
   |         # List of valid movements performed, used when saving the agents' paths to disk
   |         return self.__movements
   | 
60 |     def set_status(self, status):
61 |         self.__status = status
62 | 
63 |     def get_model(self):
64 |         return self.__model
65 | 
66 |     def get_actions_taken(self):
67 |         return self.__actions_taken
68 | 
69 |     def set_actions_taken(self, number):
70 |         self.__actions_taken = number
71 | 
72 |     def get_valid_taken_actions(self):
73 |         return self.__valid_taken_actions
74 | 
75 |     def set_valid_taken_actions(self, number):
76 |         self.__valid_taken_actions = number
77 | 
78 |     def increase_valid_taken_actions(self):
79 |         self.__valid_taken_actions += 1
80 | 
81 |     # Other methods
82 |     def reset_movements(self):
83 |         self.__movements = []
84 | 
85 |     def decrease_autonomy_time(self, amount):
86 |         self.__autonomy_time -= amount
87 | 
88 |     def compute_valid_actions(self, environment):
89 |         # Get total number of rows and columns
90 |         (rows, columns) = environment.shape
91 | 
92 |         # Get current position
93 |         (row, column) = self.__position
94 | 
95 |         actions = []  # Possible actions list
96 |         import numpy as np
97 |         if column - 1 >= 0 and np.round(environment[row, column - 1]) > 0:
98 |             # left
99 |             actions.append(0)
100 |         if row - 1 >= 0 and np.round(environment[row - 1, column]) > 0:
101 |             # up
102 |             actions.append(1)
103 |         if column + 1 < columns and np.round(environment[row, column + 1]) > 0:
104 |             # right
105 |             actions.append(2)
106 |         if row + 1 < rows and np.round(environment[row + 1, column]) > 0:
107 |             # down
108 |             actions.append(3)
109 | 
110 |         self.__valid_actions = actions
111 | 
112 |     def do_action(self, chosen_action, environment, prev_visited_map, prev_agent_map):
113 |         # Update number of actions taken
114 |         self.__actions_taken += 1
115 | 
116 |         # Copy values for updating
117 |         new_agent_map = prev_agent_map.copy()
118 |         new_visited_map = prev_visited_map.copy()
119 | 
120 |         # Get valid actions agent can do
121 |         self.compute_valid_actions(environment)
122 | 
123 |         # Update environment and agent's position
124 |         prev_autonomy = self.__autonomy_time
125 |         if chosen_action in self.__valid_actions:
126 |             new_agent_map[self.__position[0], self.__position[1]] = False
127 | 
128 |             # Store all valid chosen movements for later printing
129 |             self.__movements.append(Config.ACTIONS_DICT[chosen_action])
130 | 
131 |             (row, col) = self.__position
132 |             if chosen_action == 0:  # left
133 |                 (row, col) = (row, col - 1)
134 |             if chosen_action == 1:  # up
135 |                 (row, col) = (row - 1, col)
136 |             if chosen_action == 2:  # right
137 |                 (row, col) = (row, col + 1)
138 |             if chosen_action == 3:  # down
139 |                 (row, col) = (row + 1, col)
140 |             self.__position = (row, col)
141 |             # environment[row, col] = self.__number
142 |             if prev_visited_map[row, col]:
143 |                 self.__cycle_count += 1
144 |                 self.__reward = Config.VISITED_CELL_REWARD - float(self.__cycle_count) / float(Config.ENVIRONMENT_ROWS * Config.ENVIRONMENT_COLUMNS)
145 |             else:
146 |                 import numpy as np
147 |                 # Reward is increased when there are fewer remaining new cells, and a random component is added in
148 |                 # order to emulate Skinner's variable rewards, so each UAV will have different rewards for new cells
149 |                 # and it is less likely to go to the same cell as another UAV in the same iteration
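   |                 # i.e. reward = NEW_CELL_REWARD * (1 + max(ENVIRONMENT_ROWS, ENVIRONMENT_COLUMNS) / number_of_unvisited_cells)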
150 |                 self.__reward = Config.NEW_CELL_REWARD * (1.0 + max(Config.ENVIRONMENT_ROWS,
151 |                                                                      Config.ENVIRONMENT_COLUMNS) / np.count_nonzero(
152 |                     new_visited_map == False))
153 | 
154 |             new_visited_map[row, col] = True  # In future, assign it to the agent's number
155 |             new_agent_map[row, col] = True
156 | 
157 |         else:
158 |             self.__reward = Config.NO_CELL_REWARD
159 | 
160 |         # Return tuple: (updated visited map, updated agent map, reward)
161 |         return new_visited_map, new_agent_map, self.__reward
162 | 
163 |     def memorize(self, observation):
164 |         # Update agent memory
165 |         import Config
166 |         self.__memory.append(observation)
167 |         while len(self.__memory) > Config.MEMORY_SIZE:  # Forget old data if memory is full
168 |             self.__memory.pop(0)
169 | 
170 |     # --------------- Multiple models ---------------
171 | 
172 |     def prepare_data(self, environment):
173 |         # Prepare data for training
174 |         import Config
175 |         import numpy as np
176 |         aux = np.array([(self.__memory[0][0], self.__memory[0][0])])
177 |         env_size = aux.shape
178 |         data_size = Config.MEMORY_SIZE
179 |         mem_size = len(self.__memory)  # in case we didn't use all memory
180 |         data_size = min(mem_size, data_size)
181 |         inputs1 = []
182 |         inputs2 = []
183 |         outputs = np.zeros((data_size, len(Config.ACTIONS_DICT.keys())),
184 |                            dtype=float)  # We have len(Config.ACTIONS_DICT.keys()) actions
185 | 
186 |         # For each observation in memory
187 |         for i, j in enumerate(np.random.choice(range(mem_size), data_size, replace=False)):
188 |             # i = ordered memory position, j = random memory position ===> more generalization if the ANN is
189 |             # trained without order
190 | 
191 |             # Get observation j from memory
192 |             prev_visited_map, actual_visited_map, prev_agent_map, actual_agent_map, chosen_action, reward, status = \
193 |                 self.__memory[j]
194 | 
195 |             # Convert data to int for training the ANN
196 |             prev_visited_map = np.array(prev_visited_map, dtype=int)
197 |             prev_agent_map = np.array(prev_agent_map, dtype=int)
198 |             actual_visited_map = np.array(actual_visited_map, dtype=int)
199 |             actual_agent_map = np.array(actual_agent_map, dtype=int)
200 | 
201 |             # Save observation i if it has not been saved before
202 |             if len(inputs1) <= i:
203 |                 inputs1.append(prev_visited_map)  # Visited positions as inputs
204 |                 inputs2.append(prev_agent_map)  # Agents' positions as inputs
205 | 
206 |             # Save target values. Targets start from the current prediction for the previous state; only the
207 |             # chosen/taken action's value is then overwritten with the Q-learning update below.
208 | outputs[i] = self.predict(prev_visited_map, prev_agent_map, environment) 209 | 210 | # Compute max expected Q value 211 | predicted_q_values = self.predict(actual_visited_map, actual_agent_map, environment) 212 | max_q_sa = np.argmax(predicted_q_values) 213 | 214 | # Apply Q-function 215 | if status == 'finish': 216 | outputs[i, chosen_action] = reward 217 | else: 218 | outputs[i, chosen_action] = reward + Config.GAMMA * max_q_sa 219 | return inputs1, inputs2, outputs 220 | 221 | def learn(self, environment): 222 | import Config 223 | inputs1, inputs2, outputs = self.prepare_data(environment) 224 | ann_input = [] 225 | for t in zip(inputs1, inputs2): # For each observation 226 | import numpy as np 227 | r, c = t[0].shape 228 | ann_input.append(np.dstack((t[0], t[1], np.zeros((r, c), float)))) # 3D matrix with data 229 | 230 | # Fit ANN 231 | ann_input = np.asarray(ann_input) 232 | history = self.__model.fit([ann_input], outputs, epochs=Config.EPOCHS, batch_size=Config.BATCH_SIZE, 233 | verbose=Config.VERBOSE) 234 | return history 235 | 236 | def predict(self, environment1, environment2, environment3): 237 | import numpy as np 238 | ann_input = np.dstack((environment1, environment2, environment3)) # 3D matrix with data 239 | 240 | # Predict Q-table values 241 | return self.__model.predict(np.array([ann_input]))[0] # [0] only with Keras 242 | 243 | # --------------- Global Model --------------- 244 | 245 | def learn_global_model(self, environment, model): 246 | import Config 247 | inputs1, inputs2, outputs = self.prepare_data_global_model(environment, model) 248 | ann_input = [] 249 | for t in zip(inputs1, inputs2): # For each observation 250 | import numpy as np 251 | r, c = t[0].shape 252 | ann_input.append(np.dstack((t[0], t[1], np.zeros((r, c), float)))) # 3D matrix with data 253 | 254 | # Fit ANN 255 | history = model.fit([ann_input], outputs, epochs=Config.EPOCHS, batch_size=Config.BATCH_SIZE, 256 | verbose=Config.VERBOSE) 257 | return history 258 | 259 | def prepare_data_global_model(self, environment, model): 260 | # Prepare data for training 261 | import Config 262 | import numpy as np 263 | aux = np.array([(self.__memory[0][0], self.__memory[0][0])]) 264 | env_size = aux.shape 265 | data_size = Config.MEMORY_SIZE 266 | mem_size = len(self.__memory) # in case we didn't use all memory 267 | data_size = min(mem_size, data_size) 268 | inputs1 = [] 269 | inputs2 = [] 270 | outputs = np.zeros((data_size, len(Config.ACTIONS_DICT.keys())), 271 | dtype=float) # We have len(Config.ACTIONS_DICT.keys() actions 272 | 273 | # For each observation at memory 274 | for i, j in enumerate(np.random.choice(range(mem_size), data_size, replace=False)): 275 | # i = ordered memory position, j = random memory position ===> more generalization if ANN is 276 | # trained without order 277 | 278 | # Get observation i from memory 279 | prev_visited_map, actual_visited_map, prev_agent_map, actual_agent_map, chosen_action, reward, status = \ 280 | self.__memory[j] 281 | 282 | # Convert data to int for training ANN 283 | prev_visited_map = np.array(prev_visited_map, dtype=int) 284 | prev_agent_map = np.array(prev_agent_map, dtype=int) 285 | actual_visited_map = np.array(actual_visited_map, dtype=int) 286 | actual_agent_map = np.array(actual_agent_map, dtype=int) 287 | 288 | # Save observation i if it has not been saved before 289 | if len(inputs1) <= i: 290 | inputs1.append(prev_visited_map) # Visited positions as inputs 291 | inputs2.append(prev_agent_map) # Agents positions as inputs 292 | 293 | # Save 
target values. Theoretically, chosen/taken action will have non-zero values, remain actions' 294 | # values are 0. There should be no target values (0) for actions not taken. 295 | outputs[i] = self.predict_global_model(prev_visited_map, prev_agent_map, environment, model) 296 | 297 | # Compute max expected Q value 298 | predicted_q_values = self.predict_global_model(actual_visited_map, actual_agent_map, environment, model) 299 | max_q_sa = np.argmax(predicted_q_values) 300 | 301 | # Apply Q-function 302 | if status == 'finish': 303 | outputs[i, chosen_action] = reward 304 | else: 305 | outputs[i, chosen_action] = reward + Config.GAMMA * max_q_sa 306 | return inputs1, inputs2, outputs 307 | 308 | def predict_global_model(self, environment1, environment2, environment3, model): 309 | import numpy as np 310 | r, c = environment1.shape 311 | ann_input = np.dstack((environment1, environment2, environment3)) # 3D matrix with data 312 | 313 | # Predict Q-table values 314 | return model.predict(np.array([ann_input]))[0] # [0] only with Keras 315 | -------------------------------------------------------------------------------- /Agent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheMVS/uav_swarm_reinforcement_learning/1f61b2b74b5575372928fe4e53a53b1e6cb630ea/Agent/__init__.py -------------------------------------------------------------------------------- /Config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | SEED = None # None if we want a random seed or a number if we want to specify a seed 4 | 5 | LOAD_MAP_FILE = False # For loading map from a file 6 | 7 | # FILES PATH 8 | BASE_ROUTE = './' 9 | DATA_ROUTE = 'data.json' 10 | MAP_ROUTE = 'map.txt' 11 | EPSILONS_ROUTE = 'epsilons.csv' 12 | SIMULATIONS_TIMES_ROUTE = 'simulations_times.csv' 13 | SIMULATIONS_REWARDS = 'simulations_rewards.csv' 14 | SIMULATIONS_COVERAGES = 'simulations_coverages.csv' 15 | SIMULATION_COVERAGES = 'simulation_coverages.csv' 16 | 17 | # Episode configuration 18 | SIMULATIONS = 500 19 | PRINT_SIMULATIONS = False # Print for checkpoint on console 20 | SIMULATIONS_CHECKPOINT = 100 # Number of episodes that must happen before printing checkpoint 21 | 22 | # Environment configuration 23 | ENVIRONMENT_ROWS = 9 24 | ENVIRONMENT_COLUMNS = 9 25 | SQUARE = True # Make environment polygon squared 26 | START_CORNER_0_0 = True 27 | 28 | # Agent Configuration 29 | ACTIONS_DICT = { 30 | 0: 'left', 31 | 1: 'up', 32 | 2: 'right', 33 | 3: 'down', 34 | } 35 | NEW_CELL_REWARD = 358.736076826821 36 | VISITED_CELL_REWARD = -31.1376955791041 37 | NO_CELL_REWARD = -225.171111437135 38 | 39 | # Learning process configuration 40 | GLOBAL_MODEL = False 41 | EPSILON = 0.468937067929711 # 0.498082374999 42 | MIN_EPSILON = 0.05 43 | GAMMA = 0.902865796260127 44 | MEMORY_SIZE = 60 45 | BATCH_SIZE = 63 46 | EPOCHS = 2 47 | VERBOSE = 0 48 | EPSILON_DECAY = 0.929049010143763 49 | 50 | # Model 51 | DENSE1_SIZE = 167 52 | DENSE1_ACTIVATION = 'linear' 53 | DENSE2_SIZE = len(ACTIONS_DICT) 54 | DENSE2_ACTIVATION = 'softmax' 55 | OPTIMIZER = 'RMSprop' 56 | 57 | # Early experiment stopping 58 | MAXIMUM_WAIT_HOURS = 0.5 59 | COMPLETENESS_COVERAGE = 1.0 # float between 0 and 1 60 | MAXIMUM_UNCHANGED_ENVIRONMENT_EPISODES = 9999999999 61 | 62 | # Unit tests 63 | UNIT_TESTS = False -------------------------------------------------------------------------------- /Dockerfile: 
--------------------------------------------------------------------------------
1 | # Download base image Ubuntu 20.04
2 | FROM ubuntu:20.04
3 | 
4 | # Author information
5 | LABEL maintainer="a.puentec@udc.es"
6 | LABEL version="1.0"
7 | LABEL description="UAV SWARM PATH PLANNING WITH REINFORCEMENT LEARNING FOR FIELD PROSPECTING: PRELIMINARY RESULTS"
8 | 
9 | # Set root user
10 | USER root
11 | 
12 | # Disable prompts during package installation
13 | ARG DEBIAN_FRONTEND=noninteractive
14 | 
15 | # Update Ubuntu software repository
16 | RUN apt update
17 | 
18 | # Install pip and vim
19 | RUN apt install -y python3-pip vim
20 | 
21 | COPY . main/
22 | WORKDIR main/
23 | 
24 | # Install Python packages
25 | RUN pip3 install numpy==1.19.2 shapely tensorflow keras matplotlib
26 | 
27 | # Run the program by default
28 | # (Config.py and data.json can still be edited inside the container with vim)
29 | CMD ["python3", "Program.py"]
--------------------------------------------------------------------------------
/Drone.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | class Drone:
3 |     __name = ''
4 |     __battery_time = 0  # minutes
5 |     __speed = 0  # m/s
6 |     __image_size = (0, 0)  # (width, height)
7 |     __height = 0  # m
8 | 
9 |     def __init__(self, name, battery_time, speed, image_size, height, image_angle):
10 |         self.__name = name
11 |         self.__battery_time = battery_time
12 |         self.__speed = speed
13 |         self.__image_size = image_size
14 |         self.__height = height
15 |         self.__image_angle = image_angle
16 | 
17 |     def get_name(self):
18 |         return self.__name
19 | 
20 |     def get_battery_time(self):
21 |         return self.__battery_time
22 | 
23 |     def get_speed(self):
24 |         return self.__speed
25 | 
26 |     def get_image_size(self):
27 |         return self.__image_size
28 | 
29 |     def get_height(self):
30 |         return self.__height
31 | 
32 |     def get_image_angle(self):
33 |         return self.__image_angle
34 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 TheMVS
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /Model/Model.py: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | 4 | def create_model(input_shape): 5 | from keras import Input, models 6 | from keras.layers import Flatten, Dense 7 | 8 | y = Input(shape=input_shape) 9 | z = Flatten()(y) 10 | z = Dense(Config.DENSE1_SIZE, activation=Config.DENSE1_ACTIVATION)(z) 11 | z = Dense(Config.DENSE2_SIZE, activation=Config.DENSE2_ACTIVATION)(z) 12 | 13 | # our model will accept the inputs of the two branches and 14 | # then output a single value 15 | model = models.Model(y, z) 16 | 17 | model.compile(optimizer=Config.OPTIMIZER, loss='mse') 18 | model.summary() 19 | return model -------------------------------------------------------------------------------- /Model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheMVS/uav_swarm_reinforcement_learning/1f61b2b74b5575372928fe4e53a53b1e6cb630ea/Model/__init__.py -------------------------------------------------------------------------------- /Program.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import Config 4 | import cProfile 5 | import re 6 | 7 | cProfile.run('re.compile("foo|bar")') 8 | 9 | 10 | class Program: 11 | 'Common base class for all programs' 12 | __drones = [] 13 | __agents = [] 14 | __points = [] 15 | __original_environment = None 16 | __operator_position = None 17 | __drone_initial_position = (0, 0) 18 | 19 | def __init__(self): 20 | return 21 | 22 | # Getters and setters 23 | def get_drones(self): 24 | return self.__drones 25 | 26 | def get_points(self): 27 | return self.__points 28 | 29 | def get_agents(self): 30 | return self.__agents 31 | 32 | def get_environment(self): 33 | return self.__original_environment 34 | 35 | def set_environment(self, environment): 36 | self.__original_environment = environment 37 | 38 | # Other methods 39 | def normalize_coordinate_value(self, value): 40 | # Shapely has float point precision errors 41 | min = -90.0 42 | max = 90.0 43 | 44 | num = float(value) - min 45 | denom = max - min 46 | 47 | return num / denom 48 | 49 | def denormalize_coordinate_value(self, norm_value): 50 | # Shapely has float point precision errors 51 | min = -90.0 52 | max = 90.0 53 | 54 | denom = max - min 55 | 56 | return float(norm_value) * denom + min 57 | 58 | def read_data(self): 59 | # Load data from JSON 60 | import json 61 | from Drone import Drone 62 | 63 | with open(Config.BASE_ROUTE + Config.DATA_ROUTE) as json_file: 64 | data = json.load(json_file) 65 | self.__drones = [] 66 | for d in data['drones']: # Drones info 67 | self.__drones.append( 68 | Drone(d['name'].replace(" ", "_"), d['battery_time'], d['speed'], 69 | (d['image_size']['w'], d['image_size']['h']), 70 | d['height'], d['image_angle'])) 71 | 72 | self.__points = [] 73 | for p in data['points']: # Map info 74 | self.__points.append( 75 | (self.normalize_coordinate_value(p['lat']), self.normalize_coordinate_value(p['long']))) 76 | self.__agents = [] 77 | 78 | from shapely.geometry import Point # Operator's info 79 | self.__operator_position = Point((self.normalize_coordinate_value(data['operator_position']['lat']), 80 | self.normalize_coordinate_value(data['operator_position']['long']))) 81 | 82 | def compute_minimum_area(self, drones): 83 | # Get drones minimum image area (supposing a triangle composed of two 
rectangle triangles) 84 | areas = [] 85 | for drone in drones: 86 | import numpy as np 87 | a = drone.get_height() 88 | A = np.deg2rad(drone.get_image_angle() / 2.0) 89 | C = np.deg2rad(90.0) 90 | B = np.deg2rad(180.0 - 90 - (drone.get_image_angle() / 2.0)) 91 | b = a * np.sin(C) / np.sin(B) 92 | c = a * np.sin(A) / np.sin(B) 93 | 94 | image_width = c * 2.0 95 | image_height = drone.get_image_size()[1] * (image_width / drone.get_image_size()[0]) 96 | 97 | areas.append((image_width * image_height, (image_width, image_height))) 98 | return min(areas, key=lambda t: t[0])[1] 99 | 100 | def compute_environment(self): 101 | drones = self.__drones 102 | points = self.__points 103 | 104 | # 1.- Get polygon giving a list of points 105 | from shapely.geometry import Polygon 106 | polygon = Polygon(points) 107 | import matplotlib 108 | matplotlib.use('Agg') 109 | import matplotlib.pyplot as plt 110 | plt.plot(*polygon.exterior.xy) # Only for Python 3 111 | plt.savefig(Config.BASE_ROUTE + 'field_polygon.png') 112 | plt.clf() 113 | 114 | # 2.- Get minimum bounding rectangle 115 | # 2.1.- We need coordinates closest to south (min_x), north (max_x), west (min_y) and east (max_y) 116 | min_x = min(points, key=lambda t: t[0])[0] 117 | max_x = max(points, key=lambda t: t[0])[0] 118 | min_y = min(points, key=lambda t: t[1])[1] 119 | max_y = max(points, key=lambda t: t[1])[1] 120 | 121 | # 2.2.- Get number of squares verticaly (num_v) and horizontaly (num_h) giving drones' minimum image rectangle 122 | import math 123 | num_v = Config.ENVIRONMENT_ROWS 124 | num_h = Config.ENVIRONMENT_COLUMNS 125 | 126 | # 3.3.- Create a numpy matrix with a cell for each image square 127 | import numpy as np 128 | environment = np.zeros((num_h, num_v)) 129 | 130 | # 3.4.- Get coordinates deltas for computing points 131 | d_v = (max_y - min_y) / num_v 132 | d_h = (max_x - min_x) / num_h 133 | 134 | # 3.4 Get original operator's point 135 | from shapely.ops import nearest_points 136 | closest_point = nearest_points(polygon.exterior, self.__operator_position)[0] 137 | 138 | # 3.5.- Check visitable squares as 1 139 | import itertools 140 | for (i, j) in itertools.product(list(range(num_v)), list(range(num_h))): # i: [0, num_v-1], j: [0, num_h-1] 141 | sp1 = (j * d_h + min_x, (num_v - i) * d_v + min_y) 142 | sp2 = ((j + 1) * d_h + min_x, (num_v - i) * d_v + min_y) 143 | sp3 = (j * d_h + min_x, (num_v - (i + 1)) * d_v + min_y) 144 | sp4 = ((j + 1) * d_h + min_x, (num_v - (i + 1)) * d_v + min_y) 145 | square = Polygon([sp1, sp2, sp4, sp3]) 146 | 147 | if Config.SQUARE: 148 | environment[num_h - (j + 1), num_v - (i + 1)] = 1.0 # Marked as navigable square 149 | 150 | if polygon.intersects(square.buffer(1e-9)) or polygon.contains(square.buffer(1e-9)): 151 | 152 | if not Config.SQUARE: 153 | environment[num_h - (j + 1), num_v - (i + 1)] = 1.0 # Marked as navigable square 154 | 155 | if Config.START_CORNER_0_0 and Config.SQUARE: 156 | self.__drone_initial_position = (0, 0) 157 | elif closest_point.within(square) or closest_point.intersects(square): 158 | self.__drone_initial_position = ( 159 | num_h - (j + 1), num_v - (i + 1)) # Set operator's position as initial position 160 | 161 | self.__original_environment = environment 162 | 163 | import numpy as np 164 | np.savetxt(Config.BASE_ROUTE + Config.MAP_ROUTE, environment) 165 | 166 | import matplotlib 167 | matplotlib.use('Agg') # For running in SO without graphical environment 168 | import matplotlib.pyplot as plt 169 | from matplotlib.ticker import MaxNLocator 170 | ax = 
plt.figure().gca()
171 |         ax.invert_yaxis()
172 |         ax.xaxis.set_major_locator(MaxNLocator(integer=True))
173 |         ax.yaxis.set_major_locator(MaxNLocator(integer=True))
174 |         computed_environment = environment.copy()
175 |         computed_environment[self.__drone_initial_position] = 3
176 |         ax.pcolor(computed_environment, cmap='Greys', edgecolors='gray')
177 |         plt.savefig(Config.BASE_ROUTE + 'computed_environment.png')
178 |         plt.clf()
179 | 
180 |         return environment
181 | 
182 |     def reset(self):
183 |         # Reset environment and agents' positions
184 |         for drone_number in range(len(self.__drones)):
185 |             self.__agents[drone_number].set_position(self.__drone_initial_position)
186 |             self.__agents[drone_number].reset_movements()
187 |             self.__agents[drone_number].set_actions_taken(0)
188 |             self.__agents[drone_number].set_valid_taken_actions(0)
189 | 
190 |     def save_agents_movements(self, done_count):
191 |         for agent in self.__agents:
192 |             with open(Config.BASE_ROUTE + 'drones_movement_agent' + str(agent.get_number() * 10) + '_done' + str(
193 |                     done_count) + '_.txt',
194 |                       'w') as f:
195 |                 f.write(agent.get_name() + ': ')
196 |                 for movement in agent.get_movements():
197 |                     f.write("%s, " % movement)
198 |                 f.write('\n')
199 | 
200 |     def compute_path(self):
201 |         count = 0
202 |         from Agent.Agent import Agent
203 |         self.__agents = []
204 |         for drone in program.get_drones():  # Create Reinforcement Learning Agents
205 |             self.__agents.append(
206 |                 Agent(drone.get_name(), count, drone.get_battery_time(),
207 |                       drone.get_speed(),
208 |                       program.compute_minimum_area(self.__drones), (0, 0), self.__original_environment))
209 | 
210 |             count += 1
211 | 
212 |         # Get number of observation episodes
213 |         number_episodes = Config.SIMULATIONS
214 | 
215 |         import time
216 |         global_execution_start_time = time.time()
217 |         start_number = 0
218 |         done_count = 0  # Number of times problem has been solved
219 | 
220 |         # Get epsilon
221 |         epsilon = Config.EPSILON
222 | 
223 |         # Save epsilon for plotting
224 |         epsilons = [epsilon]
225 | 
226 |         # Total repetitions in all episodes
227 |         total_unchanged_environment_episodes_count = 0
228 | 
229 |         # Maximum coverage overall
230 |         max_coverage = 0.0
231 | 
232 |         # Max coverage lists for plotting for the whole experiment
233 |         max_coverages = []
234 | 
235 |         # Simulations' times
236 |         episodes_time = []
237 | 
238 |         # Simulations' total rewards
239 |         rewards_episodes = []
240 | 
241 |         # Store total actions taken per observation
242 |         episode_total_actions = []
243 |         episode_total_valid_actions = []
244 | 
245 |         valid_actions_taken_agent = []
246 | 
247 |         # Compute episodes
248 |         for episode_number in range(start_number, number_episodes):
249 | 
250 |             # Reset agents and environment
251 |             program.reset()
252 | 
253 |             # Update heatmap
254 |             heatmap = self.get_environment() * 0.0
255 |             for element in self.__agents:
256 |                 (x, y) = element.get_position()
257 |                 heatmap[x][y] += 1.0
258 | 
259 |             # Add minimum max coverage
260 |             max_coverages.append(0.0)
261 | 
262 |             # Add max coverage observation
263 |             coverages_episode = [0.0]
264 | 
265 |             # Reset unchanged environments count
266 |             unchanged_environment_episodes_count = 0
267 | 
268 |             # Create ANN if necessary
269 |             if (Config.GLOBAL_MODEL):
270 |                 from numpy import dstack
271 |                 input_matrix = dstack((self.get_environment(), self.get_environment(), self.get_environment()))
272 |                 from Model.Model import create_model
273 |                 model = create_model(input_matrix.shape)
274 | 
275 |             # Get initial environment for starting observation
276 |             actual_environment =
program.get_environment() 277 | 278 | # Get visited positions map and agent position map 279 | import numpy as np 280 | actual_visited_map = np.array(actual_environment * 0.0, dtype=bool) # Changed to bool for first experiments 281 | drone_map = np.array(actual_environment * 0.0, dtype=bool) # Changed to bool for first experiments 282 | 283 | # Rewards and for plotting 284 | rewards_episodes.append(0.0) 285 | rewards = [] 286 | action_rewards = [] 287 | for _ in self.__agents: 288 | rewards.append([0]) 289 | action_rewards.append([0]) 290 | 291 | # Mark agents positions as true 292 | for agent in self.__agents: 293 | (i, j) = agent.get_position() 294 | drone_map[i, j] = True 295 | actual_visited_map[i, j] = True 296 | 297 | # Print trace every 100 episodes 298 | if episode_number % Config.SIMULATIONS_CHECKPOINT == 0 and Config.PRINT_SIMULATIONS: 299 | print("Episode {} of {}".format(episode_number + 1, number_episodes)) 300 | 301 | # Compute paths 302 | done = False 303 | episode_counter = 0 304 | visited_list = [] # store each agent's visited squares 305 | visited_list.append(actual_visited_map) # store each agent's visited squares 306 | 307 | # Add new values to actions lists 308 | episode_total_actions.append(0.0) 309 | episode_total_valid_actions.append(0.0) 310 | 311 | if len(valid_actions_taken_agent): 312 | for element in self.get_agents(): 313 | valid_actions_taken_agent[element.get_number()].append(0.0) 314 | else: 315 | for _ in self.get_agents(): 316 | valid_actions_taken_agent.append([0.0]) 317 | 318 | # Store trendline_slope 319 | trendline_slope = -1.0 320 | 321 | import time 322 | start_time = time.time() 323 | while not done: 324 | 325 | # Get previous environment (this way all agents would act at the same time) 326 | prev_visited_map = np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=bool).copy() 327 | prev_drone_map = drone_map.copy() 328 | drone_position_list = [] # store each agent's position 329 | 330 | # For each agent compute 1 action 331 | for agent in program.get_agents(): 332 | 333 | # Make decision 334 | import numpy as np 335 | rand_number = np.random.random() 336 | 337 | if rand_number < epsilon: 338 | random_action = True 339 | # Get random action 340 | chosen_action = np.random.randint(0, len(Config.ACTIONS_DICT.keys())) 341 | else: 342 | random_action = False 343 | # Decide one action 344 | if not Config.GLOBAL_MODEL: 345 | chosen_action = np.argmax(agent.predict(np.array(prev_visited_map, dtype=int), 346 | np.array(prev_drone_map, dtype=int), 347 | self.get_environment(), )) 348 | else: 349 | chosen_action = np.argmax(agent.predict_global_model(np.array(prev_visited_map, dtype=int), 350 | np.array(prev_drone_map, dtype=int), 351 | self.get_environment(), 352 | model)) 353 | 354 | episode_total_actions[episode_number] += 1.0 355 | 356 | # Get agent's position before doing action for printing it in a file 357 | prev_position = agent.get_position() 358 | 359 | # Update environment according to action 360 | actual_visited_map, actual_drone_map, reward = agent.do_action(chosen_action, 361 | self.__original_environment, 362 | prev_visited_map, prev_drone_map) 363 | 364 | (r, c) = agent.get_position() 365 | heatmap[r][c] += 1.0 366 | 367 | # Plot heatmap 368 | import matplotlib 369 | matplotlib.use('Agg') # For running in SO without graphical environment 370 | import matplotlib.pyplot as plt 371 | plt.plot(rewards[agent.get_number()]) 372 | fig, ax = plt.subplots() 373 | im = ax.imshow(heatmap) 374 | for r in range(Config.ENVIRONMENT_ROWS): 375 | for c in 
range(Config.ENVIRONMENT_COLUMNS): 376 | text = ax.text(c, r, heatmap[r, c], ha="center", va="center", color="w") 377 | fig.tight_layout() 378 | plt.savefig('heatmap_episode_' + str(episode_number) + '.png') 379 | plt.clf() 380 | 381 | # Plot agent's reward graph 382 | from numpy import sum 383 | rewards[agent.get_number()].append(sum(rewards[agent.get_number()]) + agent.get_reward()) 384 | action_rewards[agent.get_number()].append(agent.get_reward()) 385 | rewards_episodes[episode_number] += agent.get_reward() 386 | import matplotlib 387 | matplotlib.use('Agg') # For running in SO without graphical environment 388 | import matplotlib.pyplot as plt 389 | plt.plot(rewards[agent.get_number()]) 390 | plt.savefig('total_reward_evolution_drone_' + str(agent.get_number()) + '.png') 391 | plt.clf() 392 | plt.plot(action_rewards[agent.get_number()]) 393 | plt.savefig('action_reward_evolution_drone_' + str(agent.get_number()) + '.png') 394 | plt.clf() 395 | 396 | if (prev_visited_map != actual_visited_map).any(): 397 | agent.increase_valid_taken_actions() 398 | episode_total_valid_actions[episode_number] += 1.0 399 | 400 | # Store the number of times in a row that the environment does not change 401 | if (prev_visited_map == actual_visited_map).all(): 402 | unchanged_environment_episodes_count += 1 403 | else: 404 | unchanged_environment_episodes_count = 0 405 | 406 | # Save taken action in a file 407 | with open( 408 | Config.BASE_ROUTE + 'actions_' + str(agent.get_number()) + '_' + agent.get_name() + '.csv', 409 | 'a+') as f: 410 | if not episode_counter: 411 | agent.set_status('flying') 412 | f.write( 413 | 'action_code, action_name, prev_position, actual_position, valid, visited, random_action, environment_shape, actions_taken, valid_taken_actions, unchanged_episodes\n') 414 | f.write(str(chosen_action) + ', ' + Config.ACTIONS_DICT[chosen_action] + ', ' + str( 415 | prev_position) + ', ' + str(agent.get_position()) + ', ' + str( 416 | prev_position != agent.get_position()) 417 | + ', ' + str((prev_position != agent.get_position()) and 418 | (prev_visited_map[agent.get_position()[0], agent.get_position()[1]])) 419 | + ', ' + str(random_action) 420 | + ', ' + str(self.__original_environment.shape) + ', ' + str(agent.get_actions_taken()) 421 | + ', ' + str(agent.get_valid_taken_actions()) + ', ' + str( 422 | unchanged_environment_episodes_count) + '\n') 423 | 424 | # Memorize new memory observation 425 | observation = ( 426 | prev_visited_map, actual_visited_map, prev_drone_map, actual_drone_map, chosen_action, 427 | reward, agent.get_status()) 428 | agent.memorize(observation) 429 | 430 | # Save agent results for merging with the remaining agents 431 | visited_list.append(actual_visited_map + (1.0 - self.get_environment())) 432 | import matplotlib 433 | matplotlib.use('Agg') # For running in SO without graphical environment 434 | import matplotlib.pyplot as plt 435 | plt.imshow(np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=bool), cmap='Greys', 436 | interpolation='nearest') 437 | plt.savefig(Config.BASE_ROUTE + 'combined_visited_list.png') 438 | plt.clf() 439 | 440 | drone_position_list.append(actual_drone_map) 441 | 442 | # Train 443 | if not Config.GLOBAL_MODEL: 444 | agent_history = agent.learn(self.get_environment()) 445 | agent.get_model().save(str(agent.get_number()) + '_local_model.h5') 446 | else: 447 | agent_history = agent.learn_global_model(self.get_environment(), model) 448 | model.save('global_model.h5') 449 | 450 | # Check experiment stopping 451 | waiting_hours = 
float(time.time() - start_time) / 60.0 / 60.0 # Convert seconds to hours 452 | 453 | import numpy as np 454 | borders_matrix = 1.0 - np.ceil(self.get_environment()) 455 | visited_matrix = np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=float) 456 | visited_matrix = np.where(visited_matrix >= 1.0, 1.0, visited_matrix) 457 | only_visited_cells_matrix = visited_matrix - borders_matrix 458 | 459 | visited_cells_count = float(np.count_nonzero(only_visited_cells_matrix == 1.0)) 460 | visitable_cells_count = float(np.count_nonzero(self.get_environment() == 1.0)) 461 | coverage = visited_cells_count / visitable_cells_count 462 | 463 | max_coverage = max(coverage, max_coverage) 464 | max_coverages[episode_number] = max(coverage, max_coverages[episode_number]) 465 | coverages_episode.append(coverage) 466 | 467 | valid_actions_taken_agent[agent.get_number()][episode_number] = agent.get_valid_taken_actions() 468 | 469 | if unchanged_environment_episodes_count >= Config.MAXIMUM_UNCHANGED_ENVIRONMENT_EPISODES: 470 | total_unchanged_environment_episodes_count += unchanged_environment_episodes_count 471 | done = True 472 | break 473 | elif waiting_hours >= Config.MAXIMUM_WAIT_HOURS and coverage < Config.COMPLETENESS_COVERAGE: 474 | total_unchanged_environment_episodes_count += unchanged_environment_episodes_count 475 | done = True 476 | break 477 | 478 | # Check if agent had finished 479 | if False not in np.array(np.ceil(np.sum(visited_list, axis=0)), dtype=bool): 480 | with open(Config.BASE_ROUTE + 'solution_times.txt', 'a+') as f: 481 | f.write('solution time ' + str(done_count) + ': ' 482 | + time.strftime('%H:%M:%S', time.gmtime(time.time() - start_time)) 483 | + ' epsilon: ' + str(epsilon) 484 | + '\n') 485 | done_count += 1 486 | done = True 487 | break 488 | 489 | episode_counter += 1 490 | 491 | # Combine agents results 492 | drone_map = np.array(np.sum(drone_position_list, axis=0), dtype=bool) 493 | 494 | # Plot coverages for each observation graph 495 | if len(coverages_episode) > 1: 496 | import matplotlib 497 | matplotlib.use('Agg') # For running in SO without graphical environment 498 | import matplotlib.pyplot as plt 499 | ax = plt.figure().gca() 500 | ax.set_ylim([0.0, 1.0]) 501 | x = list(range(len(coverages_episode))) 502 | y = coverages_episode 503 | from numpy import polyfit 504 | fit = polyfit(x, y, 1) 505 | yfit = [n * fit[0] for n in x] + fit[1] 506 | ax.plot(x, y) 507 | ax.plot(yfit, 'r--') 508 | plt.savefig('coverages_episode_' + str(episode_number) + '.png') 509 | plt.clf() 510 | 511 | # Store and plot observation's time 512 | episodes_time.append((time.time() - start_time) / 3600.0) 513 | import numpy as np 514 | average_episode_time = np.average(episodes_time) 515 | import matplotlib 516 | matplotlib.use('Agg') # For running in SO without graphical environment 517 | import matplotlib.pyplot as plt 518 | ax = plt.figure().gca() 519 | ax.plot(episodes_time) 520 | from matplotlib.ticker import MaxNLocator 521 | ax.xaxis.set_major_locator(MaxNLocator(integer=True)) 522 | plt.savefig('episode_time_hours.png') 523 | plt.clf() 524 | 525 | 526 | # Plot valid action percentage per observation graph 527 | if len(episode_total_valid_actions) > 1: 528 | import matplotlib 529 | matplotlib.use('Agg') # For running in SO without graphical environment 530 | import matplotlib.pyplot as plt 531 | import numpy as np 532 | ax = plt.figure().gca() 533 | division = np.divide(episode_total_valid_actions, episode_total_actions) 534 | ax.set_ylim([0.0, 1.0]) 535 | x = list(range(len(division))) 
536 | y = division 537 | from numpy import polyfit 538 | fit = polyfit(x, y, 1) 539 | yfit = [n * fit[0] for n in x] + fit[1] 540 | ax.plot(x, y) 541 | ax.plot(yfit, 'r--') 542 | plt.savefig('actions_percentages_episodes.png') 543 | plt.clf() 544 | 545 | import matplotlib 546 | matplotlib.use('Agg') # For running in SO without graphical environment 547 | import matplotlib.pyplot as plt 548 | import numpy as np 549 | ax = plt.figure().gca() 550 | ax.set_ylim([0.0, 1.0]) 551 | for element in self.get_agents(): 552 | division = np.divide(valid_actions_taken_agent[element.get_number()], episode_total_actions) 553 | x = list(range(len(division))) 554 | y = division 555 | ax.plot(x, y) 556 | plt.savefig('percentage_work_per_agent.png') 557 | plt.clf() 558 | 559 | # Plot coverages graph 560 | if len(max_coverages) > 1: 561 | import matplotlib 562 | matplotlib.use('Agg') # For running in SO without graphical environment 563 | import matplotlib.pyplot as plt 564 | ax = plt.figure().gca() 565 | ax.set_ylim(bottom=0.0) 566 | x = list(range(len(max_coverages))) 567 | y = max_coverages 568 | from scipy.stats import linregress 569 | trend = linregress(x, y) 570 | trendline_slope = trend.slope # or fit[0] 571 | from numpy import polyfit 572 | fit = polyfit(x, y, 1) 573 | yfit = [n * fit[0] for n in x] + fit[1] 574 | ax.plot(x, y) 575 | ax.plot(yfit, 'r--') 576 | plt.savefig('coverages.png') 577 | plt.clf() 578 | 579 | # Plot epsilon graph 580 | import matplotlib 581 | matplotlib.use('Agg') # For running in SO without graphical environment 582 | import matplotlib.pyplot as plt 583 | ax = plt.figure().gca() 584 | ax.plot(epsilons) 585 | from matplotlib.ticker import MaxNLocator 586 | ax.xaxis.set_major_locator(MaxNLocator(integer=True)) 587 | plt.savefig('epsilons.png') 588 | plt.clf() 589 | 590 | # Update epsilon 591 | # The lower the epsilon, less random actions are taken 592 | epsilon = max(Config.MIN_EPSILON, epsilon * Config.EPSILON_DECAY) 593 | epsilons.append(epsilon) 594 | 595 | def specify_random_seed(): 596 | import numpy as np 597 | 598 | if Config.SEED == None: 599 | # Get random seed 600 | Config.SEED = np.random.randint(1, 255) 601 | 602 | # 1. Set `PYTHONHASHSEED` environment variable at a fixed value 603 | import os 604 | os.environ['PYTHONHASHSEED'] = str(Config.SEED) 605 | 606 | # 2. Set `python` built-in pseudo-random generator at a fixed value 607 | import random 608 | random.seed(Config.SEED) 609 | 610 | # 3. Set `numpy` pseudo-random generator at a fixed value 611 | import numpy as np 612 | np.random.seed(Config.SEED) 613 | 614 | # 4. Set `tensorflow` pseudo-random generator at a fixed value 615 | import tensorflow as tf 616 | if tf.__version__ < '2.0.0': 617 | tf.set_random_seed(Config.SEED) 618 | else: 619 | import tensorflow.compat.v1 as tf 620 | tf.set_random_seed(Config.SEED) 621 | 622 | # 5. Configure a new global `tensorflow` session 623 | # if tf.__version__ >= '2.0.0': 624 | # import tensorflow.compat.v1 as tf 625 | # tf.disable_v2_behavior() 626 | # import tensorflow.python.keras.backend as K 627 | from tensorflow.python.keras import backend as K 628 | session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1) 629 | sess = tf.Session(graph=tf.get_default_graph(), config=session_conf) 630 | K.set_session(sess) 631 | 632 | # 6. 
Save seed to a file
633 |     with open(Config.BASE_ROUTE + 'session_seed.txt', 'w') as seed_file:
634 |         seed_file.write(str(Config.SEED) + '\n')
635 |         seed_file.close()
636 | 
637 | 
638 | def run_tests():
639 |     import unittest
640 |     from Tests.ProgramTests import ProgramTests
641 |     test_classes_to_run = [ProgramTests]
642 |     loader = unittest.TestLoader()
643 | 
644 |     suites_list = []
645 |     for test_class in test_classes_to_run:
646 |         suite = loader.loadTestsFromTestCase(test_class)
647 |         suites_list.append(suite)
648 | 
649 |     big_suite = unittest.TestSuite(suites_list)
650 | 
651 |     runner = unittest.TextTestRunner()
652 |     result = runner.run(big_suite)
653 | 
654 |     print('Tests run ', result.testsRun)
655 |     print('Error number: ', len(result.errors))
656 |     print('Errors ', result.errors)
657 |     print('Failure number: ', len(result.failures))
658 |     print('Failures ', result.failures)
659 | 
660 | 
661 | if __name__ == '__main__':
662 |     if Config.UNIT_TESTS:
663 |         print('\n\n\nRun unit tests')
664 |         run_tests()
665 | 
666 |     print('\n\n\nSetting random seed')
667 |     specify_random_seed()
668 | 
669 |     print('\n\n\nInitializing program')
670 |     program = Program()
671 | 
672 |     print('\n\n\nReading configuration')
673 |     program.read_data()
674 | 
675 |     print('\n\n\nCompute flying environment')
676 |     if not Config.LOAD_MAP_FILE:
677 |         program.compute_environment()
678 |     else:
679 |         import numpy as np
680 |         program.set_environment(np.loadtxt(Config.BASE_ROUTE + Config.MAP_ROUTE))
681 | 
682 |     print('\n\n\nCompute flying path')
683 |     program.compute_path()
684 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # UAV SWARM PATH PLANNING WITH REINFORCEMENT LEARNING FOR FIELD PROSPECTING: PRELIMINARY RESULTS
2 | 
3 | System for the coordination of UAV swarms for path planning in agricultural fields, written in Python 3 with open-source libraries.
4 | 
5 | The system relies on Reinforcement Learning; to be more precise, it uses the Deep Q-Learning technique.
6 | 
7 | This is the code needed to reproduce the preliminary results obtained.
8 | 
9 | ## Installation
10 | 
11 | To install the system, first clone this repository:
12 | 
13 | ```bash
14 | $ git clone https://github.com/TheMVS/QLearning_drone.git
15 | ```
16 | 
17 | Once downloaded, enter the project's root folder and install the required libraries with [pip](https://pip.pypa.io/en/stable/):
18 | 
19 | ```bash
20 | $ cd QLearning_drone
21 | $ pip install -r requirements.txt
22 | ```
23 | 
24 | The requirements.txt file includes the following libraries:
25 | 
26 | * [numpy](https://numpy.org)
27 | * [scipy](https://www.scipy.org)
28 | * [Shapely](https://shapely.readthedocs.io/en/latest/)
29 | * [Keras](https://keras.io)
30 | * [Matplotlib](https://matplotlib.org)
31 | 
32 | ## Usage
33 | 
34 | ### Run system
35 | 
36 | To run the system, go to the root folder of the project and execute [Program.py](https://github.com/TheMVS/QLearning_drone/blob/master/Program.py):
37 | 
38 | ```bash
39 | $ cd QLearning_drone
40 | $ python Program.py
41 | ```
42 | 
43 | ### Configuration
44 | 
45 | All data needed for the experiments should be added to [data.json](https://github.com/TheMVS/uav_swarm_learning_preliminary/blob/main/data.json) and [Config.py](https://github.com/TheMVS/uav_swarm_learning_preliminary/blob/main/Config.py).
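
As a quick example, a short test run can be set up by editing a few values in [Config.py](https://github.com/TheMVS/uav_swarm_learning_preliminary/blob/main/Config.py) before launching `Program.py`. The snippet below is only an illustrative sketch; these values are not the ones used for the reported experiments:

```python
# Illustrative Config.py values for a quick local test run (not the experimental setup)
SEED = 42                  # fix the random seed so the run is reproducible
SIMULATIONS = 50           # fewer episodes than the default 500
ENVIRONMENT_ROWS = 5       # smaller grid, faster episodes
ENVIRONMENT_COLUMNS = 5
GLOBAL_MODEL = False       # one Deep Q-Network per UAV; True trains a single shared model
PRINT_SIMULATIONS = True   # print a checkpoint message every SIMULATIONS_CHECKPOINT episodes
```

The number of UAVs in the swarm is given by the number of entries in the `drones` list of `data.json`, and the field to prospect is the polygon defined by its `points` (latitude/longitude pairs); `operator_position` is used to choose the take-off cell when `START_CORNER_0_0` is disabled.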
46 | 47 | ## Authors 48 | 49 | * [Alejandro Puente-Castro](https://orcid.org/0000-0002-0134-6877) 50 | * [Daniel Rivero](https://orcid.org/0000-0001-8245-3094) 51 | * [Alejandro Pazos](https://orcid.org/0000-0003-2324-238X) 52 | * [Enrique Fernandez-Blanco](https://orcid.org/0000-0003-3260-8734) 53 | 54 | ## License 55 | [MIT](https://choosealicense.com/licenses/mit/) 56 | -------------------------------------------------------------------------------- /Tests/ProgramTests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | 4 | from Program import Program 5 | 6 | 7 | class ProgramTests(unittest.TestCase): 8 | def test_compute_minimum_area(self): 9 | from Drone import Drone 10 | drone_list = [Drone('test1', 30.0, 18.0, (3840, 2160), 12, 104), 11 | Drone('test2', 30.0, 18.0, (3840, 2160), 12, 104), 12 | Drone('test3', 30.0, 18.0, (3840, 2160), 6, 104)] 13 | obtained = Program().compute_minimum_area(drone_list) 14 | self.assertEqual(obtained, (15.359299586316945, 8.639606017303281)) 15 | self.assertEqual(obtained[0] * obtained[1], 132.6982971275077) 16 | 17 | -------------------------------------------------------------------------------- /Tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheMVS/uav_swarm_reinforcement_learning/1f61b2b74b5575372928fe4e53a53b1e6cb630ea/Tests/__init__.py -------------------------------------------------------------------------------- /data.json: -------------------------------------------------------------------------------- 1 | { 2 | "drones": [ 3 | { 4 | "name": "Xiaomi Mi Drone", 5 | "battery_time": 27, 6 | "image_size": { 7 | "w": 3840, 8 | "h": 2160 9 | }, 10 | "image_angle": 104, 11 | "speed": 18, 12 | "height": 12 13 | }, 14 | { 15 | "name": "Xiaomi_Mi_Drone", 16 | "battery_time": 27, 17 | "image_size": { 18 | "w": 3840, 19 | "h": 2160 20 | }, 21 | "image_angle": 104, 22 | "speed": 18, 23 | "height": 12 24 | }, 25 | { 26 | "name": "Xiaomi_Mi_Drone", 27 | "battery_time": 27, 28 | "image_size": { 29 | "w": 3840, 30 | "h": 2160 31 | }, 32 | "image_angle": 104, 33 | "speed": 18, 34 | "height": 6 35 | } 36 | ], 37 | "points": [ 38 | { 39 | "lat": 43.334799, 40 | "long": -8.412510 41 | },{ 42 | "lat": 43.333987, 43 | "long": -8.411464 44 | },{ 45 | "lat": 43.333507, 46 | "long": -8.412168 47 | },{ 48 | "lat": 43.333337, 49 | "long": -8.413867 50 | },{ 51 | "lat": 43.334284, 52 | "long": -8.413229 53 | } 54 | ], 55 | "operator_position": { 56 | "lat": 43.334799, 57 | "long": -8.412505 58 | } 59 | } --------------------------------------------------------------------------------