├── log ├── model │ └── .gitkeep ├── param │ └── .gitkeep ├── tf │ └── .gitkeep └── viz │ └── .gitkeep ├── desired_goal.npy ├── .gitignore ├── .gitmodules ├── requirements.txt ├── utils.py ├── constants.py ├── memory_her.py ├── README.md ├── environment.py ├── dqn_her.py ├── airsimapi.py └── run.py /log/model/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /log/param/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /log/tf/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /log/viz/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /desired_goal.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcarcamo/sar-deep-rl/HEAD/desired_goal.npy -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | env/ 2 | __pycache__/ 3 | log/model/*.h5 4 | log/param/*.json 5 | log/tf/events* 6 | log/viz/*.png 7 | log/*.csv 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "sar-airsim-env"] 2 | path = sar-airsim-env 3 | url = https://github.com/jcarcamo/sar-airsim-env.git 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.3.0 2 | astor==0.7.1 3 | certifi==2018.8.13 4 | cycler==0.10.0 5 | gast==0.2.0 6 | grpcio==1.14.1 7 | h5py==2.8.0 8 | Keras==2.1.3 9 | Markdown==2.6.11 10 | matplotlib==2.1.2 11 | msgpack-python==0.5.6 12 | msgpack-rpc-python==0.4.1 13 | numpy==1.14.5 14 | opencv-python==3.4.0.12 15 | Pillow==5.0.0 16 | protobuf==3.6.1 17 | pyparsing==2.2.0 18 | python-dateutil==2.7.3 19 | pytz==2018.5 20 | PyYAML==3.13 21 | scipy==1.1.0 22 | six==1.11.0 23 | tensorboard==1.10.0 24 | tensorflow-gpu==1.10.0 25 | termcolor==1.1.0 26 | tornado==4.5.3 27 | Werkzeug==0.14.1 28 | wincertstore==0.2 29 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | def add_channel(image): 2 | gray_image = image/255.0 3 | gray_image = gray_image.reshape((1,image.shape[0],image.shape[1],1)) 4 | #Debug to confirm the image is passed as floats 5 | #cv2.imshow("original",image) 6 | #cv2.imshow("float",gray_image[0,:,:,0]) 7 | #cv2.waitKey(0) 8 | #exit() 9 | return gray_image 10 | 11 | def get_observation_images(obs): 12 | observation = [] 13 | observation.append(add_channel(obs[0].image)) 14 | observation.append(add_channel(obs[1])) 15 | return observation 16 | 17 | def connectpoints(x,y,p1,p2): 18 | x1, x2 = x[p1], x[p2] 19 | y1, y2 = y[p1], y[p2] 20 | plt.plot([x1,x2],[y1,y2],'r-') 21 | -------------------------------------------------------------------------------- /constants.py: 
-------------------------------------------------------------------------------- 1 | ENV_NAME = 'AirSimEnv-v0' 2 | 3 | CONTINUE = False #load a pre-trained model 4 | RESTART_EP = 0 # the episode number of the pre-trained model 5 | 6 | TRAIN = True # train the network 7 | USE_TARGET_NETWORK = False # use the target network 8 | 9 | RANDOM_WALK=False 10 | TF_DEVICE = '/gpu:0' 11 | MAX_EPOCHS = 100000 # max episode number 12 | MEMORY_SIZE = 50000 13 | LEARN_START_STEP = 32 14 | 15 | BATCH_SIZE = 32 16 | LEARNING_RATE = 1e-3 # 1e6 17 | GAMMA = 0.95 18 | INITIAL_EPSILON = 1 # starting value of epsilon 19 | FINAL_EPSILON = 0.1 # final value of epsilon 20 | MAX_EXPLORE_STEPS = 1000 21 | TEST_INTERVAL_EPOCHS = 100 22 | SAVE_INTERVAL_EPOCHS = 50 23 | 24 | LOG_NAME_SAVE = 'log' 25 | MODEL_DIR = LOG_NAME_SAVE + '/model' # the path to save deep model 26 | PARAM_DIR = LOG_NAME_SAVE + '/param' # the path to save the parameters 27 | VIZ_DIR = LOG_NAME_SAVE + '/viz' # the path to save viz 28 | DATA_FILE = LOG_NAME_SAVE + '/data_trained.csv' # the path to save trajectory 29 | 30 | LOG_NAME_READ = 'log' 31 | #the path to reload weights, monitor and params 32 | weights_path = LOG_NAME_READ + '/model/dqn_her_ep' + str(RESTART_EP)+ '.h5' 33 | params_json = LOG_NAME_READ + '/param/dqn_her_ep' + str(RESTART_EP) + '.json' 34 | -------------------------------------------------------------------------------- /memory_her.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | class Memory: 5 | """ 6 | Modified to include goals as part of the Experience Replay 7 | """ 8 | 9 | def __init__(self, size): 10 | self.size = size 11 | self.currentPosition = 0 12 | self.states = [] 13 | self.statesGoals = [] 14 | self.actions = [] 15 | self.rewards = [] 16 | self.newStates = [] 17 | self.newStatesGoals = [] 18 | self.finals = [] 19 | 20 | def getCurrentSize(self) : 21 | return len(self.states) 22 | 23 | def getMemory(self, index): 24 | return {'state': self.states[index],'action': self.actions[index], 'reward': self.rewards[index], 'newState': self.newStates[index], 'isFinal': self.finals[index],'statesGoals': self.statesGoals[index], 'newStatesGoals': self.newStatesGoals[index]} 25 | 26 | def addMemory(self, state, stateGoal, action, reward, newState, \ 27 | newStateGoal, isFinal) : 28 | if (self.currentPosition >= self.size - 1) : 29 | self.currentPosition = 0 30 | if (len(self.states) > self.size) : 31 | self.states[self.currentPosition] = state 32 | self.statesGoals[self.currentPosition] = stateGoal 33 | self.actions[self.currentPosition] = action 34 | self.rewards[self.currentPosition] = reward 35 | self.newStates[self.currentPosition] = newState 36 | self.newStatesGoals[self.currentPosition] = newStateGoal 37 | self.finals[self.currentPosition] = isFinal 38 | 39 | else : 40 | self.states.append(state) 41 | self.statesGoals.append(stateGoal) 42 | self.actions.append(action) 43 | self.rewards.append(reward) 44 | self.newStates.append(newState) 45 | self.newStatesGoals.append(newStateGoal) 46 | self.finals.append(isFinal) 47 | 48 | self.currentPosition += 1 49 | 50 | def getMiniBatch(self, size) : 51 | state_batch = [] 52 | stateGoal_batch = [] 53 | action_batch = [] 54 | reward_batch = [] 55 | newState_batch = [] 56 | newStateGoal_batch = [] 57 | isFinal_batch = [] 58 | 59 | indices = random.sample(list(np.arange(len(self.states))), min(size,len(self.states)) ) 60 | for index in indices: 61 | state_batch.append(self.states[index][0]) 62 | 
stateGoal_batch.append(self.statesGoals[index][0]) 63 | action_batch.append(self.actions[index]) 64 | reward_batch.append(self.rewards[index]) 65 | newState_batch.append(self.newStates[index][0]) 66 | newStateGoal_batch.append(self.newStatesGoals[index][0]) 67 | isFinal_batch.append(self.finals[index]) 68 | 69 | 70 | return state_batch, stateGoal_batch, action_batch, reward_batch, newState_batch, newStateGoal_batch, isFinal_batch 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning approach to using Unmanned Aerial Vehicles (UAVs) in Search and Rescue (SAR) 2 | 3 | ## Abstract 4 | 5 | Unmanned Aerial Vehicles (UAVs) are becoming more prevalent, more capable, and less expensive every day. Advances in battery life and electronic sensors have spurred the development of diverse UAV applications outside their original military domain. For example, Search and Rescue (SAR) operations stand to benefit greatly from modern UAVs since even the simplest commercial models are equipped with high-resolution cameras and the ability to stream video to a computer or portable device. As a result, autonomous unmanned systems (terrestrial, marine, and aerial) have begun to be employed for such typical SAR tasks as terrain mapping, task observation, and early supply delivery. However, these systems were developed before recent advances in artificial intelligence such as Google DeepMind's breakthrough with the Deep Q-Network (DQN). Therefore, most of them rely heavily on greedy or potential-based heuristics, without the ability to learn. In this research, we model the problem as a Partially Observable Markov Decision Process (POMDP) and investigate enhancing the performance of autonomous UAVs in SAR by incorporating newly developed Reinforcement Learning methods. The project utilizes open-source tools such as Microsoft's state-of-the-art UAV simulator AirSim, and Keras, a machine learning framework that can make use of Google's popular TensorFlow library. The main approach investigated in this research is the Deep Q-Network. 6 | 7 | ## Requirements 8 | 9 | This project uses [Unreal Engine 4](https://www.unrealengine.com/en-US/what-is-unreal-engine-4) and [Microsoft's AirSim](https://github.com/Microsoft/AirSim) as the vehicle simulator. Python 3.6 was used to build the environment and agent, and [Keras](https://keras.io/) with a [TensorFlow](https://www.tensorflow.org/) backend was used for the agent implementation. 10 | 11 | ### Python dependencies 12 | 13 | Python 3.6 was used for this development. All dependencies can be found in the ```requirements.txt``` file; the most notable ones are listed below. 14 | 15 | - TensorFlow 1.10 16 | - Keras 2.1.3 17 | - opencv-python 3.4 18 | - msgpack-rpc-python 0.4.1 19 | 20 | 21 | ### Installation 22 | 23 | It is recommended to use [Anaconda](https://www.continuum.io/downloads) to install and manage your Python environment. 24 | 25 | ```bash 26 | conda create -n <env-name> python=3.6 27 | activate <env-name> 28 | cd <path-to-this-repository> 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | An example Unreal Engine project is included as a git submodule for convenience.
If you wish to use it, issue the following commands: 33 | 34 | ```bash 35 | cd sar-airsim-env 36 | git submodule init 37 | git submodule update 38 | ``` 39 | 40 | After that, please follow the [AirSim instructions](https://github.com/Microsoft/AirSim/blob/297f1c49d3ef0a1a0f0d841ef7dafa89603db327/docs/unreal_custenv.md) to properly set up the AirSim plugin in the Unreal Engine project. 41 | 42 | ## Acknowledgments 43 | 44 | This implementation is inspired by the following repositories: [AirGym](https://github.com/Kjell-K/AirGym) and [gym-unrealcv](https://github.com/zfw1226/gym-unrealcv). Both repositories contain excellent examples of what is needed to get started with [Unreal Engine 4](https://www.unrealengine.com/en-US/what-is-unreal-engine-4) and [Microsoft's AirSim](https://github.com/Microsoft/AirSim). 45 | -------------------------------------------------------------------------------- /environment.py: -------------------------------------------------------------------------------- 1 | from airsimapi import * 2 | 3 | class State(): 4 | def __init__(self, image, position, orientation): 5 | self.image = image 6 | self.position = position 7 | self.orientation = orientation 8 | 9 | class Env(): 10 | def __init__(self, shaped_reward = False): 11 | self.client = customAirSimClient() 12 | self.state = State(self.client.getDepthImage(), \ 13 | self.client.getPosition(), \ 14 | self.client.getOrientation()) 15 | self.shaped_reward = shaped_reward 16 | self.reward_factor = 1 17 | self.reward_th = 0.05 18 | self.desired_goal = np.load("desired_goal.npy") 19 | 20 | def reward(self, boxes): 21 | reward = 0 22 | # if self.shaped_reward: # shaped (non-sparse) reward 23 | for box in boxes: 24 | # get reward of all boxes considering the size and position of box 25 | (xmin, ymin), (xmax, ymax) = box 26 | boxsize = (ymax - ymin) * (xmax - xmin) 27 | x_c = (xmax + xmin) / 2.0 28 | x_bias = x_c - 0.5 29 | discount = max(0, 1 - x_bias ** 2) 30 | reward += discount * boxsize 31 | 32 | if reward > self.reward_th: 33 | if self.shaped_reward: # shaped (non-sparse) reward 34 | reward = min(reward * self.reward_factor, 10) 35 | else: 36 | reward = 0 37 | print ('Get ideal Target!!!') 38 | elif reward == 0: 39 | reward = -1 40 | # print ('Get Nothing') 41 | else: 42 | if self.shaped_reward: # shaped (non-sparse) reward 43 | reward = 0 44 | else: 45 | reward = -1 46 | # print ('Get small Target!!!') 47 | # else: # sparse reward, to test HER 48 | # for box in boxes: 49 | # (xmin, ymin), (xmax, ymax) = box 50 | # # print ("xmax:",(xmax),"xmin:", (xmin),"xmax-xmin:",(xmax-xmin)) 51 | # # print ("ymax:",(ymax),"ymin:", (ymin),"ymax-ymin:",(ymax-ymin)) 52 | # boxsize = (ymax - ymin) * (xmax - xmin) 53 | # reward += boxsize 54 | # print ("Reward:",reward) 55 | # if reward > self.reward_th: 56 | # reward = 0 57 | # print ('Found object') 58 | # else: 59 | # reward = -1 60 | # print ('Get Nothing') 61 | return reward 62 | 63 | def step(self, action): 64 | done = False 65 | reward = -1 66 | collided = self.client.move(action) 67 | observation = self.client.getDepthImage() 68 | self.state = State(observation, \ 69 | self.client.getPosition(), \ 70 | self.client.getOrientation()) 71 | if not collided: 72 | #check if object is in image.
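# Note: detection is a placeholder rather than a learned classifier. The segmentation colour assigned to the
# 'Goal' object in airsimapi.customAirSimClient is isolated by getMask/getBbox, and the resulting normalized
# bounding boxes are what feed the reward() method above.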
73 | object_mask = self.client.getSegementationImage() 74 | boxes = self.client.getBboxes(object_mask, ['Goal']) 75 | reward = self.reward(boxes) 76 | if self.shaped_reward: 77 | if reward > 0: 78 | done = True 79 | else: 80 | if reward == 0: 81 | done = True 82 | else: 83 | if self.shaped_reward: 84 | reward = -100 85 | else: 86 | reward = -1 87 | done = True 88 | return [self.state, self.desired_goal], reward, done 89 | 90 | def reset(self, size = None): 91 | self.client.reset() 92 | self.state = State(self.client.getDepthImage(), \ 93 | self.client.getPosition(), \ 94 | self.client.getOrientation()) 95 | return [self.state,self.desired_goal] 96 | -------------------------------------------------------------------------------- /dqn_her.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import numpy as np 4 | from keras.models import Model, Sequential, load_model 5 | from keras.layers import concatenate, Input, Conv2D, Convolution2D, Flatten, ZeroPadding2D 6 | from keras.layers.core import Dense, Dropout, Activation 7 | from keras.layers.pooling import MaxPooling2D 8 | from keras.optimizers import SGD , Adam 9 | import tensorflow as tf 10 | import keras.backend.tensorflow_backend as KTF 11 | from keras.callbacks import TensorBoard 12 | import memory_her 13 | import keras.backend as K 14 | from constants import * 15 | class DeepQ: 16 | 17 | def __init__(self, outputs, memorySize, discountFactor, learningRate, img_rows, img_cols, img_channels ,useTargetNetwork): 18 | 19 | self.output_size = outputs 20 | self.memory = memory_her.Memory(memorySize) 21 | self.discountFactor = discountFactor 22 | self.learningRate = learningRate 23 | self.img_rows = img_rows 24 | self.img_cols = img_cols 25 | self.img_channels = img_channels 26 | self.useTargetNetwork = useTargetNetwork 27 | self.count_steps = 0 28 | 29 | self.board = TensorBoard(log_dir='./log/tf', \ 30 | histogram_freq=0, write_graph=False, write_images=True) 31 | if K.backend() == 'tensorflow': 32 | with KTF.tf.device(TF_DEVICE): 33 | config = tf.ConfigProto(allow_soft_placement = True) 34 | config.gpu_options.allow_growth = True 35 | KTF.set_session(tf.Session(config=config)) 36 | self.initNetworks() 37 | else : 38 | self.initNetworks() 39 | 40 | def initNetworks(self): 41 | 42 | self.model = self.createModel() 43 | if self.useTargetNetwork: 44 | self.targetModel = self.createModel() 45 | 46 | def createModel(self): 47 | input_shape = (self.img_channels, self.img_rows, self.img_cols) 48 | if K.image_dim_ordering() == 'tf': 49 | input_shape = (self.img_rows, self.img_cols, self.img_channels) 50 | 51 | state = Sequential(name='state_conv') 52 | state.add(Conv2D(32, (4, 4), strides=(4, 4) ,activation='relu', input_shape=input_shape)) 53 | state.add(Conv2D(64, (3, 3), strides=(2, 2), activation='relu')) 54 | state.add(Conv2D(64, (1, 1), strides=(1, 1), activation='relu')) 55 | state.add(Flatten()) 56 | 57 | # Now let's get a tensor with the output of our state: 58 | state_input = Input(input_shape, name='state_input') 59 | encoded_state = state(state_input) 60 | 61 | goal = Sequential(name='goal_conv') 62 | goal.add(Conv2D(32, (4, 4), strides=(4, 4) ,activation='relu', input_shape=input_shape)) 63 | goal.add(Conv2D(64, (3, 3), strides=(2, 2), activation='relu')) 64 | goal.add(Conv2D(64, (1, 1), strides=(1, 1), activation='relu')) 65 | goal.add(Flatten()) 66 | 67 | # Now let's get a tensor with the output of our goal: 68 | goal_input = Input(input_shape, name='goal_input') 
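# The goal branch below mirrors the state branch (same layer shapes, separate weights); the two flattened
# encodings are concatenated and passed through a 512-unit dense layer that produces one output per discrete action.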
69 | encoded_goal = goal(goal_input) 70 | 71 | merged = concatenate([encoded_state, encoded_goal], \ 72 | name='state_goal_input') 73 | 74 | fc = Dense(512, activation='relu', name='fc')(merged) 75 | model_output = Dense(self.output_size, activation='softmax', \ 76 | name='action')(fc) 77 | 78 | model = Model(inputs=[state_input, goal_input], outputs=model_output) 79 | 80 | model.compile(optimizer=Adam(lr=self.learningRate), loss='mean_squared_error', metrics=['mae','accuracy']) 81 | model.summary() 82 | 83 | 84 | return model 85 | 86 | 87 | def backupNetwork(self, model, backup): 88 | weightMatrix = [] 89 | for layer in model.layers: 90 | weights = layer.get_weights() 91 | weightMatrix.append(weights) 92 | i = 0 93 | for layer in backup.layers: 94 | weights = weightMatrix[i] 95 | layer.set_weights(weights) 96 | i += 1 97 | 98 | def updateTargetNetwork(self): 99 | self.backupNetwork(self.model, self.targetModel) 100 | print ('update target network') 101 | 102 | # predict Q values for all the actions 103 | def getQValues(self, state): 104 | if self.useTargetNetwork: 105 | predicted = self.targetModel.predict(state) 106 | else: 107 | predicted = self.model.predict(state) 108 | return predicted[0] 109 | 110 | def getMaxIndex(self, qValues): 111 | return np.argmax(qValues) 112 | 113 | # select the action with the highest Q value 114 | def selectAction(self, qValues, explorationRate): 115 | rand = random.random() 116 | was_rand = False 117 | if rand < explorationRate : 118 | was_rand = True 119 | action = np.random.randint(0, self.output_size) 120 | else : 121 | action = self.getMaxIndex(qValues) 122 | # action = self.getMaxIndex(qValues) 123 | return action, was_rand 124 | 125 | def addMemory(self, state, stateGoal, action, reward, newState, \ 126 | newStateGoal, isFinal): 127 | self.memory.addMemory(state, stateGoal, action, reward, newState, \ 128 | newStateGoal, isFinal) 129 | 130 | 131 | def getMemorySize(self): 132 | return self.memory.getCurrentSize() 133 | 134 | 135 | def learnOnMiniBatch(self, miniBatchSize,): 136 | 137 | self.count_steps += 1 138 | 139 | state_batch, goals_batch, action_batch,reward_batch,newState_batch, _,isFinal_batch, \ 140 | = self.memory.getMiniBatch(miniBatchSize) 141 | 142 | qValues_batch = self.model.predict([np.array(state_batch), \ 143 | np.array(goals_batch)],batch_size=miniBatchSize) 144 | 145 | isFinal_batch = np.array(isFinal_batch) + 0 146 | 147 | if self.useTargetNetwork: 148 | qValuesNewState_batch = self.targetModel.predict_on_batch([np.array(newState_batch), \ 149 | np.array(goals_batch)]) 150 | else : 151 | qValuesNewState_batch = self.model.predict_on_batch([np.array(newState_batch), \ 152 | np.array(goals_batch)]) 153 | 154 | Y_sample_batch = reward_batch + (1 - isFinal_batch) * self.discountFactor * np.max(qValuesNewState_batch, axis=1) 155 | 156 | X_batch = [np.array(state_batch),np.array(goals_batch)] 157 | Y_batch = np.array(qValues_batch) 158 | 159 | for i,action in enumerate(action_batch): 160 | Y_batch[i][action] = Y_sample_batch[i] 161 | 162 | self.model.fit(X_batch, Y_batch, validation_split=0.0, batch_size = miniBatchSize, epochs=1, verbose = 0, callbacks=[self.board]) 163 | 164 | if self.useTargetNetwork and self.count_steps % 1000 == 0: 165 | self.updateTargetNetwork() 166 | 167 | 168 | def saveModel(self, path): 169 | if self.useTargetNetwork: 170 | self.targetModel.save(path) 171 | else: 172 | self.model.save(path) 173 | 174 | def loadWeights(self, path): 175 | self.model.load_weights(path) 176 | if self.useTargetNetwork: 177 | 
self.targetModel.load_weights(path) 178 | 179 | 180 | def feedforward(self,observation,explorationRate): 181 | qValues = self.getQValues(observation) 182 | action, was_rand = self.selectAction(qValues, explorationRate) 183 | return action, was_rand 184 | -------------------------------------------------------------------------------- /airsimapi.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import cv2 4 | from PIL import Image 5 | 6 | #Using AirSimClient from AirSim Repo 7 | sys.path.append('C:/Users/juan_/git/AirSim/PythonClient') 8 | from AirSimClient import * 9 | 10 | class customAirSimClient(MultirotorClient): 11 | def __init__(self): 12 | MultirotorClient.__init__(self) 13 | MultirotorClient.confirmConnection(self) 14 | self.enableApiControl(True) 15 | self.armDisarm(True) 16 | self.takeoff() 17 | 18 | # hack instead of a classifier for objects 19 | # TODO include the real classifier 20 | # success = self.simSetSegmentationObjectID("SkySphere", 42, True) 21 | # print('success', success) 22 | success = self.simSetSegmentationObjectID("[\w]*", 0, True) 23 | print('Setting everything to 0', success) 24 | # success = self.simSetSegmentationObjectID("birch[\w]*", 2, True) 25 | # print('success', success) 26 | # success = self.simSetSegmentationObjectID("fir[\w]*", 2, True) 27 | # print('success', success) 28 | # success = self.simSetSegmentationObjectID("hedge[\w]*", 5, True) 29 | # print('success', success) 30 | #success = self.simSetSegmentationObjectID("tree[\w]*", 255, True) 31 | success = self.simSetSegmentationObjectID("Goal", 2) 32 | print('Setting our goal', success) 33 | #wait until it takes off 34 | counter = 0 35 | while self.getPosition().z_val > -1.45: 36 | print("taking off: ", self.getPosition().z_val) 37 | time.sleep(0.2) 38 | self.z = self.getPosition().z_val 39 | 40 | 41 | def straight(self, duration, speed): 42 | pitch, roll, yaw = self.getPitchRollYaw() 43 | vx = math.cos(yaw) * speed 44 | vy = math.sin(yaw) * speed 45 | self.moveByVelocityZ(vx, vy, self.z, duration, DrivetrainType.ForwardOnly) 46 | start = time.time() 47 | return start, duration 48 | 49 | def yaw_right(self, duration): 50 | self.rotateByYawRate(30, duration) 51 | start = time.time() 52 | return start, duration 53 | 54 | def yaw_left(self, duration): 55 | self.rotateByYawRate(-30, duration) 56 | start = time.time() 57 | return start, duration 58 | """ 59 | # According to AirSim docs, this are the drone available cameras 60 | # cam_id=0, center front 61 | # cam_id=1, left front 62 | # cam_id=2, right front 63 | # cam_id=4, center rear 64 | # https://github.com/Microsoft/AirSim/blob/e169e65755f4254c29b67617f50502a262e64e63/docs/image_apis.md 65 | """ 66 | def getSceneImage(self, cam_id): 67 | responses = self.simGetImages([ \ 68 | ImageRequest(cam_id, AirSimImageType.Scene, False, False), \ 69 | ]) 70 | response = responses[0] 71 | img1d = np.fromstring(response.image_data_uint8, dtype=np.uint8) 72 | # reshape array to 4 channel image array H X W X 4 73 | img_rgba = img1d.reshape(response.height, response.width, 4) 74 | #discard alpha, to make image smaller 75 | #TODO check if it influence training 76 | img_rgb = img_rgba[:,:,:-1] 77 | #cv2.imshow("Scene", img_rgba) 78 | #cv2.waitKey(0) 79 | return img_rgb[:,:,::-1] 80 | 81 | def getDepthImage(self): 82 | rawImage = self.simGetImages([ImageRequest(0, AirSimImageType.DepthPerspective, True, False)])[0] 83 | # Taken from original AirGym, transforms from float img to int image 84 | 
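# np.maximum(...) clamps depth values below 1 m to 1, so the 255/depth mapping never divides by zero or exceeds 255;
# the power-law curve (factor 10) applied afterwards pushes all but the brightest pixels toward black.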
img1d = np.array(rawImage.image_data_float, dtype=np.float) 85 | img1d = 255/np.maximum(np.ones(img1d.size), img1d) 86 | img2d = np.reshape(img1d, (rawImage.height, rawImage.width)) 87 | image = np.invert(np.array(Image.fromarray(img2d.astype(np.uint8), mode='L'))) 88 | 89 | factor = 10 90 | maxIntensity = 255.0 # depends on dtype of image data 91 | # Decrease intensity such that dark pixels become much darker, bright pixels become slightly dark 92 | depth = (maxIntensity)*(image/maxIntensity)**factor 93 | depth = np.array(depth, dtype=np.uint8) 94 | return depth 95 | 96 | def getSegementationImage(self): 97 | rawImage = self.simGetImages([ \ 98 | ImageRequest(0, AirSimImageType.Segmentation, False, False)])[0] 99 | img1d = np.fromstring(rawImage.image_data_uint8, dtype=np.uint8) #get numpy array 100 | img_rgba = img1d.reshape(rawImage.height, rawImage.width, 4) 101 | img_rgb = img_rgba[:,:,:-1] 102 | # print(np.unique(img_rgba[:,:,0], return_counts=True)) #red 103 | # print(np.unique(img_rgba[:,:,1], return_counts=True)) #green 104 | # print(np.unique(img_rgba[:,:,2], return_counts=True)) #blue 105 | return img_rgb 106 | 107 | #Harcoded currently 108 | def getMask(self,object_mask): 109 | #[r,g,b] = 94, 253, 92 110 | #lower_range = np.array([g-50,b-50,r-25]) 111 | #upper_range = np.array([g+2,b+50,r+25]) 112 | [r,g,b] = 193, 103, 112 113 | lower_range = np.array([b-3,g-3,r-3], dtype='uint8') 114 | upper_range = np.array([b+3,g+3,r+3], dtype='uint8') 115 | mask = cv2.inRange(object_mask, lower_range, upper_range) 116 | return mask 117 | 118 | def getBbox(self,object_mask,object): 119 | #only get an object's bounding box 120 | width = object_mask.shape[1] 121 | height = object_mask.shape[0] 122 | mask = self.getMask(object_mask) 123 | output = cv2.bitwise_and(object_mask, object_mask, mask = mask) 124 | # cv2.imshow("mask",mask) 125 | # cv2.imshow("side-by-side",np.hstack([object_mask, output])) 126 | # cv2.waitKey(0) 127 | nparray = np.array([[[0, 0]]]) 128 | pixelpointsCV2 = cv2.findNonZero(mask) 129 | 130 | if type(pixelpointsCV2) == type(nparray):# exist target in image 131 | x_min = pixelpointsCV2[:,:,0].min() 132 | x_max = pixelpointsCV2[:,:,0].max() 133 | y_min = pixelpointsCV2[:,:,1].min() 134 | y_max = pixelpointsCV2[:,:,1].max() 135 | #print x_min, x_max ,y_min, y_max 136 | box = ((x_min/float(width),y_min/float(height)),#left top 137 | (x_max/float(width),y_max/float(height)))#right down 138 | else: 139 | box = ((0,0),(0,0)) 140 | 141 | return mask , box 142 | 143 | def getBboxes(self, object_mask, targets): 144 | boxes = [] 145 | for obj in targets: 146 | mask,box = self.getBbox(object_mask, obj) 147 | boxes.append(box) 148 | return boxes 149 | 150 | def move(self, action): 151 | start = time.time() 152 | duration = 0 153 | collided = False 154 | 155 | if action == 0: 156 | start, duration = self.straight(1, 4) 157 | while duration > time.time() - start: 158 | if self.getCollisionInfo().has_collided == True: 159 | return True 160 | self.moveByVelocity(0, 0, 0, 1) 161 | self.rotateByYawRate(0, 1) 162 | 163 | 164 | if action == 1: 165 | start, duration = self.yaw_right(0.8) 166 | while duration > time.time() - start: 167 | if self.getCollisionInfo().has_collided == True: 168 | return True 169 | self.moveByVelocity(0, 0, 0, 1) 170 | self.rotateByYawRate(0, 1) 171 | 172 | if action == 2: 173 | start, duration = self.yaw_left(1) 174 | while duration > time.time() - start: 175 | if self.getCollisionInfo().has_collided == True: 176 | return True 177 | self.moveByVelocity(0, 0, 0, 1) 
178 | self.rotateByYawRate(0, 1) 179 | 180 | if action == 3: 181 | fake = True 182 | #start, duration = self.straight(1, 4) 183 | #while duration > time.time() - start: 184 | # if self.getCollisionInfo().has_collided == True: 185 | # return True 186 | #self.moveByVelocity(0, 0, 0, 1) 187 | #self.rotateByYawRate(0, 1) 188 | 189 | return collided 190 | 191 | def reset(self): 192 | super().reset() 193 | time.sleep(0.2) 194 | self.enableApiControl(True) 195 | self.armDisarm(True) 196 | self.takeoff() 197 | counter = 0 198 | #wait until it takes off 199 | while self.getPosition().z_val > -1.45: 200 | print("taking off: ", self.getPosition().z_val) 201 | time.sleep(0.2) 202 | counter +=1 203 | if counter == 5: 204 | self.reset() 205 | self.z = self.getPosition().z_val 206 | return self.getDepthImage() 207 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import json 4 | import csv 5 | 6 | import numpy as np 7 | import random 8 | import os 9 | 10 | from environment import * 11 | from utils import * 12 | from constants import * 13 | 14 | from matplotlib import pyplot as plt 15 | import dqn_her 16 | 17 | 18 | if __name__ == '__main__': 19 | 20 | env = Env(shaped_reward=False) 21 | obs_new = env.reset() 22 | 23 | ACTION_SIZE = 4 24 | # #ACTION_LIST = env.discrete_actions 25 | INPUT_HEIGTH=obs_new[0].image.shape[0] 26 | INPUT_WIDTH=obs_new[0].image.shape[1] 27 | INPUT_CHANNELS = 1 28 | 29 | succeed = 0 30 | 31 | #HER 32 | K = 4 33 | #load init param 34 | if not CONTINUE: 35 | explorationRate = INITIAL_EPSILON 36 | current_epoch = 1 37 | stepCounter = 0 38 | loadsim_seconds = 0 39 | Agent = dqn_her.DeepQ(ACTION_SIZE, MEMORY_SIZE, GAMMA, LEARNING_RATE, \ 40 | INPUT_HEIGTH, INPUT_WIDTH, INPUT_CHANNELS, \ 41 | USE_TARGET_NETWORK) 42 | else: 43 | #Load weights and parameter info.
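# params_json and weights_path come from constants.py; the JSON checkpoint written in the save block further
# down stores explorationRate, current_epoch, stepCounter, FINAL_EPSILON, loadsim_seconds and succeed, while
# the matching .h5 file holds the saved Keras model.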
44 | with open(params_json) as outfile: 45 | d = json.load(outfile) 46 | explorationRate = d.get('explorationRate') 47 | current_epoch = d.get('current_epoch') 48 | stepCounter = d.get('stepCounter') 49 | loadsim_seconds = d.get('loadsim_seconds') 50 | succeed = d.get('succeed') 51 | if succeed is None: 52 | succeed = 0 53 | Agent = dqn_her.DeepQ(ACTION_SIZE, MEMORY_SIZE, GAMMA, \ 54 | LEARNING_RATE, INPUT_HEIGTH, INPUT_WIDTH, \ 55 | INPUT_CHANNELS, USE_TARGET_NETWORK) 56 | Agent.loadWeights(weights_path) 57 | 58 | #main loop 59 | try: 60 | start_time = time.time() 61 | epoch_data = [] 62 | for epoch in range(current_epoch, MAX_EPOCHS, 1): 63 | positionsList = [[],[]] 64 | agent_decision = 0 65 | 66 | obs = env.reset() 67 | observation = get_observation_images(obs) 68 | positionsList[0].append(obs[0].position.x_val) 69 | positionsList[1].append(obs[0].position.y_val) 70 | cumulated_reward = 0 71 | if (epoch % TEST_INTERVAL_EPOCHS != 0 or stepCounter < LEARN_START_STEP) and TRAIN is True : # explore 72 | EXPLORE = True 73 | else: 74 | EXPLORE = False 75 | print ("Evaluate Model") 76 | 77 | #borrowed from HER example 78 | episode_experience = [] 79 | episode_succeeded = False 80 | 81 | for t in range(1000): 82 | 83 | start_req = time.time() 84 | 85 | if EXPLORE is True: #explore 86 | s = observation[0] 87 | g = observation[1] 88 | action, was_rand = Agent.feedforward(observation, explorationRate) 89 | if not was_rand: 90 | agent_decision += 1 91 | # if np.random.rand(1) < 0.5: 92 | # action = np.random.randint(ACTION_SIZE) 93 | obs_new, reward, done = env.step(action) 94 | newObservation = get_observation_images(obs_new) 95 | 96 | s_next = newObservation[0] 97 | stepCounter += 1 98 | # Agent.addMemory(observation[0], observation[1], action, \ 99 | # reward, newObservation[0], \ 100 | # newObservation[1], done) 101 | episode_experience.append((s,action,reward,s_next,g, done)) 102 | observation = newObservation 103 | obs = obs_new 104 | #test 105 | else: 106 | if not RANDOM_WALK: 107 | action, _ = Agent.feedforward(observation,0) 108 | else: 109 | action, _ = Agent.feedforward(observation,1) 110 | print(action) 111 | obs_new, reward, done = env.step(action) 112 | newObservation = get_observation_images(obs_new) 113 | observation = newObservation 114 | obs = obs_new 115 | 116 | cumulated_reward += reward 117 | if reward == 0: 118 | episode_succeeded = True 119 | succeed += 1 120 | 121 | positionsList[0].append(obs[0].position.x_val) 122 | positionsList[1].append(obs[0].position.y_val) 123 | 124 | if done: 125 | print("Agent taking decisions", agent_decision,"times out of",t+1,"steps.",100*agent_decision/float(t+1),"%") 126 | m, sec = divmod(int(time.time() - start_time + loadsim_seconds), 60) 127 | h, m = divmod(m, 60) 128 | 129 | if stepCounter == LEARN_START_STEP: 130 | print("Starting learning") 131 | 132 | #Episode is done, let's add some experience to memory 133 | if TRAIN and EXPLORE: 134 | for replay in range(t): 135 | s, a, r, s_n, g, d = episode_experience[replay] 136 | Agent.addMemory(s, g, a, r, s_n, g, d) 137 | # K-future strategy 138 | for k in range(K): 139 | future = np.random.randint(replay, t) 140 | _, _, _, g_n, _, d = episode_experience[future] 141 | final = np.allclose(s_n, g_n) 142 | #only works with the sparse reward 143 | r_n = 0 if final else -1 144 | Agent.addMemory(s, g_n, a, r_n, s_n, g_n, d) 145 | 146 | if Agent.getMemorySize() >= LEARN_START_STEP: 147 | Agent.learnOnMiniBatch(BATCH_SIZE) 148 | #print("Episode Done, Learning time") 149 | if explorationRate > FINAL_EPSILON and
stepCounter > LEARN_START_STEP: 150 | explorationRate -= (INITIAL_EPSILON - FINAL_EPSILON) / MAX_EXPLORE_STEPS 151 | 152 | print ("EP " + str(epoch) +" Csteps= " + str(stepCounter) + " - {} steps".format(t + 1) + " - CReward: " + str( 153 | round(cumulated_reward, 2)) + " Eps=" + str(round(explorationRate, 2)) + " Success rate=" + str(round(succeed/float(epoch)*100, 2)) + " Time: %d:%02d:%02d" % (h, m, sec)) 154 | epoch_data.append([str(epoch), str(stepCounter),t + 1, str( 155 | round(cumulated_reward, 2)), str(round(explorationRate, 2)), str(round(succeed/float(epoch)*100, 2)), "%d:%02d:%02d" % (h, m, sec)]) 156 | with open(DATA_FILE, 'a') as myfile: 157 | wr = csv.writer(myfile, quoting=csv.QUOTE_ALL) 158 | for data in epoch_data: 159 | wr.writerow(data) 160 | epoch_data = [] 161 | # SAVE SIMULATION DATA 162 | if epoch % SAVE_INTERVAL_EPOCHS == 0 and TRAIN is True: 163 | # save model weights and monitoring data 164 | print ('Save model') 165 | Agent.saveModel(MODEL_DIR + '/dqn_her_ep' + str(epoch) + '.h5') 166 | 167 | parameter_keys = ['explorationRate', 'current_epoch','stepCounter', 'FINAL_EPSILON','loadsim_seconds','succeed'] 168 | parameter_values = [explorationRate, epoch, stepCounter,FINAL_EPSILON, int(time.time() - start_time + loadsim_seconds), succeed] 169 | parameter_dictionary = dict(zip(parameter_keys, parameter_values)) 170 | with open(PARAM_DIR + '/dqn_her_ep' + str(epoch) + '.json','w') as outfile: 171 | json.dump(parameter_dictionary, outfile) 172 | 173 | 174 | plt.title('Traveled path of agent on epoch '+str(epoch)) 175 | 176 | #for i in np.arange(0,len(positionsList[0]),2): 177 | # if(i+2 <= len(positionsList[0])): 178 | # plt.plot(positionsList[0][i:i+2],positionsList[1][i:i+2],'k-') 179 | #plt.savefig(VIZ_DIR + '/dqn_her_ep' + str(epoch) + '.png', format='png') 180 | plt.scatter(positionsList[0],positionsList[1]) 181 | plt.xlim(0, 20) 182 | plt.ylim(-10, 10) 183 | plt.plot(positionsList[0],positionsList[1]) 184 | plt.savefig(VIZ_DIR + '/dqn_her_ep' + str(epoch) + '.png', format='png') 185 | plt.close() 186 | break 187 | 188 | except KeyboardInterrupt: 189 | print("Shutting down") 190 | --------------------------------------------------------------------------------
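For readers following the training loop in run.py, the episode-relabeling step can be read in isolation as the short sketch below. It is not part of the repository: `relabel_k_future` is a hypothetical helper name, `agent` stands for the `DeepQ` instance from `dqn_her.py`, and the sparse reward convention (0 on success, -1 otherwise) matches environment.py.

```python
import numpy as np

def relabel_k_future(episode_experience, agent, K=4):
    """Store one episode in the agent's replay memory using the HER 'K-future'
    strategy from run.py (sparse reward: 0 on success, -1 otherwise)."""
    t = len(episode_experience) - 1
    for replay in range(t):
        s, a, r, s_next, g, done = episode_experience[replay]
        # original transition with the original desired goal
        agent.addMemory(s, g, a, r, s_next, g, done)
        # K extra copies, each relabeled with a goal achieved later in the same episode
        for _ in range(K):
            future = np.random.randint(replay, t)
            _, _, _, g_new, _, d = episode_experience[future]
            r_new = 0 if np.allclose(s_next, g_new) else -1
            agent.addMemory(s, g_new, a, r_new, s_next, g_new, d)
```

Each real transition therefore contributes K + 1 entries to the replay memory, which is what allows the sparse 0/-1 reward to provide a learning signal even when the original goal is never reached.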