├── log ├── model │ └── .gitkeep ├── param │ └── .gitkeep ├── tf │ └── .gitkeep └── viz │ └── .gitkeep ├── desired_goal.npy ├── .gitignore ├── .gitmodules ├── requirements.txt ├── utils.py ├── constants.py ├── memory_her.py ├── README.md ├── environment.py ├── dqn_her.py ├── airsimapi.py └── run.py /log/model/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /log/param/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /log/tf/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /log/viz/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /desired_goal.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jcarcamo/sar-deep-rl/HEAD/desired_goal.npy -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | env/ 2 | __pycache__/ 3 | log/model/*.h5 4 | log/param/*.json 5 | log/tf/events* 6 | log/viz/*.png 7 | log/*.csv 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "sar-airsim-env"] 2 | path = sar-airsim-env 3 | url = https://github.com/jcarcamo/sar-airsim-env.git 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.3.0 2 | astor==0.7.1 3 | certifi==2018.8.13 4 | cycler==0.10.0 5 | gast==0.2.0 6 | grpcio==1.14.1 7 | h5py==2.8.0 8 | Keras==2.1.3 9 | Markdown==2.6.11 10 | matplotlib==2.1.2 11 | msgpack-python==0.5.6 12 | msgpack-rpc-python==0.4.1 13 | numpy==1.14.5 14 | opencv-python==3.4.0.12 15 | Pillow==5.0.0 16 | protobuf==3.6.1 17 | pyparsing==2.2.0 18 | python-dateutil==2.7.3 19 | pytz==2018.5 20 | PyYAML==3.13 21 | scipy==1.1.0 22 | six==1.11.0 23 | tensorboard==1.10.0 24 | tensorflow-gpu==1.10.0 25 | termcolor==1.1.0 26 | tornado==4.5.3 27 | Werkzeug==0.14.1 28 | wincertstore==0.2 29 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | def add_channel(image): 2 | gray_image = image/255.0 3 | gray_image = gray_image.reshape((1,image.shape[0],image.shape[1],1)) 4 | #Debug to confirm the image is passed as floats 5 | #cv2.imshow("original",image) 6 | #cv2.imshow("float",gray_image[0,:,:,0]) 7 | #cv2.waitKey(0) 8 | #exit() 9 | return gray_image 10 | 11 | def get_observation_images(obs): 12 | observation = [] 13 | observation.append(add_channel(obs[0].image)) 14 | observation.append(add_channel(obs[1])) 15 | return observation 16 | 17 | def connectpoints(x,y,p1,p2): 18 | x1, x2 = x[p1], x[p2] 19 | y1, y2 = y[p1], y[p2] 20 | plt.plot([x1,x2],[y1,y2],'r-') 21 | -------------------------------------------------------------------------------- /constants.py: 
-------------------------------------------------------------------------------- 1 | ENV_NAME = 'AirSimEnv-v0' 2 | 3 | CONTINUE = False #load a pre-trained model 4 | RESTART_EP = 0 # the episode number of the pre-trained model 5 | 6 | TRAIN = True # train the network 7 | USE_TARGET_NETWORK = False # use the target network 8 | 9 | RANDOM_WALK=False 10 | TF_DEVICE = '/gpu:0' 11 | MAX_EPOCHS = 100000 # max episode number 12 | MEMORY_SIZE = 50000 13 | LEARN_START_STEP = 32 14 | 15 | BATCH_SIZE = 32 16 | LEARNING_RATE = 1e-3 # 1e6 17 | GAMMA = 0.95 18 | INITIAL_EPSILON = 1 # starting value of epsilon 19 | FINAL_EPSILON = 0.1 # final value of epsilon 20 | MAX_EXPLORE_STEPS = 1000 21 | TEST_INTERVAL_EPOCHS = 100 22 | SAVE_INTERVAL_EPOCHS = 50 23 | 24 | LOG_NAME_SAVE = 'log' 25 | MODEL_DIR = LOG_NAME_SAVE + '/model' # the path to save deep model 26 | PARAM_DIR = LOG_NAME_SAVE + '/param' # the path to save the parameters 27 | VIZ_DIR = LOG_NAME_SAVE + '/viz' # the path to save viz 28 | DATA_FILE = LOG_NAME_SAVE + '/data_trained.csv' # the path to save trajectory 29 | 30 | LOG_NAME_READ = 'log' 31 | #the path to reload weights, monitor and params 32 | weights_path = LOG_NAME_READ + '/model/dqn_her_ep' + str(RESTART_EP)+ '.h5' 33 | params_json = LOG_NAME_READ + '/param/dqn_her_ep' + str(RESTART_EP) + '.json' 34 | -------------------------------------------------------------------------------- /memory_her.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | class Memory: 5 | """ 6 | Modified to include goals as part of the Experience Replay 7 | """ 8 | 9 | def __init__(self, size): 10 | self.size = size 11 | self.currentPosition = 0 12 | self.states = [] 13 | self.statesGoals = [] 14 | self.actions = [] 15 | self.rewards = [] 16 | self.newStates = [] 17 | self.newStatesGoals = [] 18 | self.finals = [] 19 | 20 | def getCurrentSize(self) : 21 | return len(self.states) 22 | 23 | def getMemory(self, index): 24 | return {'state': self.states[index],'action': self.actions[index], 'reward': self.rewards[index], 'newState': self.newStates[index], 'isFinal': self.finals[index],'statesGoals': self.statesGoals[index], 'newStatesGoals': self.newStatesGoals[index]} 25 | 26 | def addMemory(self, state, stateGoal, action, reward, newState, \ 27 | newStateGoal, isFinal) : 28 | if (self.currentPosition >= self.size - 1) : 29 | self.currentPosition = 0 30 | if (len(self.states) > self.size) : 31 | self.states[self.currentPosition] = state 32 | self.statesGoals[self.currentPosition] = stateGoal 33 | self.actions[self.currentPosition] = action 34 | self.rewards[self.currentPosition] = reward 35 | self.newStates[self.currentPosition] = newState 36 | self.newStatesGoals[self.currentPosition] = newStateGoal 37 | self.finals[self.currentPosition] = isFinal 38 | 39 | else : 40 | self.states.append(state) 41 | self.statesGoals.append(stateGoal) 42 | self.actions.append(action) 43 | self.rewards.append(reward) 44 | self.newStates.append(newState) 45 | self.newStatesGoals.append(newStateGoal) 46 | self.finals.append(isFinal) 47 | 48 | self.currentPosition += 1 49 | 50 | def getMiniBatch(self, size) : 51 | state_batch = [] 52 | stateGoal_batch = [] 53 | action_batch = [] 54 | reward_batch = [] 55 | newState_batch = [] 56 | newStateGoal_batch = [] 57 | isFinal_batch = [] 58 | 59 | indices = random.sample(list(np.arange(len(self.states))), min(size,len(self.states)) ) 60 | for index in indices: 61 | state_batch.append(self.states[index][0]) 62 | 
stateGoal_batch.append(self.statesGoals[index][0]) 63 | action_batch.append(self.actions[index]) 64 | reward_batch.append(self.rewards[index]) 65 | newState_batch.append(self.newStates[index][0]) 66 | newStateGoal_batch.append(self.newStatesGoals[index][0]) 67 | isFinal_batch.append(self.finals[index]) 68 | 69 | 70 | return state_batch, stateGoal_batch, action_batch, reward_batch, newState_batch, newStateGoal_batch, isFinal_batch 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine Learning approach to using Unmanned Aerial Vehicles (UAVs) in Search and Rescue (SAR) 2 | 3 | ## Abstract 4 | 5 | Unmanned Aerial Vehicles (UAVs) are becoming more prevalent, more capable, and less expensive every day. Advances in battery life and electronic sensors have spurred the development of diverse UAV applications outside their original military domain. For example, Search and Rescue (SAR) operations stand to benefit greatly from modern UAVs since even the simplest commercial models are equipped with high-resolution cameras and the ability to stream video to a computer or portable device. As a result, autonomous unmanned systems (terrestrial, marine, and aerial) have begun to be employed for such typical SAR tasks as terrain mapping, task observation, and early supply delivery. However, these systems were developed before recent advances in artificial intelligence such as Google DeepMind's breakthrough with the Deep Q-Network (DQN). Therefore, most of them rely heavily on greedy or potential-based heuristics, without the ability to learn. In this research, we model the problem as a Partially Observable Markov Decision Process (POMDP) and investigate enhancing the performance of autonomous UAVs in SAR by incorporating newly developed Reinforcement Learning methods. The project utilizes open-source tools such as Microsoft's state-of-the-art UAV simulator AirSim, and Keras, a machine learning framework that can make use of Google's popular TensorFlow library. The main approach investigated in this research is the Deep Q-Network. 6 | 7 | ## Requirements 8 | 9 | This project uses [Unreal Engine 4](https://www.unrealengine.com/en-US/what-is-unreal-engine-4) and [Microsoft's AirSim](https://github.com/Microsoft/AirSim) as the vehicle simulator. Python 3.6 was used to build the environment and agent, and [Keras](https://keras.io/) with a [TensorFlow](https://www.tensorflow.org/) backend was used for the agent implementation. 10 | 11 | ### Python dependencies 12 | 13 | Python 3.6 was used for this development. All dependencies can be found in the ```requirements.txt``` file; the most notable ones are listed below. 14 | 15 | - TensorFlow 1.10 16 | - Keras 2.1.3 17 | - opencv-python 3.4 18 | - msgpack-rpc-python 0.4.1 19 | 20 | 21 | ### Installation 22 | 23 | It is recommended to use [Anaconda](https://www.continuum.io/downloads) to install and manage your Python environment. 24 | 25 | ```bash 26 | conda create -n <env-name> python=3.6 27 | activate <env-name> 28 | cd <path-to-this-repository> 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | An example Unreal Engine project is included as a git submodule for convenience.
If you wish to use it, issue the following commands: 33 | 34 | ```bash 35 | cd sar-airsim-env 36 | git submodule init 37 | git submodule update 38 | ``` 39 | 40 | After that, please follow the [AirSim instructions](https://github.com/Microsoft/AirSim/blob/297f1c49d3ef0a1a0f0d841ef7dafa89603db327/docs/unreal_custenv.md) to properly set up the AirSim plugin in the Unreal Engine project. 41 | 42 | ## Acknowledgments 43 | 44 | This implementation is inspired by the following repositories: [AirGym](https://github.com/Kjell-K/AirGym) and [gym-unrealcv](https://github.com/zfw1226/gym-unrealcv). Both repositories contain excellent examples of what is needed to get started with [Unreal Engine 4](https://www.unrealengine.com/en-US/what-is-unreal-engine-4) and [Microsoft's AirSim](https://github.com/Microsoft/AirSim). 45 | -------------------------------------------------------------------------------- /environment.py: -------------------------------------------------------------------------------- 1 | from airsimapi import * 2 | 3 | class State(): 4 | def __init__(self, image, position, orientation): 5 | self.image = image 6 | self.position = position 7 | self.orientation = orientation 8 | 9 | class Env(): 10 | def __init__(self, shaped_reward = False): 11 | self.client = customAirSimClient() 12 | self.state = State(self.client.getDepthImage(), \ 13 | self.client.getPosition(), \ 14 | self.client.getOrientation()) 15 | self.shaped_reward = shaped_reward 16 | self.reward_factor = 1 17 | self.reward_th = 0.05 18 | self.desired_goal = np.load("desired_goal.npy") 19 | 20 | def reward(self, boxes): 21 | reward = 0 22 | # if self.shaped_reward: # shaped (non-sparse) reward 23 | for box in boxes: 24 | # get reward of all boxes considering the size and position of box 25 | (xmin, ymin), (xmax, ymax) = box 26 | boxsize = (ymax - ymin) * (xmax - xmin) 27 | x_c = (xmax + xmin) / 2.0 28 | x_bias = x_c - 0.5 29 | discount = max(0, 1 - x_bias ** 2) 30 | reward += discount * boxsize 31 | 32 | if reward > self.reward_th: 33 | if self.shaped_reward: # shaped (non-sparse) reward 34 | reward = min(reward * self.reward_factor, 10) 35 | else: 36 | reward = 0 37 | print ('Get ideal Target!!!') 38 | elif reward == 0: 39 | reward = -1 40 | # print ('Get Nothing') 41 | else: 42 | if self.shaped_reward: # shaped (non-sparse) reward 43 | reward = 0 44 | else: 45 | reward = -1 46 | # print ('Get small Target!!!') 47 | # else: # sparse reward, to test HER 48 | # for box in boxes: 49 | # (xmin, ymin), (xmax, ymax) = box 50 | # # print ("xmax:",(xmax),"xmin:", (xmin),"xmax-xmin:",(xmax-xmin)) 51 | # # print ("ymax:",(ymax),"ymin:", (ymin),"ymax-ymin:",(ymax-ymin)) 52 | # boxsize = (ymax - ymin) * (xmax - xmin) 53 | # reward += boxsize 54 | # print ("Reward:",reward) 55 | # if reward > self.reward_th: 56 | # reward = 0 57 | # print ('Found object') 58 | # else: 59 | # reward = -1 60 | # print ('Get Nothing') 61 | return reward 62 | 63 | def step(self, action): 64 | done = False 65 | reward = -1 66 | collided = self.client.move(action) 67 | observation = self.client.getDepthImage() 68 | self.state = State(observation, \ 69 | self.client.getPosition(), \ 70 | self.client.getOrientation()) 71 | if not collided: 72 | #check if object is in image.
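# Note: detection is a placeholder rather than a learned classifier. The segmentation colour assigned to the
# 'Goal' object in airsimapi.customAirSimClient is isolated by getMask/getBbox, and the resulting normalized
# bounding boxes are what feed the reward() method above.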
73 | object_mask = self.client.getSegementationImage() 74 | boxes = self.client.getBboxes(object_mask, ['Goal']) 75 | reward = self.reward(boxes) 76 | if self.shaped_reward: 77 | if reward > 0: 78 | done = True 79 | else: 80 | if reward == 0: 81 | done = True 82 | else: 83 | if self.shaped_reward: 84 | reward = -100 85 | else: 86 | reward = -1 87 | done = True 88 | return [self.state, self.desired_goal], reward, done 89 | 90 | def reset(self, size = None): 91 | self.client.reset() 92 | self.state = State(self.client.getDepthImage(), \ 93 | self.client.getPosition(), \ 94 | self.client.getOrientation()) 95 | return [self.state,self.desired_goal] 96 | -------------------------------------------------------------------------------- /dqn_her.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import numpy as np 4 | from keras.models import Model, Sequential, load_model 5 | from keras.layers import concatenate, Input, Conv2D, Convolution2D, Flatten, ZeroPadding2D 6 | from keras.layers.core import Dense, Dropout, Activation 7 | from keras.layers.pooling import MaxPooling2D 8 | from keras.optimizers import SGD , Adam 9 | import tensorflow as tf 10 | import keras.backend.tensorflow_backend as KTF 11 | from keras.callbacks import TensorBoard 12 | import memory_her 13 | import keras.backend as K 14 | from constants import * 15 | class DeepQ: 16 | 17 | def __init__(self, outputs, memorySize, discountFactor, learningRate, img_rows, img_cols, img_channels ,useTargetNetwork): 18 | 19 | self.output_size = outputs 20 | self.memory = memory_her.Memory(memorySize) 21 | self.discountFactor = discountFactor 22 | self.learningRate = learningRate 23 | self.img_rows = img_rows 24 | self.img_cols = img_cols 25 | self.img_channels = img_channels 26 | self.useTargetNetwork = useTargetNetwork 27 | self.count_steps = 0 28 | 29 | self.board = TensorBoard(log_dir='./log/tf', \ 30 | histogram_freq=0, write_graph=False, write_images=True) 31 | if K.backend() == 'tensorflow': 32 | with KTF.tf.device(TF_DEVICE): 33 | config = tf.ConfigProto(allow_soft_placement = True) 34 | config.gpu_options.allow_growth = True 35 | KTF.set_session(tf.Session(config=config)) 36 | self.initNetworks() 37 | else : 38 | self.initNetworks() 39 | 40 | def initNetworks(self): 41 | 42 | self.model = self.createModel() 43 | if self.useTargetNetwork: 44 | self.targetModel = self.createModel() 45 | 46 | def createModel(self): 47 | input_shape = (self.img_channels, self.img_rows, self.img_cols) 48 | if K.image_dim_ordering() == 'tf': 49 | input_shape = (self.img_rows, self.img_cols, self.img_channels) 50 | 51 | state = Sequential(name='state_conv') 52 | state.add(Conv2D(32, (4, 4), strides=(4, 4) ,activation='relu', input_shape=input_shape)) 53 | state.add(Conv2D(64, (3, 3), strides=(2, 2), activation='relu')) 54 | state.add(Conv2D(64, (1, 1), strides=(1, 1), activation='relu')) 55 | state.add(Flatten()) 56 | 57 | # Now let's get a tensor with the output of our state: 58 | state_input = Input(input_shape, name='state_input') 59 | encoded_state = state(state_input) 60 | 61 | goal = Sequential(name='goal_conv') 62 | goal.add(Conv2D(32, (4, 4), strides=(4, 4) ,activation='relu', input_shape=input_shape)) 63 | goal.add(Conv2D(64, (3, 3), strides=(2, 2), activation='relu')) 64 | goal.add(Conv2D(64, (1, 1), strides=(1, 1), activation='relu')) 65 | goal.add(Flatten()) 66 | 67 | # Now let's get a tensor with the output of our goal: 68 | goal_input = Input(input_shape, name='goal_input') 
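# The goal branch below mirrors the state branch (same layer shapes, separate weights); the two flattened
# encodings are concatenated and passed through a 512-unit dense layer that produces one output per discrete action.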
69 | encoded_goal = goal(goal_input) 70 | 71 | merged = concatenate([encoded_state, encoded_goal], \ 72 | name='state_goal_input') 73 | 74 | fc = Dense(512, activation='relu', name='fc')(merged) 75 | model_output = Dense(self.output_size, activation='softmax', \ 76 | name='action')(fc) 77 | 78 | model = Model(inputs=[state_input, goal_input], outputs=model_output) 79 | 80 | model.compile(optimizer=Adam(lr=self.learningRate), loss='mean_squared_error', metrics=['mae','accuracy']) 81 | model.summary() 82 | 83 | 84 | return model 85 | 86 | 87 | def backupNetwork(self, model, backup): 88 | weightMatrix = [] 89 | for layer in model.layers: 90 | weights = layer.get_weights() 91 | weightMatrix.append(weights) 92 | i = 0 93 | for layer in backup.layers: 94 | weights = weightMatrix[i] 95 | layer.set_weights(weights) 96 | i += 1 97 | 98 | def updateTargetNetwork(self): 99 | self.backupNetwork(self.model, self.targetModel) 100 | print ('update target network') 101 | 102 | # predict Q values for all the actions 103 | def getQValues(self, state): 104 | if self.useTargetNetwork: 105 | predicted = self.targetModel.predict(state) 106 | else: 107 | predicted = self.model.predict(state) 108 | return predicted[0] 109 | 110 | def getMaxIndex(self, qValues): 111 | return np.argmax(qValues) 112 | 113 | # select the action with the highest Q value 114 | def selectAction(self, qValues, explorationRate): 115 | rand = random.random() 116 | was_rand = False 117 | if rand < explorationRate : 118 | was_rand = True 119 | action = np.random.randint(0, self.output_size) 120 | else : 121 | action = self.getMaxIndex(qValues) 122 | # action = self.getMaxIndex(qValues) 123 | return action, was_rand 124 | 125 | def addMemory(self, state, stateGoal, action, reward, newState, \ 126 | newStateGoal, isFinal): 127 | self.memory.addMemory(state, stateGoal, action, reward, newState, \ 128 | newStateGoal, isFinal) 129 | 130 | 131 | def getMemorySize(self): 132 | return self.memory.getCurrentSize() 133 | 134 | 135 | def learnOnMiniBatch(self, miniBatchSize,): 136 | 137 | self.count_steps += 1 138 | 139 | state_batch, goals_batch, action_batch,reward_batch,newState_batch, _,isFinal_batch, \ 140 | = self.memory.getMiniBatch(miniBatchSize) 141 | 142 | qValues_batch = self.model.predict([np.array(state_batch), \ 143 | np.array(goals_batch)],batch_size=miniBatchSize) 144 | 145 | isFinal_batch = np.array(isFinal_batch) + 0 146 | 147 | if self.useTargetNetwork: 148 | qValuesNewState_batch = self.targetModel.predict_on_batch([np.array(newState_batch), \ 149 | np.array(goals_batch)]) 150 | else : 151 | qValuesNewState_batch = self.model.predict_on_batch([np.array(newState_batch), \ 152 | np.array(goals_batch)]) 153 | 154 | Y_sample_batch = reward_batch + (1 - isFinal_batch) * self.discountFactor * np.max(qValuesNewState_batch, axis=1) 155 | 156 | X_batch = [np.array(state_batch),np.array(goals_batch)] 157 | Y_batch = np.array(qValues_batch) 158 | 159 | for i,action in enumerate(action_batch): 160 | Y_batch[i][action] = Y_sample_batch[i] 161 | 162 | self.model.fit(X_batch, Y_batch, validation_split=0.0, batch_size = miniBatchSize, epochs=1, verbose = 0, callbacks=[self.board]) 163 | 164 | if self.useTargetNetwork and self.count_steps % 1000 == 0: 165 | self.updateTargetNetwork() 166 | 167 | 168 | def saveModel(self, path): 169 | if self.useTargetNetwork: 170 | self.targetModel.save(path) 171 | else: 172 | self.model.save(path) 173 | 174 | def loadWeights(self, path): 175 | self.model.load_weights(path) 176 | if self.useTargetNetwork: 177 | 
self.targetModel.load_weights(path) 178 | 179 | 180 | def feedforward(self,observation,explorationRate): 181 | qValues = self.getQValues(observation) 182 | action, was_rand = self.selectAction(qValues, explorationRate) 183 | return action, was_rand 184 | -------------------------------------------------------------------------------- /airsimapi.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import cv2 4 | from PIL import Image 5 | 6 | #Using AirSimClient from AirSim Repo 7 | sys.path.append('C:/Users/juan_/git/AirSim/PythonClient') 8 | from AirSimClient import * 9 | 10 | class customAirSimClient(MultirotorClient): 11 | def __init__(self): 12 | MultirotorClient.__init__(self) 13 | MultirotorClient.confirmConnection(self) 14 | self.enableApiControl(True) 15 | self.armDisarm(True) 16 | self.takeoff() 17 | 18 | # hack instead of a classifier for objects 19 | # TODO include the real classifier 20 | # success = self.simSetSegmentationObjectID("SkySphere", 42, True) 21 | # print('success', success) 22 | success = self.simSetSegmentationObjectID("[\w]*", 0, True) 23 | print('Setting everything to 0', success) 24 | # success = self.simSetSegmentationObjectID("birch[\w]*", 2, True) 25 | # print('success', success) 26 | # success = self.simSetSegmentationObjectID("fir[\w]*", 2, True) 27 | # print('success', success) 28 | # success = self.simSetSegmentationObjectID("hedge[\w]*", 5, True) 29 | # print('success', success) 30 | #success = self.simSetSegmentationObjectID("tree[\w]*", 255, True) 31 | success = self.simSetSegmentationObjectID("Goal", 2) 32 | print('Setting our goal', success) 33 | #wait until it takes off 34 | counter = 0 35 | while self.getPosition().z_val > -1.45: 36 | print("taking off: ", self.getPosition().z_val) 37 | time.sleep(0.2) 38 | self.z = self.getPosition().z_val 39 | 40 | 41 | def straight(self, duration, speed): 42 | pitch, roll, yaw = self.getPitchRollYaw() 43 | vx = math.cos(yaw) * speed 44 | vy = math.sin(yaw) * speed 45 | self.moveByVelocityZ(vx, vy, self.z, duration, DrivetrainType.ForwardOnly) 46 | start = time.time() 47 | return start, duration 48 | 49 | def yaw_right(self, duration): 50 | self.rotateByYawRate(30, duration) 51 | start = time.time() 52 | return start, duration 53 | 54 | def yaw_left(self, duration): 55 | self.rotateByYawRate(-30, duration) 56 | start = time.time() 57 | return start, duration 58 | """ 59 | # According to AirSim docs, this are the drone available cameras 60 | # cam_id=0, center front 61 | # cam_id=1, left front 62 | # cam_id=2, right front 63 | # cam_id=4, center rear 64 | # https://github.com/Microsoft/AirSim/blob/e169e65755f4254c29b67617f50502a262e64e63/docs/image_apis.md 65 | """ 66 | def getSceneImage(self, cam_id): 67 | responses = self.simGetImages([ \ 68 | ImageRequest(cam_id, AirSimImageType.Scene, False, False), \ 69 | ]) 70 | response = responses[0] 71 | img1d = np.fromstring(response.image_data_uint8, dtype=np.uint8) 72 | # reshape array to 4 channel image array H X W X 4 73 | img_rgba = img1d.reshape(response.height, response.width, 4) 74 | #discard alpha, to make image smaller 75 | #TODO check if it influence training 76 | img_rgb = img_rgba[:,:,:-1] 77 | #cv2.imshow("Scene", img_rgba) 78 | #cv2.waitKey(0) 79 | return img_rgb[:,:,::-1] 80 | 81 | def getDepthImage(self): 82 | rawImage = self.simGetImages([ImageRequest(0, AirSimImageType.DepthPerspective, True, False)])[0] 83 | # Taken from original AirGym, transforms from float img to int image 84 | 
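# np.maximum(...) clamps depth values below 1 m to 1, so the 255/depth mapping never divides by zero or exceeds 255;
# the power-law curve (factor 10) applied afterwards pushes all but the brightest pixels toward black.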
img1d = np.array(rawImage.image_data_float, dtype=np.float) 85 | img1d = 255/np.maximum(np.ones(img1d.size), img1d) 86 | img2d = np.reshape(img1d, (rawImage.height, rawImage.width)) 87 | image = np.invert(np.array(Image.fromarray(img2d.astype(np.uint8), mode='L'))) 88 | 89 | factor = 10 90 | maxIntensity = 255.0 # depends on dtype of image data 91 | # Decrease intensity such that dark pixels become much darker, bright pixels become slightly dark 92 | depth = (maxIntensity)*(image/maxIntensity)**factor 93 | depth = np.array(depth, dtype=np.uint8) 94 | return depth 95 | 96 | def getSegementationImage(self): 97 | rawImage = self.simGetImages([ \ 98 | ImageRequest(0, AirSimImageType.Segmentation, False, False)])[0] 99 | img1d = np.fromstring(rawImage.image_data_uint8, dtype=np.uint8) #get numpy array 100 | img_rgba = img1d.reshape(rawImage.height, rawImage.width, 4) 101 | img_rgb = img_rgba[:,:,:-1] 102 | # print(np.unique(img_rgba[:,:,0], return_counts=True)) #red 103 | # print(np.unique(img_rgba[:,:,1], return_counts=True)) #green 104 | # print(np.unique(img_rgba[:,:,2], return_counts=True)) #blue 105 | return img_rgb 106 | 107 | #Harcoded currently 108 | def getMask(self,object_mask): 109 | #[r,g,b] = 94, 253, 92 110 | #lower_range = np.array([g-50,b-50,r-25]) 111 | #upper_range = np.array([g+2,b+50,r+25]) 112 | [r,g,b] = 193, 103, 112 113 | lower_range = np.array([b-3,g-3,r-3], dtype='uint8') 114 | upper_range = np.array([b+3,g+3,r+3], dtype='uint8') 115 | mask = cv2.inRange(object_mask, lower_range, upper_range) 116 | return mask 117 | 118 | def getBbox(self,object_mask,object): 119 | #only get an object's bounding box 120 | width = object_mask.shape[1] 121 | height = object_mask.shape[0] 122 | mask = self.getMask(object_mask) 123 | output = cv2.bitwise_and(object_mask, object_mask, mask = mask) 124 | # cv2.imshow("mask",mask) 125 | # cv2.imshow("side-by-side",np.hstack([object_mask, output])) 126 | # cv2.waitKey(0) 127 | nparray = np.array([[[0, 0]]]) 128 | pixelpointsCV2 = cv2.findNonZero(mask) 129 | 130 | if type(pixelpointsCV2) == type(nparray):# exist target in image 131 | x_min = pixelpointsCV2[:,:,0].min() 132 | x_max = pixelpointsCV2[:,:,0].max() 133 | y_min = pixelpointsCV2[:,:,1].min() 134 | y_max = pixelpointsCV2[:,:,1].max() 135 | #print x_min, x_max ,y_min, y_max 136 | box = ((x_min/float(width),y_min/float(height)),#left top 137 | (x_max/float(width),y_max/float(height)))#right down 138 | else: 139 | box = ((0,0),(0,0)) 140 | 141 | return mask , box 142 | 143 | def getBboxes(self, object_mask, targets): 144 | boxes = [] 145 | for obj in targets: 146 | mask,box = self.getBbox(object_mask, obj) 147 | boxes.append(box) 148 | return boxes 149 | 150 | def move(self, action): 151 | start = time.time() 152 | duration = 0 153 | collided = False 154 | 155 | if action == 0: 156 | start, duration = self.straight(1, 4) 157 | while duration > time.time() - start: 158 | if self.getCollisionInfo().has_collided == True: 159 | return True 160 | self.moveByVelocity(0, 0, 0, 1) 161 | self.rotateByYawRate(0, 1) 162 | 163 | 164 | if action == 1: 165 | start, duration = self.yaw_right(0.8) 166 | while duration > time.time() - start: 167 | if self.getCollisionInfo().has_collided == True: 168 | return True 169 | self.moveByVelocity(0, 0, 0, 1) 170 | self.rotateByYawRate(0, 1) 171 | 172 | if action == 2: 173 | start, duration = self.yaw_left(1) 174 | while duration > time.time() - start: 175 | if self.getCollisionInfo().has_collided == True: 176 | return True 177 | self.moveByVelocity(0, 0, 0, 1) 
178 | self.rotateByYawRate(0, 1) 179 | 180 | if action == 3: 181 | fake = True 182 | #start, duration = self.straight(1, 4) 183 | #while duration > time.time() - start: 184 | # if self.getCollisionInfo().has_collided == True: 185 | # return True 186 | #self.moveByVelocity(0, 0, 0, 1) 187 | #self.rotateByYawRate(0, 1) 188 | 189 | return collided 190 | 191 | def reset(self): 192 | super().reset() 193 | time.sleep(0.2) 194 | self.enableApiControl(True) 195 | self.armDisarm(True) 196 | self.takeoff() 197 | counter = 0 198 | #wait until it takes off 199 | while self.getPosition().z_val > -1.45: 200 | print("taking off: ", self.getPosition().z_val) 201 | time.sleep(0.2) 202 | counter +=1 203 | if counter == 5: 204 | self.reset() 205 | self.z = self.getPosition().z_val 206 | return self.getDepthImage() 207 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import json 4 | import csv 5 | 6 | import numpy as np 7 | import random 8 | import os 9 | 10 | from environment import * 11 | from utils import * 12 | from constants import * 13 | 14 | from matplotlib import pyplot as plt 15 | import dqn_her 16 | 17 | 18 | if __name__ == '__main__': 19 | 20 | env = Env(shaped_reward=False) 21 | obs_new = env.reset() 22 | 23 | ACTION_SIZE = 4 24 | # #ACTION_LIST = env.discrete_actions 25 | INPUT_HEIGTH=obs_new[0].image.shape[0] 26 | INPUT_WIDTH=obs_new[0].image.shape[1] 27 | INPUT_CHANNELS = 1 28 | 29 | succeed = 0 30 | 31 | #HER 32 | K = 4 33 | #load init param 34 | if not CONTINUE: 35 | explorationRate = INITIAL_EPSILON 36 | current_epoch = 1 37 | stepCounter = 0 38 | loadsim_seconds = 0 39 | Agent = dqn_her.DeepQ(ACTION_SIZE, MEMORY_SIZE, GAMMA, LEARNING_RATE, \ 40 | INPUT_HEIGTH, INPUT_WIDTH, INPUT_CHANNELS, \ 41 | USE_TARGET_NETWORK) 42 | else: 43 | #Load weights and parameter info.
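# params_json and weights_path come from constants.py; the JSON checkpoint written in the save block further
# down stores explorationRate, current_epoch, stepCounter, FINAL_EPSILON, loadsim_seconds and succeed, while
# the matching .h5 file holds the saved Keras model.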
44 | with open(params_json) as outfile: 45 | d = json.load(outfile) 46 | explorationRate = d.get('explorationRate') 47 | current_epoch = d.get('current_epoch') 48 | stepCounter = d.get('stepCounter') 49 | loadsim_seconds = d.get('loadsim_seconds') 50 | succeed = d.get('succeed') 51 | if succeed is None: 52 | succeed = 0 53 | Agent = dqn_her.DeepQ(ACTION_SIZE, MEMORY_SIZE, GAMMA, \ 54 | LEARNING_RATE, INPUT_HEIGTH, INPUT_WIDTH, \ 55 | INPUT_CHANNELS, USE_TARGET_NETWORK) 56 | Agent.loadWeights(weights_path) 57 | 58 | #main loop 59 | try: 60 | start_time = time.time() 61 | epoch_data = [] 62 | for epoch in range(current_epoch, MAX_EPOCHS, 1): 63 | positionsList = [[],[]] 64 | agent_decision = 0 65 | 66 | obs = env.reset() 67 | observation = get_observation_images(obs) 68 | positionsList[0].append(obs[0].position.x_val) 69 | positionsList[1].append(obs[0].position.y_val) 70 | cumulated_reward = 0 71 | if (epoch % TEST_INTERVAL_EPOCHS != 0 or stepCounter < LEARN_START_STEP) and TRAIN is True : # explore 72 | EXPLORE = True 73 | else: 74 | EXPLORE = False 75 | print ("Evaluate Model") 76 | 77 | #borrowed from HER example 78 | episode_experience = [] 79 | episode_succeeded = False 80 | 81 | for t in range(1000): 82 | 83 | start_req = time.time() 84 | 85 | if EXPLORE is True: #explore 86 | s = observation[0] 87 | g = observation[1] 88 | action, was_rand = Agent.feedforward(observation, explorationRate) 89 | if not was_rand: 90 | agent_decision += 1 91 | # if np.random.rand(1) < 0.5: 92 | # action = np.random.randint(ACTION_SIZE) 93 | obs_new, reward, done = env.step(action) 94 | newObservation = get_observation_images(obs_new) 95 | 96 | s_next = newObservation[0] 97 | stepCounter += 1 98 | # Agent.addMemory(observation[0], observation[1], action, \ 99 | # reward, newObservation[0], \ 100 | # newObservation[1], done) 101 | episode_experience.append((s,action,reward,s_next,g, done)) 102 | observation = newObservation 103 | obs = obs_new 104 | #test 105 | else: 106 | if not RANDOM_WALK: 107 | action, _ = Agent.feedforward(observation,0) 108 | else: 109 | action, _ = Agent.feedforward(observation,1) 110 | print(action) 111 | obs_new, reward, done = env.step(action) 112 | newObservation = get_observation_images(obs_new) 113 | observation = newObservation 114 | obs = obs_new 115 | 116 | cumulated_reward += reward 117 | if reward == 0: 118 | episode_succeeded = True 119 | succeed += 1 120 | 121 | positionsList[0].append(obs[0].position.x_val) 122 | positionsList[1].append(obs[0].position.y_val) 123 | 124 | if done: 125 | print("Agent taking decisions", agent_decision,"times out of",t+1,"steps.",100*agent_decision/float(t+1),"%") 126 | m, sec = divmod(int(time.time() - start_time + loadsim_seconds), 60) 127 | h, m = divmod(m, 60) 128 | 129 | if stepCounter == LEARN_START_STEP: 130 | print("Starting learning") 131 | 132 | #Episode is done, let's add some experience to memory 133 | if TRAIN and EXPLORE: 134 | for replay in range(t): 135 | s, a, r, s_n, g, d = episode_experience[replay] 136 | Agent.addMemory(s, g, a, r, s_n, g, d) 137 | # K-future strategy 138 | for k in range(K): 139 | future = np.random.randint(replay, t) 140 | _, _, _, g_n, _, d = episode_experience[future] 141 | final = np.allclose(s_n, g_n) 142 | #only works with the sparse reward 143 | r_n = 0 if final else -1 144 | Agent.addMemory(s, g_n, a, r_n, s_n, g_n, d) 145 | 146 | if Agent.getMemorySize() >= LEARN_START_STEP: 147 | Agent.learnOnMiniBatch(BATCH_SIZE) 148 | #print("Episode Done, Learning time") 149 | if explorationRate > FINAL_EPSILON and
stepCounter > LEARN_START_STEP: 150 | explorationRate -= (INITIAL_EPSILON - FINAL_EPSILON) / MAX_EXPLORE_STEPS 151 | 152 | print ("EP " + str(epoch) +" Csteps= " + str(stepCounter) + " - {} steps".format(t + 1) + " - CReward: " + str( 153 | round(cumulated_reward, 2)) + " Eps=" + str(round(explorationRate, 2)) + " Success rate=" + str(round(succeed/float(epoch)*100, 2)) + " Time: %d:%02d:%02d" % (h, m, sec)) 154 | epoch_data.append([str(epoch), str(stepCounter),t + 1, str( 155 | round(cumulated_reward, 2)), str(round(explorationRate, 2)), str(round(succeed/float(epoch)*100, 2)), "%d:%02d:%02d" % (h, m, sec)]) 156 | with open(DATA_FILE, 'a') as myfile: 157 | wr = csv.writer(myfile, quoting=csv.QUOTE_ALL) 158 | for data in epoch_data: 159 | wr.writerow(data) 160 | epoch_data = [] 161 | # SAVE SIMULATION DATA 162 | if epoch % SAVE_INTERVAL_EPOCHS == 0 and TRAIN is True: 163 | # save model weights and monitoring data 164 | print ('Save model') 165 | Agent.saveModel(MODEL_DIR + '/dqn_her_ep' + str(epoch) + '.h5') 166 | 167 | parameter_keys = ['explorationRate', 'current_epoch','stepCounter', 'FINAL_EPSILON','loadsim_seconds','succeed'] 168 | parameter_values = [explorationRate, epoch, stepCounter,FINAL_EPSILON, int(time.time() - start_time + loadsim_seconds), succeed] 169 | parameter_dictionary = dict(zip(parameter_keys, parameter_values)) 170 | with open(PARAM_DIR + '/dqn_her_ep' + str(epoch) + '.json','w') as outfile: 171 | json.dump(parameter_dictionary, outfile) 172 | 173 | 174 | plt.title('Traveled path of agent on epoch '+str(epoch)) 175 | 176 | #for i in np.arange(0,len(positionsList[0]),2): 177 | # if(i+2 <= len(positionsList[0])): 178 | # plt.plot(positionsList[0][i:i+2],positionsList[1][i:i+2],'k-') 179 | #plt.savefig(VIZ_DIR + '/dqn_her_ep' + str(epoch) + '.png', format='png') 180 | plt.scatter(positionsList[0],positionsList[1]) 181 | plt.xlim(0, 20) 182 | plt.ylim(-10, 10) 183 | plt.plot(positionsList[0],positionsList[1]) 184 | plt.savefig(VIZ_DIR + '/dqn_her_ep' + str(epoch) + '.png', format='png') 185 | plt.close() 186 | break 187 | 188 | except KeyboardInterrupt: 189 | print("Shutting down") 190 | --------------------------------------------------------------------------------
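For readers following the training loop in run.py, the episode-relabeling step can be read in isolation as the short sketch below. It is not part of the repository: `relabel_k_future` is a hypothetical helper name, `agent` stands for the `DeepQ` instance from `dqn_her.py`, and the sparse reward convention (0 on success, -1 otherwise) matches environment.py.

```python
import numpy as np

def relabel_k_future(episode_experience, agent, K=4):
    """Store one episode in the agent's replay memory using the HER 'K-future'
    strategy from run.py (sparse reward: 0 on success, -1 otherwise)."""
    t = len(episode_experience) - 1
    for replay in range(t):
        s, a, r, s_next, g, done = episode_experience[replay]
        # original transition with the original desired goal
        agent.addMemory(s, g, a, r, s_next, g, done)
        # K extra copies, each relabeled with a goal achieved later in the same episode
        for _ in range(K):
            future = np.random.randint(replay, t)
            _, _, _, g_new, _, d = episode_experience[future]
            r_new = 0 if np.allclose(s_next, g_new) else -1
            agent.addMemory(s, g_new, a, r_new, s_next, g_new, d)
```

Each real transition therefore contributes K + 1 entries to the replay memory, which is what allows the sparse 0/-1 reward to provide a learning signal even when the original goal is never reached.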