├── .gitignore
├── AirSimClient.py
├── LICENSE.md
├── README.md
├── airsim.png
├── collision_testing.py
├── collision_training.py
├── image_collection.py
├── image_helper.py
└── tf_softmax_layer.py

/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | carpix/
3 | logistic_logs/
4 | data/
5 | *.pkl
6 | 
--------------------------------------------------------------------------------
/AirSimClient.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import msgpackrpc #install as admin: pip install msgpack-rpc-python
3 | import numpy as np #pip install numpy
4 | import msgpack
5 | import math
6 | import time
7 | import sys
8 | import os
9 | import inspect
10 | import types
11 | import re
12 | 
13 | 
14 | class MsgpackMixin:
15 |     def to_msgpack(self, *args, **kwargs):
16 |         return self.__dict__ #msgpack.dump(self.to_dict(*args, **kwargs))
17 | 
18 |     @classmethod
19 |     def from_msgpack(cls, encoded):
20 |         obj = cls()
21 |         obj.__dict__ = {k.decode('utf-8'): v for k, v in encoded.items()}
22 |         return obj
23 | 
24 | 
25 | class AirSimImageType:
26 |     Scene = 0
27 |     DepthPlanner = 1
28 |     DepthPerspective = 2
29 |     DepthVis = 3
30 |     DisparityNormalized = 4
31 |     Segmentation = 5
32 |     SurfaceNormals = 6
33 | 
34 | class DrivetrainType:
35 |     MaxDegreeOfFreedom = 0
36 |     ForwardOnly = 1
37 | 
38 | class LandedState:
39 |     Landed = 0
40 |     Flying = 1
41 | 
42 | class Vector3r(MsgpackMixin):
43 |     x_val = np.float32(0)
44 |     y_val = np.float32(0)
45 |     z_val = np.float32(0)
46 | 
47 |     def __init__(self, x_val = np.float32(0), y_val = np.float32(0), z_val = np.float32(0)):
48 |         self.x_val = x_val
49 |         self.y_val = y_val
50 |         self.z_val = z_val
51 | 
52 | 
53 | class Quaternionr(MsgpackMixin):
54 |     w_val = np.float32(0)
55 |     x_val = np.float32(0)
56 |     y_val = np.float32(0)
57 |     z_val = np.float32(0)
58 | 
59 |     def __init__(self, x_val = np.float32(0), y_val = np.float32(0), z_val = np.float32(0), w_val = np.float32(1)):
60 |         self.x_val = x_val
61 |         self.y_val = y_val
62 |         self.z_val = z_val
63 |         self.w_val = w_val
64 | 
65 | class Pose(MsgpackMixin):
66 |     position = Vector3r()
67 |     orientation = Quaternionr()
68 | 
69 |     def __init__(self, position_val, orientation_val):
70 |         self.position = position_val
71 |         self.orientation = orientation_val
72 | 
73 | 
74 | class CollisionInfo(MsgpackMixin):
75 |     has_collided = False
76 |     normal = Vector3r()
77 |     impact_point = Vector3r()
78 |     position = Vector3r()
79 |     penetration_depth = np.float32(0)
80 |     time_stamp = np.float32(0)
81 |     object_name = ""
82 |     object_id = -1
83 | 
84 | class GeoPoint(MsgpackMixin):
85 |     latitude = 0.0
86 |     longitude = 0.0
87 |     altitude = 0.0
88 | 
89 | class YawMode(MsgpackMixin):
90 |     is_rate = True
91 |     yaw_or_rate = 0.0
92 |     def __init__(self, is_rate = True, yaw_or_rate = 0.0):
93 |         self.is_rate = is_rate
94 |         self.yaw_or_rate = yaw_or_rate
95 | 
96 | class ImageRequest(MsgpackMixin):
97 |     camera_id = np.uint8(0)
98 |     image_type = AirSimImageType.Scene
99 |     pixels_as_float = False
100 |     compress = False
101 | 
102 |     def __init__(self, camera_id, image_type, pixels_as_float = False, compress = True):
103 |         self.camera_id = camera_id
104 |         self.image_type = image_type
105 |         self.pixels_as_float = pixels_as_float
106 |         self.compress = compress
107 | 
108 | 
109 | class ImageResponse(MsgpackMixin):
110 |     image_data_uint8 = np.uint8(0)
111 |     image_data_float = np.float32(0)
112 |     camera_position = Vector3r()
113 |     camera_orientation = Quaternionr()
114 |     time_stamp = np.uint64(0)
115 |     message = ''
116 |     pixels_as_float = np.float32(0)
117 |     compress = True
118 |     width = 0
119 |     height = 0
120 |     image_type = AirSimImageType.Scene
121 | 
122 | class CarControls(MsgpackMixin):
123 |     throttle = np.float32(0)
124 |     steering = np.float32(0)
125 |     brake = np.float32(0)
126 |     handbrake = False
127 |     is_manual_gear = False
128 |     manual_gear = 0
129 |     gear_immediate = True
130 | 
131 |     def set_throttle(self, throttle_val, forward):
132 |         if (forward):
133 |             self.is_manual_gear = False # assign to instance attributes (bare names here were unused locals)
134 |             self.manual_gear = 0
135 |             self.throttle = abs(throttle_val)
136 |         else:
137 |             self.is_manual_gear = False
138 |             self.manual_gear = -1
139 |             self.throttle = - abs(throttle_val)
140 | 
141 | class CarState(MsgpackMixin):
142 |     speed = np.float32(0)
143 |     gear = 0
144 |     position = Vector3r()
145 |     velocity = Vector3r()
146 |     orientation = Quaternionr()
147 | 
148 | class AirSimClientBase:
149 |     def __init__(self, ip, port):
150 |         self.client = msgpackrpc.Client(msgpackrpc.Address(ip, port), timeout = 3600)
151 | 
152 |     def ping(self):
153 |         return self.client.call('ping')
154 | 
155 |     def reset(self):
156 |         self.client.call('reset')
157 | 
158 |     def confirmConnection(self):
159 |         print('Waiting for connection: ', end='')
160 |         home = self.getHomeGeoPoint()
161 |         while ((home.latitude == 0 and home.longitude == 0 and home.altitude == 0) or
162 |                math.isnan(home.latitude) or math.isnan(home.longitude) or math.isnan(home.altitude)):
163 |             time.sleep(1)
164 |             home = self.getHomeGeoPoint()
165 |             print('X', end='')
166 |         print('')
167 | 
168 |     def getHomeGeoPoint(self):
169 |         return GeoPoint.from_msgpack(self.client.call('getHomeGeoPoint'))
170 | 
171 |     # basic flight control
172 |     def enableApiControl(self, is_enabled):
173 |         return self.client.call('enableApiControl', is_enabled)
174 |     def isApiControlEnabled(self):
175 |         return self.client.call('isApiControlEnabled')
176 | 
177 |     def simSetSegmentationObjectID(self, mesh_name, object_id, is_name_regex = False):
178 |         return self.client.call('simSetSegmentationObjectID', mesh_name, object_id, is_name_regex)
179 |     def simGetSegmentationObjectID(self, mesh_name):
180 |         return self.client.call('simGetSegmentationObjectID', mesh_name)
181 | 
182 |     # camera control
183 |     # simGetImage returns compressed png in array of bytes
184 |     # image_type uses one of the AirSimImageType members
185 |     def simGetImage(self, camera_id, image_type):
186 |         # because this method returns std::vector<uint8>, msgpack decides to encode it as a string unfortunately.
187 |         result = self.client.call('simGetImage', camera_id, image_type)
188 |         if (result == "" or result == "\0"):
189 |             return None
190 |         return result
191 | 
192 |     # camera control
193 |     # simGetImages takes a list of ImageRequests and returns a list of ImageResponses
194 |     # image_type uses one of the AirSimImageType members
195 |     def simGetImages(self, requests):
196 |         responses_raw = self.client.call('simGetImages', requests)
197 |         return [ImageResponse.from_msgpack(response_raw) for response_raw in responses_raw]
198 | 
199 |     def getCollisionInfo(self):
200 |         return CollisionInfo.from_msgpack(self.client.call('getCollisionInfo'))
201 | 
202 |     @staticmethod
203 |     def stringToUint8Array(bstr):
204 |         return np.frombuffer(bstr, np.uint8) # frombuffer replaces the deprecated np.fromstring
205 |     @staticmethod
206 |     def stringToFloatArray(bstr):
207 |         return np.frombuffer(bstr, np.float32)
208 |     @staticmethod
209 |     def listTo2DFloatArray(flst, width, height):
210 |         return np.reshape(np.asarray(flst, np.float32), (height, width))
211 |     @staticmethod
212 |     def getPfmArray(response):
213 |         return AirSimClientBase.listTo2DFloatArray(response.image_data_float, response.width, response.height)
214 | 
215 |     @staticmethod
216 |     def get_public_fields(obj):
217 |         return [attr for attr in dir(obj)
218 |                 if not (attr.startswith("_")
219 |                     or inspect.isbuiltin(attr)
220 |                     or inspect.isfunction(attr)
221 |                     or inspect.ismethod(attr))]
222 | 
223 | 
224 |     @staticmethod
225 |     def to_dict(obj):
226 |         return dict([attr, getattr(obj, attr)] for attr in AirSimClientBase.get_public_fields(obj))
227 | 
228 |     @staticmethod
229 |     def to_str(obj):
230 |         return str(AirSimClientBase.to_dict(obj))
231 | 
232 |     @staticmethod
233 |     def write_file(filename, bstr):
234 |         with open(filename, 'wb') as afile:
235 |             afile.write(bstr)
236 | 
237 |     def simSetPose(self, pose, ignore_collision):
238 |         self.client.call('simSetPose', pose, ignore_collision)
239 | 
240 |     def simGetPose(self):
241 |         return self.client.call('simGetPose')
242 | 
243 |     # helper method for converting getOrientation to roll/pitch/yaw
244 |     # https://en.wikipedia.org/wiki/Conversion_between_quaternions_and_Euler_angles
245 |     @staticmethod
246 |     def toEulerianAngle(q):
247 |         z = q.z_val
248 |         y = q.y_val
249 |         x = q.x_val
250 |         w = q.w_val
251 |         ysqr = y * y
252 | 
253 |         # roll (x-axis rotation)
254 |         t0 = +2.0 * (w*x + y*z)
255 |         t1 = +1.0 - 2.0*(x*x + ysqr)
256 |         roll = math.atan2(t0, t1)
257 | 
258 |         # pitch (y-axis rotation)
259 |         t2 = +2.0 * (w*y - z*x)
260 |         if (t2 > 1.0):
261 |             t2 = 1.0
262 |         if (t2 < -1.0):
263 |             t2 = -1.0
264 |         pitch = math.asin(t2)
265 | 
266 |         # yaw (z-axis rotation)
267 |         t3 = +2.0 * (w*z + x*y)
268 |         t4 = +1.0 - 2.0 * (ysqr + z*z)
269 |         yaw = math.atan2(t3, t4)
270 | 
271 |         return (pitch, roll, yaw)
272 | 
273 |     @staticmethod
274 |     def toQuaternion(pitch, roll, yaw):
275 |         t0 = math.cos(yaw * 0.5)
276 |         t1 = math.sin(yaw * 0.5)
277 |         t2 = math.cos(roll * 0.5)
278 |         t3 = math.sin(roll * 0.5)
279 |         t4 = math.cos(pitch * 0.5)
280 |         t5 = math.sin(pitch * 0.5)
281 | 
282 |         q = Quaternionr()
283 |         q.w_val = t0 * t2 * t4 + t1 * t3 * t5 #w
284 |         q.x_val = t0 * t3 * t4 - t1 * t2 * t5 #x
285 |         q.y_val = t0 * t2 * t5 + t1 * t3 * t4 #y
286 |         q.z_val = t1 * t2 * t4 - t0 * t3 * t5 #z
287 |         return q
288 | 
289 |     @staticmethod
290 |     def wait_key(message = ''):
291 |         ''' Wait for a key press on the console and return it. '''
292 |         if message != '':
293 |             print (message)
294 | 
295 |         result = None
296 |         if os.name == 'nt':
297 |             import msvcrt
298 |             result = msvcrt.getch()
299 |         else:
300 |             import termios
301 |             fd = sys.stdin.fileno()
302 | 
303 |             oldterm = termios.tcgetattr(fd)
304 |             newattr = termios.tcgetattr(fd)
305 |             newattr[3] = newattr[3] & ~termios.ICANON & ~termios.ECHO
306 |             termios.tcsetattr(fd, termios.TCSANOW, newattr)
307 | 
308 |             try:
309 |                 result = sys.stdin.read(1)
310 |             except IOError:
311 |                 pass
312 |             finally:
313 |                 termios.tcsetattr(fd, termios.TCSAFLUSH, oldterm)
314 | 
315 |         return result
316 | 
317 |     @staticmethod
318 |     def read_pfm(file):
319 |         """ Read a pfm file """
320 |         file = open(file, 'rb')
321 | 
322 |         color = None
323 |         width = None
324 |         height = None
325 |         scale = None
326 |         endian = None
327 | 
328 |         header = file.readline().rstrip()
329 |         header = str(bytes.decode(header, encoding='utf-8'))
330 |         if header == 'PF':
331 |             color = True
332 |         elif header == 'Pf':
333 |             color = False
334 |         else:
335 |             raise Exception('Not a PFM file.')
336 | 
337 |         temp_str = str(bytes.decode(file.readline(), encoding='utf-8'))
338 |         dim_match = re.match(r'^(\d+)\s(\d+)\s$', temp_str)
339 |         if dim_match:
340 |             width, height = map(int, dim_match.groups())
341 |         else:
342 |             raise Exception('Malformed PFM header.')
343 | 
344 |         scale = float(file.readline().rstrip())
345 |         if scale < 0: # little-endian
346 |             endian = '<'
347 |             scale = -scale
348 |         else:
349 |             endian = '>' # big-endian
350 | 
351 |         data = np.fromfile(file, endian + 'f')
352 |         shape = (height, width, 3) if color else (height, width)
353 | 
354 |         data = np.reshape(data, shape)
355 |         # DEY: I don't know why this was there.
356 |         #data = np.flipud(data)
357 |         file.close()
358 | 
359 |         return data, scale
360 | 
361 |     @staticmethod
362 |     def write_pfm(file, image, scale=1):
363 |         """ Write a pfm file """
364 |         file = open(file, 'wb')
365 | 
366 |         color = None
367 | 
368 |         if image.dtype.name != 'float32':
369 |             raise Exception('Image dtype must be float32.')
370 | 
371 |         image = np.flipud(image)
372 | 
373 |         if len(image.shape) == 3 and image.shape[2] == 3: # color image
374 |             color = True
375 |         elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1: # greyscale
376 |             color = False
377 |         else:
378 |             raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')
379 | 
380 |         file.write('PF\n'.encode('utf-8') if color else 'Pf\n'.encode('utf-8'))
381 |         temp_str = '%d %d\n' % (image.shape[1], image.shape[0])
382 |         file.write(temp_str.encode('utf-8'))
383 | 
384 |         endian = image.dtype.byteorder
385 | 
386 |         if endian == '<' or (endian == '=' and sys.byteorder == 'little'):
387 |             scale = -scale
388 | 
389 |         temp_str = '%f\n' % scale
390 |         file.write(temp_str.encode('utf-8'))
391 | 
392 |         image.tofile(file)
393 | 
394 |     @staticmethod
395 |     def write_png(filename, image):
396 |         """ image must be numpy array H X W X channels
397 |         """
398 |         import zlib, struct
399 | 
400 |         buf = image.flatten().tobytes()
401 |         width = image.shape[1]
402 |         height = image.shape[0]
403 | 
404 |         # reverse the vertical line order and add null bytes at the start
405 |         width_byte_4 = width * 4
406 |         raw_data = b''.join(b'\x00' + buf[span:span + width_byte_4]
407 |                             for span in range((height - 1) * width_byte_4, -1, - width_byte_4))
408 | 
409 |         def png_pack(png_tag, data):
410 |             chunk_head = png_tag + data
411 |             return (struct.pack("!I", len(data)) +
412 |                     chunk_head +
413 |                     struct.pack("!I", 0xFFFFFFFF & zlib.crc32(chunk_head)))
414 | 
415 |         png_bytes = b''.join([
416 |             b'\x89PNG\r\n\x1a\n',
417 |             png_pack(b'IHDR', struct.pack("!2I5B", width, height, 8, 6, 0, 0, 0)),
418 |             png_pack(b'IDAT', zlib.compress(raw_data, 9)),
419 |             png_pack(b'IEND', b'')])
420 | 
421 |         AirSimClientBase.write_file(filename, png_bytes)
422 | 
423 | 
424 | # ----------------------------------- Multirotor APIs ---------------------------------------------
425 | class MultirotorClient(AirSimClientBase, object):
426 |     def __init__(self, ip = ""):
427 |         if (ip == ""):
428 |             ip = "127.0.0.1"
429 |         super(MultirotorClient, self).__init__(ip, 41451)
430 | 
431 |     def armDisarm(self, arm):
432 |         return self.client.call('armDisarm', arm)
433 | 
434 |     def takeoff(self, max_wait_seconds = 15):
435 |         return self.client.call('takeoff', max_wait_seconds)
436 | 
437 |     def land(self, max_wait_seconds = 60):
438 |         return self.client.call('land', max_wait_seconds)
439 | 
440 |     def goHome(self):
441 |         return self.client.call('goHome')
442 | 
443 |     def hover(self):
444 |         return self.client.call('hover')
445 | 
446 | 
447 |     # query vehicle state
448 |     def getPosition(self):
449 |         return Vector3r.from_msgpack(self.client.call('getPosition'))
450 |     def getVelocity(self):
451 |         return Vector3r.from_msgpack(self.client.call('getVelocity'))
452 |     def getOrientation(self):
453 |         return Quaternionr.from_msgpack(self.client.call('getOrientation'))
454 |     def getLandedState(self):
455 |         return self.client.call('getLandedState')
456 |     def getGpsLocation(self):
457 |         return GeoPoint.from_msgpack(self.client.call('getGpsLocation'))
458 |     def getPitchRollYaw(self):
459 |         return self.toEulerianAngle(self.getOrientation())
460 | 
461 |     #def getRCData(self):
462 |     #    return self.client.call('getRCData')
463 |     def timestampNow(self):
464 |         return self.client.call('timestampNow')
465 |     def isApiControlEnabled(self):
466 |         return self.client.call('isApiControlEnabled')
467 |     def isSimulationMode(self):
468 |         return self.client.call('isSimulationMode')
469 |     def getServerDebugInfo(self):
470 |         return self.client.call('getServerDebugInfo')
471 | 
472 | 
473 |     # APIs for control
474 |     def moveByAngle(self, pitch, roll, z, yaw, duration):
475 |         return self.client.call('moveByAngle', pitch, roll, z, yaw, duration)
476 | 
477 |     def moveByVelocity(self, vx, vy, vz, duration, drivetrain = DrivetrainType.MaxDegreeOfFreedom, yaw_mode = YawMode()):
478 |         return self.client.call('moveByVelocity', vx, vy, vz, duration, drivetrain, yaw_mode)
479 | 
480 |     def moveByVelocityZ(self, vx, vy, z, duration, drivetrain = DrivetrainType.MaxDegreeOfFreedom, yaw_mode = YawMode()):
481 |         return self.client.call('moveByVelocityZ', vx, vy, z, duration, drivetrain, yaw_mode)
482 | 
483 |     def moveOnPath(self, path, velocity, max_wait_seconds = 60, drivetrain = DrivetrainType.MaxDegreeOfFreedom, yaw_mode = YawMode(), lookahead = -1, adaptive_lookahead = 1):
484 |         return self.client.call('moveOnPath', path, velocity, max_wait_seconds, drivetrain, yaw_mode, lookahead, adaptive_lookahead)
485 | 
486 |     def moveToZ(self, z, velocity, max_wait_seconds = 60, yaw_mode = YawMode(), lookahead = -1, adaptive_lookahead = 1):
487 |         return self.client.call('moveToZ', z, velocity, max_wait_seconds, yaw_mode, lookahead, adaptive_lookahead)
488 | 
489 |     def moveToPosition(self, x, y, z, velocity, max_wait_seconds = 60, drivetrain = DrivetrainType.MaxDegreeOfFreedom, yaw_mode = YawMode(), lookahead = -1, adaptive_lookahead = 1):
490 |         return self.client.call('moveToPosition', x, y, z, velocity, max_wait_seconds, drivetrain, yaw_mode, lookahead, adaptive_lookahead)
491 | 
492 |     def moveByManual(self, vx_max, vy_max, z_min, duration, drivetrain = DrivetrainType.MaxDegreeOfFreedom, yaw_mode = YawMode()):
493 |         return self.client.call('moveByManual', vx_max, vy_max, z_min, duration, drivetrain, yaw_mode)
494 | 
495 |     def rotateToYaw(self, yaw, max_wait_seconds = 60, margin = 5):
496 |         return self.client.call('rotateToYaw', yaw, max_wait_seconds, margin)
497 | 
498 |     def rotateByYawRate(self, yaw_rate, duration):
499 |         return self.client.call('rotateByYawRate', yaw_rate, duration)
500 | 
501 | # ----------------------------------- Car APIs ---------------------------------------------
502 | class CarClient(AirSimClientBase, object):
503 |     def __init__(self, ip = ""):
504 |         if (ip == ""):
505 |             ip = "127.0.0.1"
506 |         super(CarClient, self).__init__(ip, 42451)
507 | 
508 |     def setCarControls(self, controls):
509 |         self.client.call('setCarControls', controls)
510 | 
511 |     def getCarState(self):
512 |         state_raw = self.client.call('getCarState')
513 |         return CarState.from_msgpack(state_raw)
514 | 
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | AirSimTensorFlow: A simple example of using Microsoft AirSim to train a TensorFlow neural net on collision avoidance
2 | 
3 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
4 | Simon D. Levy, Will McMurtry, Jacob Rosen
5 | 
6 | All rights reserved.
7 | 
8 | MIT License
9 | 
10 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
11 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
13 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | This repository contains Python scripts showing how you can use [Microsoft AirSim](https://github.com/Microsoft/AirSim) to collect image data
4 | from a moving vehicle, then use that data to train and test a deep-learning neural net in TensorFlow.
5 | 
6 | # Prerequisites
7 | 
8 | * [Recommended hardware](https://wiki.unrealengine.com/Recommended_Hardware) for running UnrealEngine4, required
9 | for AirSim. Although it is possible to build AirSim on OS X and Linux, we found
10 | it easiest to use the pre-compiled Windows binaries in the
11 | [Neighborhood](https://github.com/Microsoft/AirSim/releases/download/v1.1.7/Neighbourhood.zip)
12 | example.
13 | 
14 | * [Python3](https://www.python.org/ftp/python/3.6.3/python-3.6.3-amd64.exe) for 64-bit Windows
15 | 
16 | * [TensorFlow](https://www.tensorflow.org/install/install_windows). To run TensorFlow on your GPU as we and
17 | most people do, you'll need to follow the
18 | [directions](https://www.tensorflow.org/install/install_windows) for installing CUDA and cuDNN. We recommend setting aside at least an hour to make sure you do this right.
19 | 
20 | # Instructions
21 | 
22 | 1. Clone this repository.
23 | 2. Download and unzip the [Neighborhood](https://github.com/Microsoft/AirSim/releases/download/v1.1.7/Neighbourhood.zip)
24 | example, open it, and click run.bat to launch AirSim.
25 | 3. When prompted, go with the default car simulation. If you press the 3 key on your keyboard,
26 | you will see the little image on which the neural net will be trained.
27 | 4. From the repository, run the image_collection.py script. It will start the car moving and stop when the
28 | car collides with the fence, creating a carpix folder containing the images on which you will train
29 | the network in the next step.
30 | 5. From the repository, run the collision_training.py script. Running on an HP Z440 workstation with an
31 | NVIDIA GeForce GTX 1080 Ti GPU, we were able to complete the 500 training iterations in a few seconds.
32 | 6. From the repository, run the collision_testing.py script. This should drive the car forward as before,
33 | but the car should stop right before it hits the fence, based on the collision predicted by the neural net.
34 | 
35 | # How it works
36 | 
37 | The image_collection script maintains a queue of the ten most recent images and saves them to numbered
38 | files in the carpix folder. The collision_training script converts these color images to
39 | grayscale, then builds a training set in which all images but the final one are labeled as safe (no
40 | collision; code [0 1]), and the final one is labeled as a collision (code [1 0]).
41 | This training script then uses Python's built-in pickle library to
42 | [save](https://github.com/simondlevy/AirSimTensorFlow/blob/master/collision_training.py#L111-L113)
43 | the trained network parameters (weights and biases). The collision_testing script uses pickle to
44 | [restore](https://github.com/simondlevy/AirSimTensorFlow/blob/master/collision_testing.py#L42-L45)
45 | these parameters, then reconstructs the TensorFlow [neural net](https://github.com/simondlevy/AirSimTensorFlow/blob/master/tf_softmax_layer.py#L18-L28) from them. (We found this approach easier than
46 | using TensorFlow's [save-and-restore](https://www.tensorflow.org/programmers_guide/saved_model) API.)
47 | Finally, the collision_testing script moves the vehicle forward, converting the live
48 | image into grayscale and running it through the network to make a collision/no-collision prediction.
49 | When the value of the “collision bit” exceeds 0.5, the script stops the vehicle by applying the brakes. (A condensed sketch of the save/restore round-trip appears below, after the Future work section.)
50 | 
51 | # Future work
52 | 
53 | Our single-layer logistic regression network provides a simple proof-of-concept
54 | example; however, for a more realistic data set involving collisions with
55 | different types of objects, a convolutional network would make more sense.
56 | AirSim also provides access to depth images (just press the 1 key during
57 | the simulation), which, like the Lidar on today's self-driving cars, would
58 | provide a valuable additional source of information for avoiding collisions.
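59 | 
60 | As a recap of the save/restore round-trip described under "How it works," here is a condensed
61 | sketch drawn from collision_training.py and collision_testing.py. It is illustrative rather than
62 | a third script: it assumes collision_training.py has already been run (so that params.pkl exists),
63 | and it reuses the IMGSIZE value from collision_testing.py.
64 | 
65 | ```python
66 | import pickle
67 | import tensorflow as tf
68 | 
69 | from tf_softmax_layer import inference
70 | 
71 | IMGSIZE = 1032  # length of one flattened, scaled-down grayscale image
72 | 
73 | # Training side: evaluate the graph's trainable variables (W, b) to plain
74 | # NumPy arrays, then pickle them:
75 | #
76 | #   params = [sess.run(param) for param in tf.trainable_variables()]
77 | #   pickle.dump(params, open('params.pkl', 'wb'))
78 | 
79 | # Testing side: un-pickle the arrays and rebuild the same one-layer network,
80 | # using them as constant initializers.
81 | W, b = pickle.load(open('params.pkl', 'rb'))
82 | 
83 | with tf.Graph().as_default():
84 |     x = tf.placeholder('float', [None, IMGSIZE])
85 |     output = inference(x, IMGSIZE, 2, W, b)
86 |     sess = tf.Session()
87 |     sess.run(tf.global_variables_initializer())
88 |     # For a flattened grayscale image (from image_helper.loadgray),
89 |     # sess.run(output, feed_dict={x: [image]})[0][1] is the "safe"
90 |     # (no-collision) probability; the car brakes when it falls below 0.5.
91 | ```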
92 | 
93 | # Credits
94 | 
95 | This code represents the combined work of two teams in Prof. Simon D. Levy's fall 2017 AI course
96 | ([CSCI 315](http://home.wlu.edu/~levys/courses/csci315f2017/)) at
97 | Washington and Lee University (listed alphabetically):
98 | 
99 | * Jack Baird
100 | * Alex Cantrell
101 | * Keith Denning
102 | * Rajwol Joshi
103 | * Will McMurtry
104 | * Jacob Rosen
105 | 
106 | # Acknowledgement
107 | 
108 | We thank David Pfaff of the [W&L IQ Center](https://www.wlu.edu/iq-center) for
109 | providing the hardware on which we developed this project.
110 | 
--------------------------------------------------------------------------------
/airsim.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simondlevy/AirSimTensorFlow/998fec23f3d717d8aa5d407bfc41c4ad05e2c208/airsim.png
--------------------------------------------------------------------------------
/collision_testing.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | collision_testing.py : tests a pickled network on its ability to predict a collision
4 | 
5 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
6 | Simon D. Levy, Will McMurtry, Jacob Rosen
7 | 
8 | This file is part of AirSimTensorFlow
9 | 
10 | MIT License
11 | '''
12 | 
13 | from AirSimClient import CarClient, CarControls, ImageRequest, AirSimImageType, AirSimClientBase
14 | import os
15 | import time
16 | import tensorflow as tf
17 | import pickle
18 | import sys
19 | 
20 | from image_helper import loadgray, IMAGEDIR
21 | from tf_softmax_layer import inference
22 | 
23 | TMPFILE = IMAGEDIR + '/active.png'
24 | PARAMFILE = 'params.pkl'
25 | IMGSIZE = 1032
26 | INITIAL_THROTTLE = 0.65
27 | BRAKING_DURATION = 15
28 | 
29 | # connect to the AirSim simulator
30 | client = CarClient()
31 | client.confirmConnection()
32 | print('Connected')
33 | client.enableApiControl(True)
34 | car_controls = CarControls()
35 | 
36 | client.reset()
37 | 
38 | # go forward
39 | car_controls.throttle = INITIAL_THROTTLE
40 | car_controls.steering = 0
41 | client.setCarControls(car_controls)
42 | 
43 | # Load saved training params as ordinary NumPy arrays
44 | W,b = pickle.load(open(PARAMFILE, 'rb'))
45 | 
46 | with tf.Graph().as_default():
47 | 
48 |     # Placeholder for an image
49 |     x = tf.placeholder('float', [None, IMGSIZE])
50 | 
51 |     # Our inference engine, initialized with the weights we just loaded
52 |     output = inference(x, IMGSIZE, 2, W, b)
53 | 
54 |     # TensorFlow initialization boilerplate
55 |     sess = tf.Session()
56 |     init_op = tf.global_variables_initializer()
57 |     sess.run(init_op)
58 | 
59 |     # Once the brakes come on, we need to keep them on for a while before exiting; otherwise,
60 |     # the vehicle will resume moving.
61 |     brakingCount = 0
62 | 
63 |     # Loop until we detect a collision
64 |     while True:
65 | 
66 |         # Get RGBA camera images from the car
67 |         responses = client.simGetImages([ImageRequest(1, AirSimImageType.Scene)])
68 | 
69 |         # Save it to a temporary file
70 |         image = responses[0].image_data_uint8
71 |         AirSimClientBase.write_file(os.path.normpath(TMPFILE), image)
72 | 
73 |         # Re-load the image as a grayscale array
74 |         image = loadgray(TMPFILE)
75 | 
76 |         # Run the image through our inference engine.
77 |         # Engine returns a softmax output inside a list, so we grab the first
78 |         # element of the list (the actual softmax vector), whose second element
79 |         # is the absence of an obstacle.
80 |         safety = sess.run(output, feed_dict={x:[image]})[0][1]
81 | 
82 |         # Slam on the brakes if it ain't safe!
83 |         if safety < 0.5:
84 | 
85 |             if brakingCount > BRAKING_DURATION:
86 |                 print('BRAKING TO AVOID COLLISION')
87 |                 sys.stdout.flush()
88 |                 break
89 | 
90 |             car_controls.brake = 1.0
91 |             client.setCarControls(car_controls)
92 | 
93 |             brakingCount += 1
94 | 
95 |         # Wait a bit on each iteration
96 |         time.sleep(0.1)
97 | 
98 | client.enableApiControl(False)
99 | 
--------------------------------------------------------------------------------
/collision_training.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | collision_training.py : uses stored images to train a neural net to detect collisions
4 | 
5 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
6 | Simon D. Levy, Will McMurtry, Jacob Rosen
7 | 
8 | This file is part of AirSimTensorFlow
9 | 
10 | MIT License
11 | '''
12 | 
13 | # Third-party modules, plus Python's built-in pickle
14 | import tensorflow as tf
15 | import numpy as np
16 | import pickle
17 | 
18 | # Modules for this project
19 | from image_helper import loadgray
20 | from tf_softmax_layer import inference
21 | 
22 | # Final image is crash; previous are no-crash
23 | SAFESIZE = 5
24 | 
25 | # Where we've stored images
26 | IMAGEDIR = './carpix'
27 | 
28 | # Where we'll store weights and biases
29 | PARAMFILE = 'params.pkl'
30 | 
31 | # Parameters
32 | learning_rate = 0.01
33 | training_epochs = 500
34 | batch_size = 100
35 | display_step = 10
36 | 
37 | def loss(output, y):
38 |     dot_product = y * tf.log(output)
39 | 
40 |     # Reduction along axis 0 collapses each column into a single
41 |     # value, whereas reduction along axis 1 collapses each row
42 |     # into a single value. In general, reduction along axis i
43 |     # collapses the ith dimension of a tensor to size 1. Here, reducing the [N, 2] products along axis 1 yields one cross-entropy value per image.
44 |     xentropy = -tf.reduce_sum(dot_product, axis=1)
45 | 
46 |     loss = tf.reduce_mean(xentropy)
47 | 
48 |     return loss
49 | 
50 | def training(cost, global_step):
51 | 
52 |     tf.summary.scalar('cost', cost)
53 |     optimizer = tf.train.GradientDescentOptimizer(learning_rate)
54 |     train_op = optimizer.minimize(cost, global_step=global_step)
55 | 
56 |     return train_op
57 | 
58 | def main():
59 | 
60 |     # This will get the number of pixels in each image (they must all be the same!)
61 |     imgsize = 0
62 | 
63 |     # Read in images from car, convert to grayscale, scale down, and flatten for use as input
64 |     images = []
65 |     for k in range(SAFESIZE):
66 | 
67 |         image = loadgray(IMAGEDIR + '/image%03d.png' % k)
68 | 
69 |         imgsize = np.prod(image.shape)
70 |         images.append(image)
71 | 
72 |     # All but the last image are safe (01 = no-crash; 10 = crash)
73 |     targets = []
74 |     for k in range(SAFESIZE-1):
75 |         targets.append([0,1])
76 |     targets.append([1,0])
77 | 
78 |     with tf.Graph().as_default():
79 | 
80 |         x = tf.placeholder('float', [None, imgsize]) # car FPV images
81 |         y = tf.placeholder('float', [None, 2])       # 01 = no-crash; 10 = crash
82 | 
83 |         output = inference(x, imgsize, 2)
84 | 
85 |         cost = loss(output, y)
86 | 
87 |         global_step = tf.Variable(0, name='global_step', trainable=False)
88 | 
89 |         train_op = training(cost, global_step)
90 | 
91 |         sess = tf.Session()
92 | 
93 |         init_op = tf.global_variables_initializer()
94 | 
95 |         sess.run(init_op)
96 | 
97 |         # Training cycle
98 |         for epoch in range(training_epochs):
99 | 
100 |             # Fit training using batch data
101 |             sess.run(train_op, feed_dict={x: images, y: targets})
102 | 
103 |             # Compute average loss
104 |             avg_cost = sess.run(cost, feed_dict={x: images, y: targets})
105 | 
106 |             # Display logs per epoch step
107 |             if epoch%display_step == 0:
108 |                 print('Epoch:', '%04d' % epoch, 'cost =', '{:.9f}'.format(avg_cost))
109 | 
110 |         print('Optimization Finished; saving weights to ' + PARAMFILE)
111 |         params = [sess.run(param) for param in tf.trainable_variables()]
112 | 
113 |         pickle.dump(params, open(PARAMFILE, 'wb'))
114 | 
115 | if __name__ == '__main__':
116 | 
117 |     main()
118 | 
--------------------------------------------------------------------------------
/image_collection.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | image_collection.py : uses AirSim to collect vehicle first-person-view images
4 | 
5 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
6 | Simon D. Levy, Will McMurtry, Jacob Rosen
7 | 
8 | This file is part of AirSimTensorFlow
9 | 
10 | MIT License
11 | '''
12 | 
13 | from AirSimClient import CarClient, CarControls, ImageRequest, AirSimImageType, AirSimClientBase
14 | from image_helper import IMAGEDIR
15 | import pprint
16 | import os
17 | import time
18 | 
19 | # We maintain a queue of images of this size
20 | QUEUESIZE = 10
21 | 
22 | # Create image directory if it doesn't already exist
23 | try:
24 |     os.stat(IMAGEDIR)
25 | except OSError:
26 |     os.mkdir(IMAGEDIR)
27 | 
28 | # connect to the AirSim simulator
29 | client = CarClient()
30 | client.confirmConnection()
31 | print('Connected')
32 | client.enableApiControl(True)
33 | car_controls = CarControls()
34 | 
35 | client.reset()
36 | 
37 | # go forward
38 | car_controls.throttle = 1.0
39 | car_controls.steering = 0
40 | client.setCarControls(car_controls)
41 | 
42 | imagequeue = []
43 | 
44 | while True:
45 | 
46 |     # get RGBA camera images from the car
47 |     responses = client.simGetImages([ImageRequest(1, AirSimImageType.Scene)])
48 | 
49 |     # add image to queue
50 |     imagequeue.append(responses[0].image_data_uint8)
51 | 
52 |     # dump queue when it gets full
53 |     if len(imagequeue) == QUEUESIZE:
54 |         for i in range(QUEUESIZE):
55 |             AirSimClientBase.write_file(os.path.normpath(IMAGEDIR + '/image%03d.png' % i ), imagequeue[i])
56 |         imagequeue.pop(0)
57 | 
58 |     collision_info = client.getCollisionInfo()
59 | 
60 |     if collision_info.has_collided:
61 |         print("Collision at pos %s, normal %s, impact pt %s, penetration %f, name %s, obj id %d" % (
62 |             pprint.pformat(collision_info.position),
63 |             pprint.pformat(collision_info.normal),
64 |             pprint.pformat(collision_info.impact_point),
65 |             collision_info.penetration_depth, collision_info.object_name, collision_info.object_id))
66 |         break
67 | 
68 |     time.sleep(0.1)
69 | 
70 | client.enableApiControl(False)
71 | 
--------------------------------------------------------------------------------
/image_helper.py:
--------------------------------------------------------------------------------
1 | '''
2 | image_helper.py : contains a loadgray() method to load an RGBA image in PNG format
3 | and return a grayscale image
4 | 
5 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
6 | Simon D. Levy, Will McMurtry, Jacob Rosen
7 | 
8 | This file is part of AirSimTensorFlow
9 | 
10 | MIT License
11 | '''
12 | 
13 | import matplotlib.pyplot as plt
14 | 
15 | # Images are too big to train quickly, so we scale 'em down
16 | SCALEDOWN = 6
17 | 
18 | # Where we'll store images
19 | IMAGEDIR = './carpix'
20 | 
21 | 
22 | def loadgray(filename):
23 |     '''
24 |     Loads an RGBA image from FILENAME, converts it to grayscale, and returns a flattened copy
25 |     '''
26 | 
27 |     image = plt.imread(filename)
28 | 
29 |     # RGB -> gray formula from https://www.johndcook.com/blog/2009/08/24/algorithms-convert-color-grayscale/
30 |     image = 0.21 * image[:,:,0] + 0.72 * image[:,:,1] + 0.07 * image[:,:,2]
31 |     image = image[0::SCALEDOWN, 0::SCALEDOWN]
32 |     image = image.flatten()
33 | 
34 |     return image
35 | 
--------------------------------------------------------------------------------
/tf_softmax_layer.py:
--------------------------------------------------------------------------------
1 | '''
2 | tf_softmax_layer.py : generic SoftMax inference() method for TensorFlow
3 | 
4 | Adapted from:
5 | 
6 | https://github.com/darksigma/Fundamentals-of-Deep-Learning-Book/blob/master/fdl_examples/chapter3/logistic_regression_updated.py
7 | 
8 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
9 | Simon D. Levy, Will McMurtry, Jacob Rosen
10 | 
11 | This file is part of AirSimTensorFlow
12 | 
13 | MIT License
14 | '''
15 | 
16 | import tensorflow as tf
17 | 
18 | def inference(x, xsize, ysize, W_vals=0, b_vals=0):
19 |     '''
20 |     This is a general-purpose softmax inference layer implementation.
21 |     '''
22 |     W_init = tf.constant_initializer(value=W_vals)
23 |     b_init = tf.constant_initializer(value=b_vals)
24 |     W = tf.get_variable('W', [xsize, ysize], initializer=W_init)
25 |     b = tf.get_variable('b', [ysize], initializer=b_init)
26 |     output = tf.nn.softmax(tf.matmul(x, W) + b)
27 | 
28 |     return output
29 | 
--------------------------------------------------------------------------------