├── .gitignore
├── AirSimClient.py
├── LICENSE.md
├── README.md
├── airsim.png
├── collision_testing.py
├── collision_training.py
├── image_collection.py
├── image_helper.py
└── tf_softmax_layer.py

/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | carpix/
3 | logistic_logs/
4 | data/
5 | *.pkl
6 | 
--------------------------------------------------------------------------------
/AirSimClient.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import msgpackrpc #install as admin: pip install msgpack-rpc-python
3 | import numpy as np #pip install numpy
4 | import msgpack
5 | import math
6 | import time
7 | import sys
8 | import os
9 | import inspect
10 | import types
11 | import re
12 | 
13 | 
14 | class MsgpackMixin:
15 |     def to_msgpack(self, *args, **kwargs):
16 |         return self.__dict__ #msgpack.dump(self.to_dict(*args, **kwargs))
17 | 
18 |     @classmethod
19 |     def from_msgpack(cls, encoded):
20 |         obj = cls()
21 |         obj.__dict__ = {k.decode('utf-8'): v for k, v in encoded.items()}
22 |         return obj
23 | 
24 | 
25 | class AirSimImageType:
26 |     Scene = 0
27 |     DepthPlanner = 1
28 |     DepthPerspective = 2
29 |     DepthVis = 3
30 |     DisparityNormalized = 4
31 |     Segmentation = 5
32 |     SurfaceNormals = 6
33 | 
34 | class DrivetrainType:
35 |     MaxDegreeOfFreedom = 0
36 |     ForwardOnly = 1
37 | 
38 | class LandedState:
39 |     Landed = 0
40 |     Flying = 1
41 | 
42 | class Vector3r(MsgpackMixin):
43 |     x_val = np.float32(0)
44 |     y_val = np.float32(0)
45 |     z_val = np.float32(0)
46 | 
47 |     def __init__(self, x_val = np.float32(0), y_val = np.float32(0), z_val = np.float32(0)):
48 |         self.x_val = x_val
49 |         self.y_val = y_val
50 |         self.z_val = z_val
51 | 
52 | 
53 | class Quaternionr(MsgpackMixin):
54 |     w_val = np.float32(0)
55 |     x_val = np.float32(0)
56 |     y_val = np.float32(0)
57 |     z_val = np.float32(0)
58 | 
59 |     def __init__(self, x_val = np.float32(0), y_val = np.float32(0), z_val = np.float32(0), w_val = np.float32(1)):
60 |         self.x_val = x_val
61 |         self.y_val = y_val
62 |         self.z_val = z_val
63 |         self.w_val = w_val
64 | 
65 | class Pose(MsgpackMixin):
66 |     position = Vector3r()
67 |     orientation = Quaternionr()
68 | 
69 |     def __init__(self, position_val, orientation_val):
70 |         self.position = position_val
71 |         self.orientation = orientation_val
72 | 
73 | 
74 | class CollisionInfo(MsgpackMixin):
75 |     has_collided = False
76 |     normal = Vector3r()
77 |     impact_point = Vector3r()
78 |     position = Vector3r()
79 |     penetration_depth = np.float32(0)
80 |     time_stamp = np.float32(0)
81 |     object_name = ""
82 |     object_id = -1
83 | 
84 | class GeoPoint(MsgpackMixin):
85 |     latitude = 0.0
86 |     longitude = 0.0
87 |     altitude = 0.0
88 | 
89 | class YawMode(MsgpackMixin):
90 |     is_rate = True
91 |     yaw_or_rate = 0.0
92 |     def __init__(self, is_rate = True, yaw_or_rate = 0.0):
93 |         self.is_rate = is_rate
94 |         self.yaw_or_rate = yaw_or_rate
95 | 
96 | class ImageRequest(MsgpackMixin):
97 |     camera_id = np.uint8(0)
98 |     image_type = AirSimImageType.Scene
99 |     pixels_as_float = False
100 |     compress = False
101 | 
102 |     def __init__(self, camera_id, image_type, pixels_as_float = False, compress = True):
103 |         self.camera_id = camera_id
104 |         self.image_type = image_type
105 |         self.pixels_as_float = pixels_as_float
106 |         self.compress = compress
107 | 
108 | 
109 | class ImageResponse(MsgpackMixin):
110 |     image_data_uint8 = np.uint8(0)
111 |     image_data_float = np.float32(0)
112 |     camera_position = Vector3r()
113 |     camera_orientation = Quaternionr()
114 |     time_stamp = np.uint64(0)
115 |     message = ''
116 |     pixels_as_float = np.float32(0)
117 |     compress = True
118 |     width = 0
119 |     height = 0
120 |     image_type = AirSimImageType.Scene
121 | 
122 | class CarControls(MsgpackMixin):
123 |     throttle = np.float32(0)
124 |     steering = np.float32(0)
125 |     brake = np.float32(0)
126 |     handbrake = False
127 |     is_manual_gear = False
128 |     manual_gear = 0
129 |     gear_immediate = True
130 | 
131 |     def set_throttle(self, throttle_val, forward):
132 |         if (forward):
133 |             self.is_manual_gear = False # assign to instance attributes (bare names here were unused locals)
134 |             self.manual_gear = 0
135 |             self.throttle = abs(throttle_val)
136 |         else:
137 |             self.is_manual_gear = False
138 |             self.manual_gear = -1
139 |             self.throttle = - abs(throttle_val)
140 | 
141 | class CarState(MsgpackMixin):
142 |     speed = np.float32(0)
143 |     gear = 0
144 |     position = Vector3r()
145 |     velocity = Vector3r()
146 |     orientation = Quaternionr()
147 | 
148 | class AirSimClientBase:
149 |     def __init__(self, ip, port):
150 |         self.client = msgpackrpc.Client(msgpackrpc.Address(ip, port), timeout = 3600)
151 | 
152 |     def ping(self):
153 |         return self.client.call('ping')
154 | 
155 |     def reset(self):
156 |         self.client.call('reset')
157 | 
158 |     def confirmConnection(self):
159 |         print('Waiting for connection: ', end='')
160 |         home = self.getHomeGeoPoint()
161 |         while ((home.latitude == 0 and home.longitude == 0 and home.altitude == 0) or
162 |                math.isnan(home.latitude) or math.isnan(home.longitude) or math.isnan(home.altitude)):
163 |             time.sleep(1)
164 |             home = self.getHomeGeoPoint()
165 |             print('X', end='')
166 |         print('')
167 | 
168 |     def getHomeGeoPoint(self):
169 |         return GeoPoint.from_msgpack(self.client.call('getHomeGeoPoint'))
170 | 
171 |     # basic flight control
172 |     def enableApiControl(self, is_enabled):
173 |         return self.client.call('enableApiControl', is_enabled)
174 |     def isApiControlEnabled(self):
175 |         return self.client.call('isApiControlEnabled')
176 | 
177 |     def simSetSegmentationObjectID(self, mesh_name, object_id, is_name_regex = False):
178 |         return self.client.call('simSetSegmentationObjectID', mesh_name, object_id, is_name_regex)
179 |     def simGetSegmentationObjectID(self, mesh_name):
180 |         return self.client.call('simGetSegmentationObjectID', mesh_name)
181 | 
182 |     # camera control
183 |     # simGetImage returns compressed png in array of bytes
184 |     # image_type uses one of the AirSimImageType members
185 |     def simGetImage(self, camera_id, image_type):
186 |         # because this method returns std::vector<uint8>, msgpack decides to encode it as a string unfortunately.
187 |         result = self.client.call('simGetImage', camera_id, image_type)
188 |         if (result == "" or result == "\0"):
189 |             return None
190 |         return result
191 | 
192 |     # camera control
193 |     # simGetImages takes a list of ImageRequests and returns a list of ImageResponses
194 |     # image_type uses one of the AirSimImageType members
195 |     def simGetImages(self, requests):
196 |         responses_raw = self.client.call('simGetImages', requests)
197 |         return [ImageResponse.from_msgpack(response_raw) for response_raw in responses_raw]
198 | 
199 |     def getCollisionInfo(self):
200 |         return CollisionInfo.from_msgpack(self.client.call('getCollisionInfo'))
201 | 
202 |     @staticmethod
203 |     def stringToUint8Array(bstr):
204 |         return np.frombuffer(bstr, np.uint8) # frombuffer replaces the deprecated np.fromstring
205 |     @staticmethod
206 |     def stringToFloatArray(bstr):
207 |         return np.frombuffer(bstr, np.float32)
208 |     @staticmethod
209 |     def listTo2DFloatArray(flst, width, height):
210 |         return np.reshape(np.asarray(flst, np.float32), (height, width))
211 |     @staticmethod
212 |     def getPfmArray(response):
213 |         return AirSimClientBase.listTo2DFloatArray(response.image_data_float, response.width, response.height)
214 | 
215 |     @staticmethod
216 |     def get_public_fields(obj):
217 |         return [attr for attr in dir(obj)
218 |                 if not (attr.startswith("_")
219 |                     or inspect.isbuiltin(attr)
220 |                     or inspect.isfunction(attr)
221 |                     or inspect.ismethod(attr))]
222 | 
223 | 
224 |     @staticmethod
225 |     def to_dict(obj):
226 |         return dict([attr, getattr(obj, attr)] for attr in AirSimClientBase.get_public_fields(obj))
227 | 
228 |     @staticmethod
229 |     def to_str(obj):
230 |         return str(AirSimClientBase.to_dict(obj))
231 | 
232 |     @staticmethod
233 |     def write_file(filename, bstr):
234 |         with open(filename, 'wb') as afile:
235 |             afile.write(bstr)
236 | 
237 |     def simSetPose(self, pose, ignore_collision):
238 |         self.client.call('simSetPose', pose, ignore_collision)
239 | 
240 |     def simGetPose(self):
241 |         return self.client.call('simGetPose')
242 | 
243 |     # helper method for converting getOrientation to roll/pitch/yaw
244 |     # https://en.wikipedia.org/wiki/Conversion_between_quaternions_and_Euler_angles
245 |     @staticmethod
246 |     def toEulerianAngle(q):
247 |         z = q.z_val
248 |         y = q.y_val
249 |         x = q.x_val
250 |         w = q.w_val
251 |         ysqr = y * y
252 | 
253 |         # roll (x-axis rotation)
254 |         t0 = +2.0 * (w*x + y*z)
255 |         t1 = +1.0 - 2.0*(x*x + ysqr)
256 |         roll = math.atan2(t0, t1)
257 | 
258 |         # pitch (y-axis rotation)
259 |         t2 = +2.0 * (w*y - z*x)
260 |         if (t2 > 1.0):
261 |             t2 = 1.0
262 |         if (t2 < -1.0):
263 |             t2 = -1.0
264 |         pitch = math.asin(t2)
265 | 
266 |         # yaw (z-axis rotation)
267 |         t3 = +2.0 * (w*z + x*y)
268 |         t4 = +1.0 - 2.0 * (ysqr + z*z)
269 |         yaw = math.atan2(t3, t4)
270 | 
271 |         return (pitch, roll, yaw)
272 | 
273 |     @staticmethod
274 |     def toQuaternion(pitch, roll, yaw):
275 |         t0 = math.cos(yaw * 0.5)
276 |         t1 = math.sin(yaw * 0.5)
277 |         t2 = math.cos(roll * 0.5)
278 |         t3 = math.sin(roll * 0.5)
279 |         t4 = math.cos(pitch * 0.5)
280 |         t5 = math.sin(pitch * 0.5)
281 | 
282 |         q = Quaternionr()
283 |         q.w_val = t0 * t2 * t4 + t1 * t3 * t5 #w
284 |         q.x_val = t0 * t3 * t4 - t1 * t2 * t5 #x
285 |         q.y_val = t0 * t2 * t5 + t1 * t3 * t4 #y
286 |         q.z_val = t1 * t2 * t4 - t0 * t3 * t5 #z
287 |         return q
288 | 
289 |     @staticmethod
290 |     def wait_key(message = ''):
291 |         ''' Wait for a key press on the console and return it. '''
292 |         if message != '':
293 |             print (message)
294 | 
295 |         result = None
296 |         if os.name == 'nt':
297 |             import msvcrt
298 |             result = msvcrt.getch()
299 |         else:
300 |             import termios
301 |             fd = sys.stdin.fileno()
302 | 
303 |             oldterm = termios.tcgetattr(fd)
304 |             newattr = termios.tcgetattr(fd)
305 |             newattr[3] = newattr[3] & ~termios.ICANON & ~termios.ECHO
306 |             termios.tcsetattr(fd, termios.TCSANOW, newattr)
307 | 
308 |             try:
309 |                 result = sys.stdin.read(1)
310 |             except IOError:
311 |                 pass
312 |             finally:
313 |                 termios.tcsetattr(fd, termios.TCSAFLUSH, oldterm)
314 | 
315 |         return result
316 | 
317 |     @staticmethod
318 |     def read_pfm(file):
319 |         """ Read a pfm file """
320 |         file = open(file, 'rb')
321 | 
322 |         color = None
323 |         width = None
324 |         height = None
325 |         scale = None
326 |         endian = None
327 | 
328 |         header = file.readline().rstrip()
329 |         header = str(bytes.decode(header, encoding='utf-8'))
330 |         if header == 'PF':
331 |             color = True
332 |         elif header == 'Pf':
333 |             color = False
334 |         else:
335 |             raise Exception('Not a PFM file.')
336 | 
337 |         temp_str = str(bytes.decode(file.readline(), encoding='utf-8'))
338 |         dim_match = re.match(r'^(\d+)\s(\d+)\s$', temp_str)
339 |         if dim_match:
340 |             width, height = map(int, dim_match.groups())
341 |         else:
342 |             raise Exception('Malformed PFM header.')
343 | 
344 |         scale = float(file.readline().rstrip())
345 |         if scale < 0: # little-endian
346 |             endian = '<'
347 |             scale = -scale
348 |         else:
349 |             endian = '>' # big-endian
350 | 
351 |         data = np.fromfile(file, endian + 'f')
352 |         shape = (height, width, 3) if color else (height, width)
353 | 
354 |         data = np.reshape(data, shape)
355 |         # DEY: I don't know why this was there.
356 |         #data = np.flipud(data)
357 |         file.close()
358 | 
359 |         return data, scale
360 | 
361 |     @staticmethod
362 |     def write_pfm(file, image, scale=1):
363 |         """ Write a pfm file """
364 |         file = open(file, 'wb')
365 | 
366 |         color = None
367 | 
368 |         if image.dtype.name != 'float32':
369 |             raise Exception('Image dtype must be float32.')
370 | 
371 |         image = np.flipud(image)
372 | 
373 |         if len(image.shape) == 3 and image.shape[2] == 3: # color image
374 |             color = True
375 |         elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1: # greyscale
376 |             color = False
377 |         else:
378 |             raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')
379 | 
380 |         file.write('PF\n'.encode('utf-8') if color else 'Pf\n'.encode('utf-8'))
381 |         temp_str = '%d %d\n' % (image.shape[1], image.shape[0])
382 |         file.write(temp_str.encode('utf-8'))
383 | 
384 |         endian = image.dtype.byteorder
385 | 
386 |         if endian == '<' or (endian == '=' and sys.byteorder == 'little'):
387 |             scale = -scale
388 | 
389 |         temp_str = '%f\n' % scale
390 |         file.write(temp_str.encode('utf-8'))
391 | 
392 |         image.tofile(file)
393 | 
394 |     @staticmethod
395 |     def write_png(filename, image):
396 |         """ image must be numpy array H X W X channels
397 |         """
398 |         import zlib, struct
399 | 
400 |         buf = image.flatten().tobytes()
401 |         width = image.shape[1]
402 |         height = image.shape[0]
403 | 
404 |         # reverse the vertical line order and add null bytes at the start
405 |         width_byte_4 = width * 4
406 |         raw_data = b''.join(b'\x00' + buf[span:span + width_byte_4]
407 |                             for span in range((height - 1) * width_byte_4, -1, - width_byte_4))
408 | 
409 |         def png_pack(png_tag, data):
410 |             chunk_head = png_tag + data
411 |             return (struct.pack("!I", len(data)) +
412 |                     chunk_head +
413 |                     struct.pack("!I", 0xFFFFFFFF & zlib.crc32(chunk_head)))
414 | 
415 |         png_bytes = b''.join([
416 |             b'\x89PNG\r\n\x1a\n',
417 |             png_pack(b'IHDR', struct.pack("!2I5B", width, height, 8, 6, 0, 0, 0)),
418 |             png_pack(b'IDAT', zlib.compress(raw_data, 9)),
419 |             png_pack(b'IEND', b'')])
420 | 
421 |         AirSimClientBase.write_file(filename, png_bytes)
422 | 
423 | 
424 | # ----------------------------------- Multirotor APIs ---------------------------------------------
425 | class MultirotorClient(AirSimClientBase, object):
426 |     def __init__(self, ip = ""):
427 |         if (ip == ""):
428 |             ip = "127.0.0.1"
429 |         super(MultirotorClient, self).__init__(ip, 41451)
430 | 
431 |     def armDisarm(self, arm):
432 |         return self.client.call('armDisarm', arm)
433 | 
434 |     def takeoff(self, max_wait_seconds = 15):
435 |         return self.client.call('takeoff', max_wait_seconds)
436 | 
437 |     def land(self, max_wait_seconds = 60):
438 |         return self.client.call('land', max_wait_seconds)
439 | 
440 |     def goHome(self):
441 |         return self.client.call('goHome')
442 | 
443 |     def hover(self):
444 |         return self.client.call('hover')
445 | 
446 | 
447 |     # query vehicle state
448 |     def getPosition(self):
449 |         return Vector3r.from_msgpack(self.client.call('getPosition'))
450 |     def getVelocity(self):
451 |         return Vector3r.from_msgpack(self.client.call('getVelocity'))
452 |     def getOrientation(self):
453 |         return Quaternionr.from_msgpack(self.client.call('getOrientation'))
454 |     def getLandedState(self):
455 |         return self.client.call('getLandedState')
456 |     def getGpsLocation(self):
457 |         return GeoPoint.from_msgpack(self.client.call('getGpsLocation'))
458 |     def getPitchRollYaw(self):
459 |         return self.toEulerianAngle(self.getOrientation())
460 | 
461 |     #def getRCData(self):
462 |     #    return self.client.call('getRCData')
463 |     def timestampNow(self):
464 |         return self.client.call('timestampNow')
465 |     def isApiControlEnabled(self):
466 |         return self.client.call('isApiControlEnabled')
467 |     def isSimulationMode(self):
468 |         return self.client.call('isSimulationMode')
469 |     def getServerDebugInfo(self):
470 |         return self.client.call('getServerDebugInfo')
471 | 
472 | 
473 |     # APIs for control
474 |     def moveByAngle(self, pitch, roll, z, yaw, duration):
475 |         return self.client.call('moveByAngle', pitch, roll, z, yaw, duration)
476 | 
477 |     def moveByVelocity(self, vx, vy, vz, duration, drivetrain = DrivetrainType.MaxDegreeOfFreedom, yaw_mode = YawMode()):
478 |         return self.client.call('moveByVelocity', vx, vy, vz, duration, drivetrain, yaw_mode)
479 | 
480 |     def moveByVelocityZ(self, vx, vy, z, duration, drivetrain = DrivetrainType.MaxDegreeOfFreedom, yaw_mode = YawMode()):
481 |         return self.client.call('moveByVelocityZ', vx, vy, z, duration, drivetrain, yaw_mode)
482 | 
483 |     def moveOnPath(self, path, velocity, max_wait_seconds = 60, drivetrain = DrivetrainType.MaxDegreeOfFreedom, yaw_mode = YawMode(), lookahead = -1, adaptive_lookahead = 1):
484 |         return self.client.call('moveOnPath', path, velocity, max_wait_seconds, drivetrain, yaw_mode, lookahead, adaptive_lookahead)
485 | 
486 |     def moveToZ(self, z, velocity, max_wait_seconds = 60, yaw_mode = YawMode(), lookahead = -1, adaptive_lookahead = 1):
487 |         return self.client.call('moveToZ', z, velocity, max_wait_seconds, yaw_mode, lookahead, adaptive_lookahead)
488 | 
489 |     def moveToPosition(self, x, y, z, velocity, max_wait_seconds = 60, drivetrain = DrivetrainType.MaxDegreeOfFreedom, yaw_mode = YawMode(), lookahead = -1, adaptive_lookahead = 1):
490 |         return self.client.call('moveToPosition', x, y, z, velocity, max_wait_seconds, drivetrain, yaw_mode, lookahead, adaptive_lookahead)
491 | 
492 |     def moveByManual(self, vx_max, vy_max, z_min, duration, drivetrain = DrivetrainType.MaxDegreeOfFreedom, yaw_mode = YawMode()):
493 |         return self.client.call('moveByManual', vx_max, vy_max, z_min, duration, drivetrain, yaw_mode)
494 | 
495 |     def rotateToYaw(self, yaw, max_wait_seconds = 60, margin = 5):
496 |         return self.client.call('rotateToYaw', yaw, max_wait_seconds, margin)
497 | 
498 |     def rotateByYawRate(self, yaw_rate, duration):
499 |         return self.client.call('rotateByYawRate', yaw_rate, duration)
500 | 
501 | # ----------------------------------- Car APIs ---------------------------------------------
502 | class CarClient(AirSimClientBase, object):
503 |     def __init__(self, ip = ""):
504 |         if (ip == ""):
505 |             ip = "127.0.0.1"
506 |         super(CarClient, self).__init__(ip, 42451)
507 | 
508 |     def setCarControls(self, controls):
509 |         self.client.call('setCarControls', controls)
510 | 
511 |     def getCarState(self):
512 |         state_raw = self.client.call('getCarState')
513 |         return CarState.from_msgpack(state_raw)
514 | 
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | AirSimTensorFlow: A simple example of using Microsoft AirSim to train a TensorFlow neural net on collision avoidance
2 | 
3 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
4 | Simon D. Levy, Will McMurtry, Jacob Rosen
5 | 
6 | All rights reserved.
7 | 
8 | MIT License
9 | 
10 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
11 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
13 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | This repository contains Python scripts showing how you can use [Microsoft AirSim](https://github.com/Microsoft/AirSim) to collect image data
4 | from a moving vehicle, then use that data to train and test a deep-learning neural net in TensorFlow.
5 | 
6 | # Prerequisites
7 | 
8 | * [Recommended hardware](https://wiki.unrealengine.com/Recommended_Hardware) for running UnrealEngine4, required
9 | for AirSim. Although it is possible to build AirSim on OS X and Linux, we found
10 | it easiest to use the pre-compiled Windows binaries in the
11 | [Neighborhood](https://github.com/Microsoft/AirSim/releases/download/v1.1.7/Neighbourhood.zip)
12 | example.
13 | 
14 | * [Python3](https://www.python.org/ftp/python/3.6.3/python-3.6.3-amd64.exe) for 64-bit Windows
15 | 
16 | * [TensorFlow](https://www.tensorflow.org/install/install_windows). To run TensorFlow on your GPU as we and
17 | most people do, you'll need to follow the
18 | [directions](https://www.tensorflow.org/install/install_windows) for installing CUDA and cuDNN. We recommend setting aside at least an hour to make sure you do this right.
19 | 
20 | # Instructions
21 | 
22 | 1. Clone this repository.
23 | 2. Download and unzip the [Neighborhood](https://github.com/Microsoft/AirSim/releases/download/v1.1.7/Neighbourhood.zip)
24 | example, open it, and click run.bat to launch AirSim.
25 | 3. When prompted, go with the default car simulation. If you press the 3 key on your keyboard,
26 | you will see the little image on which the neural net will be trained.
27 | 4. From the repository, run the image_collection.py script. It will start the car moving and stop when the
28 | car collides with the fence, creating a carpix folder containing the images on which you will train
29 | the network in the next step.
30 | 5. From the repository, run the collision_training.py script. Running on an HP Z440 workstation with an
31 | NVIDIA GeForce GTX 1080 Ti GPU, we were able to complete the 500 training iterations in a few seconds.
32 | 6. From the repository, run the collision_testing.py script. This should drive the car forward as before,
33 | but the car should stop right before it hits the fence, based on the collision predicted by the neural net.
34 | 
35 | # How it works
36 | 
37 | The image_collection script maintains a queue of the ten most recent images and saves them to numbered
38 | files in the carpix folder. The collision_training script converts these color images to
39 | grayscale, then builds a training set in which all images but the final one are labeled as safe (no
40 | collision; code [0 1]), and the final one is labeled as a collision (code [1 0]).
41 | This training script then uses Python's built-in pickle library to
42 | [save](https://github.com/simondlevy/AirSimTensorFlow/blob/master/collision_training.py#L111-L113)
43 | the trained network parameters (weights and biases). The collision_testing script uses pickle to
44 | [restore](https://github.com/simondlevy/AirSimTensorFlow/blob/master/collision_testing.py#L42-L45)
45 | these parameters, then reconstructs the TensorFlow [neural net](https://github.com/simondlevy/AirSimTensorFlow/blob/master/tf_softmax_layer.py#L18-L28) from them. (We found this approach easier than
46 | using TensorFlow's [save-and-restore](https://www.tensorflow.org/programmers_guide/saved_model) API.)
47 | Finally, the collision_testing script moves the vehicle forward, converting the live
48 | image into grayscale and running it through the network to make a collision/no-collision prediction.
49 | When the value of the “collision bit” exceeds 0.5, the script stops the vehicle by applying the brakes. (A condensed sketch of the save/restore round-trip appears below, after the Future work section.)
50 | 
51 | # Future work
52 | 
53 | Our single-layer logistic regression network provides a simple proof-of-concept
54 | example; however, for a more realistic data set involving collisions with
55 | different types of objects, a convolutional network would make more sense.
56 | AirSim also provides access to depth images (just press the 1 key during
57 | the simulation), which, like the Lidar on today's self-driving cars, would
58 | provide a valuable additional source of information for avoiding collisions.
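59 | 
60 | As a recap of the save/restore round-trip described under "How it works," here is a condensed
61 | sketch drawn from collision_training.py and collision_testing.py. It is illustrative rather than
62 | a third script: it assumes collision_training.py has already been run (so that params.pkl exists),
63 | and it reuses the IMGSIZE value from collision_testing.py.
64 | 
65 | ```python
66 | import pickle
67 | import tensorflow as tf
68 | 
69 | from tf_softmax_layer import inference
70 | 
71 | IMGSIZE = 1032  # length of one flattened, scaled-down grayscale image
72 | 
73 | # Training side: evaluate the graph's trainable variables (W, b) to plain
74 | # NumPy arrays, then pickle them:
75 | #
76 | #   params = [sess.run(param) for param in tf.trainable_variables()]
77 | #   pickle.dump(params, open('params.pkl', 'wb'))
78 | 
79 | # Testing side: un-pickle the arrays and rebuild the same one-layer network,
80 | # using them as constant initializers.
81 | W, b = pickle.load(open('params.pkl', 'rb'))
82 | 
83 | with tf.Graph().as_default():
84 |     x = tf.placeholder('float', [None, IMGSIZE])
85 |     output = inference(x, IMGSIZE, 2, W, b)
86 |     sess = tf.Session()
87 |     sess.run(tf.global_variables_initializer())
88 |     # For a flattened grayscale image (from image_helper.loadgray),
89 |     # sess.run(output, feed_dict={x: [image]})[0][1] is the "safe"
90 |     # (no-collision) probability; the car brakes when it falls below 0.5.
91 | ```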
92 | 
93 | # Credits
94 | 
95 | This code represents the combined work of two teams in Prof. Simon D. Levy's fall 2017 AI course
96 | ([CSCI 315](http://home.wlu.edu/~levys/courses/csci315f2017/)) at
97 | Washington and Lee University (listed alphabetically):
98 | 
99 | * Jack Baird
100 | * Alex Cantrell
101 | * Keith Denning
102 | * Rajwol Joshi
103 | * Will McMurtry
104 | * Jacob Rosen
105 | 
106 | # Acknowledgement
107 | 
108 | We thank David Pfaff of the [W&L IQ Center](https://www.wlu.edu/iq-center) for
109 | providing the hardware on which we developed this project.
110 | 
--------------------------------------------------------------------------------
/airsim.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simondlevy/AirSimTensorFlow/998fec23f3d717d8aa5d407bfc41c4ad05e2c208/airsim.png
--------------------------------------------------------------------------------
/collision_testing.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | collision_testing.py : tests a pickled network on its ability to predict a collision
4 | 
5 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
6 | Simon D. Levy, Will McMurtry, Jacob Rosen
7 | 
8 | This file is part of AirSimTensorFlow
9 | 
10 | MIT License
11 | '''
12 | 
13 | from AirSimClient import CarClient, CarControls, ImageRequest, AirSimImageType, AirSimClientBase
14 | import os
15 | import time
16 | import tensorflow as tf
17 | import pickle
18 | import sys
19 | 
20 | from image_helper import loadgray, IMAGEDIR
21 | from tf_softmax_layer import inference
22 | 
23 | TMPFILE = IMAGEDIR + '/active.png'
24 | PARAMFILE = 'params.pkl'
25 | IMGSIZE = 1032
26 | INITIAL_THROTTLE = 0.65
27 | BRAKING_DURATION = 15
28 | 
29 | # connect to the AirSim simulator
30 | client = CarClient()
31 | client.confirmConnection()
32 | print('Connected')
33 | client.enableApiControl(True)
34 | car_controls = CarControls()
35 | 
36 | client.reset()
37 | 
38 | # go forward
39 | car_controls.throttle = INITIAL_THROTTLE
40 | car_controls.steering = 0
41 | client.setCarControls(car_controls)
42 | 
43 | # Load saved training params as ordinary NumPy arrays
44 | W,b = pickle.load(open(PARAMFILE, 'rb'))
45 | 
46 | with tf.Graph().as_default():
47 | 
48 |     # Placeholder for an image
49 |     x = tf.placeholder('float', [None, IMGSIZE])
50 | 
51 |     # Our inference engine, initialized with the weights we just loaded
52 |     output = inference(x, IMGSIZE, 2, W, b)
53 | 
54 |     # TensorFlow initialization boilerplate
55 |     sess = tf.Session()
56 |     init_op = tf.global_variables_initializer()
57 |     sess.run(init_op)
58 | 
59 |     # Once the brakes come on, we need to keep them on for a while before exiting; otherwise,
60 |     # the vehicle will resume moving.
61 |     brakingCount = 0
62 | 
63 |     # Loop until we detect a collision
64 |     while True:
65 | 
66 |         # Get RGBA camera images from the car
67 |         responses = client.simGetImages([ImageRequest(1, AirSimImageType.Scene)])
68 | 
69 |         # Save it to a temporary file
70 |         image = responses[0].image_data_uint8
71 |         AirSimClientBase.write_file(os.path.normpath(TMPFILE), image)
72 | 
73 |         # Re-load the image as a grayscale array
74 |         image = loadgray(TMPFILE)
75 | 
76 |         # Run the image through our inference engine.
77 |         # Engine returns a softmax output inside a list, so we grab the first
78 |         # element of the list (the actual softmax vector), whose second element
79 |         # is the absence of an obstacle.
80 |         safety = sess.run(output, feed_dict={x:[image]})[0][1]
81 | 
82 |         # Slam on the brakes if it ain't safe!
83 |         if safety < 0.5:
84 | 
85 |             if brakingCount > BRAKING_DURATION:
86 |                 print('BRAKING TO AVOID COLLISION')
87 |                 sys.stdout.flush()
88 |                 break
89 | 
90 |             car_controls.brake = 1.0
91 |             client.setCarControls(car_controls)
92 | 
93 |             brakingCount += 1
94 | 
95 |         # Wait a bit on each iteration
96 |         time.sleep(0.1)
97 | 
98 | client.enableApiControl(False)
99 | 
--------------------------------------------------------------------------------
/collision_training.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | collision_training.py : uses stored images to train a neural net to detect collisions
4 | 
5 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
6 | Simon D. Levy, Will McMurtry, Jacob Rosen
7 | 
8 | This file is part of AirSimTensorFlow
9 | 
10 | MIT License
11 | '''
12 | 
13 | # Third-party modules, plus Python's built-in pickle
14 | import tensorflow as tf
15 | import numpy as np
16 | import pickle
17 | 
18 | # Modules for this project
19 | from image_helper import loadgray
20 | from tf_softmax_layer import inference
21 | 
22 | # Final image is crash; previous are no-crash
23 | SAFESIZE = 5
24 | 
25 | # Where we've stored images
26 | IMAGEDIR = './carpix'
27 | 
28 | # Where we'll store weights and biases
29 | PARAMFILE = 'params.pkl'
30 | 
31 | # Parameters
32 | learning_rate = 0.01
33 | training_epochs = 500
34 | batch_size = 100
35 | display_step = 10
36 | 
37 | def loss(output, y):
38 |     dot_product = y * tf.log(output)
39 | 
40 |     # Reduction along axis 0 collapses each column into a single
41 |     # value, whereas reduction along axis 1 collapses each row
42 |     # into a single value. In general, reduction along axis i
43 |     # collapses the ith dimension of a tensor to size 1. Here, reducing the [N, 2] products along axis 1 yields one cross-entropy value per image.
44 |     xentropy = -tf.reduce_sum(dot_product, axis=1)
45 | 
46 |     loss = tf.reduce_mean(xentropy)
47 | 
48 |     return loss
49 | 
50 | def training(cost, global_step):
51 | 
52 |     tf.summary.scalar('cost', cost)
53 |     optimizer = tf.train.GradientDescentOptimizer(learning_rate)
54 |     train_op = optimizer.minimize(cost, global_step=global_step)
55 | 
56 |     return train_op
57 | 
58 | def main():
59 | 
60 |     # This will get the number of pixels in each image (they must all be the same!)
61 |     imgsize = 0
62 | 
63 |     # Read in images from car, convert to grayscale, scale down, and flatten for use as input
64 |     images = []
65 |     for k in range(SAFESIZE):
66 | 
67 |         image = loadgray(IMAGEDIR + '/image%03d.png' % k)
68 | 
69 |         imgsize = np.prod(image.shape)
70 |         images.append(image)
71 | 
72 |     # All but the last image are safe (01 = no-crash; 10 = crash)
73 |     targets = []
74 |     for k in range(SAFESIZE-1):
75 |         targets.append([0,1])
76 |     targets.append([1,0])
77 | 
78 |     with tf.Graph().as_default():
79 | 
80 |         x = tf.placeholder('float', [None, imgsize]) # car FPV images
81 |         y = tf.placeholder('float', [None, 2])       # 01 = no-crash; 10 = crash
82 | 
83 |         output = inference(x, imgsize, 2)
84 | 
85 |         cost = loss(output, y)
86 | 
87 |         global_step = tf.Variable(0, name='global_step', trainable=False)
88 | 
89 |         train_op = training(cost, global_step)
90 | 
91 |         sess = tf.Session()
92 | 
93 |         init_op = tf.global_variables_initializer()
94 | 
95 |         sess.run(init_op)
96 | 
97 |         # Training cycle
98 |         for epoch in range(training_epochs):
99 | 
100 |             # Fit training using batch data
101 |             sess.run(train_op, feed_dict={x: images, y: targets})
102 | 
103 |             # Compute average loss
104 |             avg_cost = sess.run(cost, feed_dict={x: images, y: targets})
105 | 
106 |             # Display logs per epoch step
107 |             if epoch%display_step == 0:
108 |                 print('Epoch:', '%04d' % epoch, 'cost =', '{:.9f}'.format(avg_cost))
109 | 
110 |         print('Optimization Finished; saving weights to ' + PARAMFILE)
111 |         params = [sess.run(param) for param in tf.trainable_variables()]
112 | 
113 |         pickle.dump(params, open(PARAMFILE, 'wb'))
114 | 
115 | if __name__ == '__main__':
116 | 
117 |     main()
118 | 
--------------------------------------------------------------------------------
/image_collection.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | image_collection.py : uses AirSim to collect vehicle first-person-view images
4 | 
5 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
6 | Simon D. Levy, Will McMurtry, Jacob Rosen
7 | 
8 | This file is part of AirSimTensorFlow
9 | 
10 | MIT License
11 | '''
12 | 
13 | from AirSimClient import CarClient, CarControls, ImageRequest, AirSimImageType, AirSimClientBase
14 | from image_helper import IMAGEDIR
15 | import pprint
16 | import os
17 | import time
18 | 
19 | # We maintain a queue of images of this size
20 | QUEUESIZE = 10
21 | 
22 | # Create image directory if it doesn't already exist
23 | try:
24 |     os.stat(IMAGEDIR)
25 | except OSError:
26 |     os.mkdir(IMAGEDIR)
27 | 
28 | # connect to the AirSim simulator
29 | client = CarClient()
30 | client.confirmConnection()
31 | print('Connected')
32 | client.enableApiControl(True)
33 | car_controls = CarControls()
34 | 
35 | client.reset()
36 | 
37 | # go forward
38 | car_controls.throttle = 1.0
39 | car_controls.steering = 0
40 | client.setCarControls(car_controls)
41 | 
42 | imagequeue = []
43 | 
44 | while True:
45 | 
46 |     # get RGBA camera images from the car
47 |     responses = client.simGetImages([ImageRequest(1, AirSimImageType.Scene)])
48 | 
49 |     # add image to queue
50 |     imagequeue.append(responses[0].image_data_uint8)
51 | 
52 |     # dump queue when it gets full
53 |     if len(imagequeue) == QUEUESIZE:
54 |         for i in range(QUEUESIZE):
55 |             AirSimClientBase.write_file(os.path.normpath(IMAGEDIR + '/image%03d.png' % i ), imagequeue[i])
56 |         imagequeue.pop(0)
57 | 
58 |     collision_info = client.getCollisionInfo()
59 | 
60 |     if collision_info.has_collided:
61 |         print("Collision at pos %s, normal %s, impact pt %s, penetration %f, name %s, obj id %d" % (
62 |             pprint.pformat(collision_info.position),
63 |             pprint.pformat(collision_info.normal),
64 |             pprint.pformat(collision_info.impact_point),
65 |             collision_info.penetration_depth, collision_info.object_name, collision_info.object_id))
66 |         break
67 | 
68 |     time.sleep(0.1)
69 | 
70 | client.enableApiControl(False)
71 | 
--------------------------------------------------------------------------------
/image_helper.py:
--------------------------------------------------------------------------------
1 | '''
2 | image_helper.py : contains a loadgray() method to load an RGBA image in PNG format
3 | and return a grayscale image
4 | 
5 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
6 | Simon D. Levy, Will McMurtry, Jacob Rosen
7 | 
8 | This file is part of AirSimTensorFlow
9 | 
10 | MIT License
11 | '''
12 | 
13 | import matplotlib.pyplot as plt
14 | 
15 | # Images are too big to train quickly, so we scale 'em down
16 | SCALEDOWN = 6
17 | 
18 | # Where we'll store images
19 | IMAGEDIR = './carpix'
20 | 
21 | 
22 | def loadgray(filename):
23 |     '''
24 |     Loads an RGBA image from FILENAME, converts it to grayscale, and returns a flattened copy
25 |     '''
26 | 
27 |     image = plt.imread(filename)
28 | 
29 |     # RGB -> gray formula from https://www.johndcook.com/blog/2009/08/24/algorithms-convert-color-grayscale/
30 |     image = 0.21 * image[:,:,0] + 0.72 * image[:,:,1] + 0.07 * image[:,:,2]
31 |     image = image[0::SCALEDOWN, 0::SCALEDOWN]
32 |     image = image.flatten()
33 | 
34 |     return image
35 | 
--------------------------------------------------------------------------------
/tf_softmax_layer.py:
--------------------------------------------------------------------------------
1 | '''
2 | tf_softmax_layer.py : generic SoftMax inference() method for TensorFlow
3 | 
4 | Adapted from:
5 | 
6 | https://github.com/darksigma/Fundamentals-of-Deep-Learning-Book/blob/master/fdl_examples/chapter3/logistic_regression_updated.py
7 | 
8 | Copyright (C) 2017 Jack Baird, Alex Cantrell, Keith Denning, Rajwol Joshi,
9 | Simon D. Levy, Will McMurtry, Jacob Rosen
10 | 
11 | This file is part of AirSimTensorFlow
12 | 
13 | MIT License
14 | '''
15 | 
16 | import tensorflow as tf
17 | 
18 | def inference(x, xsize, ysize, W_vals=0, b_vals=0):
19 |     '''
20 |     This is a general-purpose softmax inference layer implementation.
21 |     '''
22 |     W_init = tf.constant_initializer(value=W_vals)
23 |     b_init = tf.constant_initializer(value=b_vals)
24 |     W = tf.get_variable('W', [xsize, ysize], initializer=W_init)
25 |     b = tf.get_variable('b', [ysize], initializer=b_init)
26 |     output = tf.nn.softmax(tf.matmul(x, W) + b)
27 | 
28 |     return output
29 | 
--------------------------------------------------------------------------------